[{"data":1,"prerenderedAt":627},["ShallowReactive",2],{"navigation_docs_fr":3,"-fr-gray-scott-school-jour-7":66,"-fr-gray-scott-school-jour-7-surround":622},[4,50,60],{"title":5,"path":6,"stem":7,"children":8},"L'École Gray Scott","/fr/gray-scott-school","fr/1.gray-scott-school/01.index",[9,10,14,18,22,26,30,34,38,42,46],{"title":5,"path":6,"stem":7},{"title":11,"path":12,"stem":13},"Présentation CINERI","/fr/gray-scott-school/presentation-cineri","fr/1.gray-scott-school/02.presentation-cineri",{"title":15,"path":16,"stem":17},"Jour 1 — Fondations","/fr/gray-scott-school/jour-1","fr/1.gray-scott-school/03.jour-1",{"title":19,"path":20,"stem":21},"Jour 2 — C++ sur CPU","/fr/gray-scott-school/jour-2","fr/1.gray-scott-school/04.jour-2",{"title":23,"path":24,"stem":25},"Jour 3 — Fortran sur CPU","/fr/gray-scott-school/jour-3","fr/1.gray-scott-school/05.jour-3",{"title":27,"path":28,"stem":29},"Jour 4 — Kokkos sur CPU","/fr/gray-scott-school/jour-4","fr/1.gray-scott-school/06.jour-4",{"title":31,"path":32,"stem":33},"Jour 5 — Python sur CPU","/fr/gray-scott-school/jour-5","fr/1.gray-scott-school/07.jour-5",{"title":35,"path":36,"stem":37},"Jour 6 — SIMD avec EVE + architecture GPU","/fr/gray-scott-school/jour-6","fr/1.gray-scott-school/08.jour-6",{"title":39,"path":40,"stem":41},"Jour 7 — Python sur GPU","/fr/gray-scott-school/jour-7","fr/1.gray-scott-school/09.jour-7",{"title":43,"path":44,"stem":45},"Jour 8 — Fortran sur GPU","/fr/gray-scott-school/jour-8","fr/1.gray-scott-school/10.jour-8",{"title":47,"path":48,"stem":49},"Jour 9 — Kokkos sur GPU","/fr/gray-scott-school/jour-9","fr/1.gray-scott-school/11.jour-9",{"title":51,"path":52,"stem":53,"children":54},"Projets","/fr/projets","fr/2.projets/1.index",[55,56],{"title":51,"path":52,"stem":53},{"title":57,"path":58,"stem":59},"SenLand","/fr/projets/senland","fr/2.projets/2.senland",{"title":61,"path":62,"stem":63,"children":64},"À propos","/fr/a-propos","fr/3.a-propos/1.index",[65],{"title":61,"path":62,"stem":63},{"id":67,"title":39,"badge":68,"body":69,"category":68,"description":610,"extension":611,"links":612,"meta":617,"navigation":619,"path":40,"seo":620,"stem":41,"tags":68,"__hash__":621},"docs_fr/fr/1.gray-scott-school/09.jour-7.md",null,{"type":70,"value":71,"toc":598},"minimark",[72,111,116,123,126,147,151,154,158,173,208,215,219,237,241,251,306,317,321,331,375,383,393,399,494,508,512,518,522,594],[73,74,75],"blockquote",{},[76,77,78,82,83,86,87,86,90,93,94,97,98,101,102,106,107,110],"p",{},[79,80,81],"strong",{},"30 juin 2026"," · Intervenants : ",[79,84,85],{},"Alice Faure",", ",[79,88,89],{},"Jean-Marc Colley",[79,91,92],{},"Sébastien Valat","\n& ",[79,95,96],{},"Nabil Garroum"," · quatre sessions Python-GPU s'enchaînent (CuPy → cuPyNumeric → ",[79,99,100],{},"JAX à\n14 h"," → synthèse) · Auditorium Marcel Vivargent + satellites (dont la CINERI). Le TP vit\ndans ",[103,104,105],"code",{},"GrayScott2026/day-5/GPU/"," — trois tutoriels + solutions, et les ",[79,108,109],{},"benchmarks A100\nofficiels",".",[112,113,115],"h2",{"id":114},"_1-ce-qui-change-vraiment-le-péage-pcie","1. Ce qui change vraiment : le péage PCIe",[76,117,118,119,122],{},"Le code array-first du Jour 5 se transpose presque tel quel — ce qui change, c'est la\n",[79,120,121],{},"géographie mémoire",". Le GPU calcule à ~2 To/s mais se nourrit par un tuyau de ~32 Go/s :",[124,125],"d7-transfer",{},[76,127,128,129,132,133,136,137,140,141,136,144,146],{},"Toute la journée applique la même règle : ",[103,130,131],{},"cp.asarray"," / ",[103,134,135],{},"jax.device_put"," ",[79,138,139],{},"une fois"," au\ndépart, toute la boucle de temps sur le device, ",[103,142,143],{},"asnumpy",[79,145,139],{}," à l'arrivée.",[112,148,150],{"id":149},"_2-trois-routes-vers-le-même-gpu","2. Trois routes vers le même GPU",[152,153],"d7-stack",{},[112,155,157],{"id":156},"session-cupy-numpy-sur-cuda-sans-réécriture","Session CuPy — NumPy sur CUDA, sans réécriture",[76,159,160,161,164,165,168,169,172],{},"Tutoriel ",[103,162,163],{},"3_Python_GPU_Cupy.md"," : CuPy reflète l'API NumPy sur CUDA — remplacer ",[103,166,167],{},"numpy"," par\n",[103,170,171],{},"cupy"," exécute le même stencil sur le GPU.",[174,175,180],"pre",{"className":176,"code":177,"language":178,"meta":179,"style":179},"language-python shiki shiki-themes material-theme-lighter material-theme material-theme-palenight","import cupy as cp\nu = cp.asarray(u_host)      # hôte → device, UNE fois\n# … mêmes expressions de stencil qu'en NumPy …\nu_host = cp.asnumpy(u)      # device → hôte, seulement au besoin\n","python","",[103,181,182,190,196,202],{"__ignoreMap":179},[183,184,187],"span",{"class":185,"line":186},"line",1,[183,188,189],{},"import cupy as cp\n",[183,191,193],{"class":185,"line":192},2,[183,194,195],{},"u = cp.asarray(u_host)      # hôte → device, UNE fois\n",[183,197,199],{"class":185,"line":198},3,[183,200,201],{},"# … mêmes expressions de stencil qu'en NumPy …\n",[183,203,205],{"class":185,"line":204},4,[183,206,207],{},"u_host = cp.asnumpy(u)      # device → hôte, seulement au besoin\n",[76,209,210,211,214],{},"Bonus du TP : CuPy est la seule version où l'",[79,212,213],{},"I/O HDF5 parallèle"," est implémentée — écrire\nles 1000 images pendant que le GPU calcule économise ~4 s (12 s au total sur A100).",[112,216,218],{"id":217},"session-cupynumeric-numpy-distribué","Session cuPyNumeric — NumPy distribué",[76,220,160,221,224,225,228,229,232,233,236],{},[103,222,223],{},"2_Python_GPU_cuPyNumeric.md"," : cuPyNumeric (NVIDIA, moteur ",[79,226,227],{},"Legate",") exécute du\ncode NumPy sur plusieurs GPU et plusieurs nœuds ",[79,230,231],{},"sans MPI et sans réécriture"," — le même\nscript, une machine plus grande. Le prix de la généralité se lit dans le benchmark : la\nversion ",[103,234,235],{},"convolve"," générique est la plus lente du plateau (128 s).",[112,238,240],{"id":239},"session-jax-14-h-le-code-du-jour-5-re-jitté","Session JAX (14 h) — le code du Jour 5, re-jitté",[76,242,160,243,246,247,250],{},[103,244,245],{},"1_Python_GPU_JAX.md"," : le Gray-Scott JAX du Jour 5 rejoue ",[79,248,249],{},"sans changement"," —\nXLA compile le stencil tracé en noyau CUDA, et JAX place les tableaux sur le device par\ndéfaut. Les solutions du TP montrent les trois outils qui font la différence :",[252,253,254,267],"table",{},[255,256,257],"thead",{},[258,259,260,264],"tr",{},[261,262,263],"th",{},"Solution du TP",[261,265,266],{},"Ce qu'elle apprend",[268,269,270,281,296],"tbody",{},[258,271,272,278],{},[273,274,275],"td",{},[103,276,277],{},"jax_vmap_solutions.py",[273,279,280],{},"vectoriser une fonction sur un axe entier (batch)",[258,282,283,288],{},[273,284,285],{},[103,286,287],{},"jax_fori_loop_solutions.py",[273,289,290,291,295],{},"fusionner la boucle de temps ",[292,293,294],"em",{},"dans"," le graphe compilé",[258,297,298,303],{},[273,299,300],{},[103,301,302],{},"jax_scan_solutions.py",[273,304,305],{},"accumuler les états sans retour Python entre les pas",[76,307,308,309,312,313,316],{},"C'est exactement la boîte à outils du port JAX de ",[310,311,57],"a",{"href":58},"\n(",[103,314,315],{},"lax.fori_loop"," pour fusionner les steps, batch résident sur device).",[112,318,320],{"id":319},"le-verdict-chiffres-a100-officiels","Le verdict — chiffres A100 officiels",[76,322,323,326,327,330],{},[103,324,325],{},"GPU/Benchmarks.md"," du dépôt : ",[79,328,329],{},"32×1000 itérations, grille 1920×1080 en float32"," :",[252,332,333,353],{},[255,334,335],{},[258,336,337,341,344,347,350],{},[261,338,340],{"align":339},"center","cuPyNumeric (convolve)",[261,342,343],{"align":339},"JAX (générique)",[261,345,346],{"align":339},"JAX (3×3)",[261,348,349],{"align":339},"CuPy",[261,351,352],{"align":339},"PyTorch",[268,354,355],{},[258,356,357,360,363,368,372],{},[273,358,359],{"align":339},"128 s",[273,361,362],{"align":339},"47 s",[273,364,365],{"align":339},[79,366,367],{},"18 s",[273,369,370],{"align":339},[79,371,367],{},[273,373,374],{"align":339},"22 s",[376,377],"gs-bar-chart",{":categories":378,":series":379,"note":380,"title":381,"unit":382},"[\"cuPyNumeric\",\"JAX (générique)\",\"JAX (3×3)\",\"CuPy\",\"PyTorch\"]","[{\"name\":\"A100\",\"values\":[128,47,18,18,22]}]","Chiffres officiels du dépôt (GPU/Benchmarks.md). CuPy + I/O HDF5 parallèle : 12 s.","Gray-Scott Python sur A100 (32×1000 itérations, 1920×1080 float32)"," s",[76,384,385,386,389,390,392],{},"La boucle se boucle : les ",[79,387,388],{},"377 s"," du meilleur CPU du Jour 5 tombent à ",[79,391,367],{}," sur A100 —\n×21, toujours en Python. Et le classement rappelle les leçons de la semaine : la\nspécialisation du stencil (Jour 5) et la résidence des données (aujourd'hui) pèsent plus que\nle choix de la bibliothèque.",[112,394,396,397],{"id":395},"le-tp-grayscott2026day-5gpu","Le TP — ",[103,398,105],{},[174,400,404],{"className":401,"code":402,"language":403,"meta":179,"style":179},"language-bash shiki shiki-themes material-theme-lighter material-theme material-theme-palenight","# En local (NVIDIA, CUDA ≥ 12, Python 3.10-3.12)\ngit clone https://gitlab.in2p3.fr/alice.faure/gray-scott-python.git\npython -m venv gpu-env && source gpu-env/bin/activate\npip install h5py opencv-python numpy matplotlib scipy \\\n            \"jax[cuda12]\" cupy-cuda12x nvidia-cupynumeric\n","bash",[103,405,406,412,425,449,476],{"__ignoreMap":179},[183,407,408],{"class":185,"line":186},[183,409,411],{"class":410},"sHwdD","# En local (NVIDIA, CUDA ≥ 12, Python 3.10-3.12)\n",[183,413,414,418,422],{"class":185,"line":192},[183,415,417],{"class":416},"sBMFI","git",[183,419,421],{"class":420},"sfazB"," clone",[183,423,424],{"class":420}," https://gitlab.in2p3.fr/alice.faure/gray-scott-python.git\n",[183,426,427,429,432,435,438,442,446],{"class":185,"line":198},[183,428,178],{"class":416},[183,430,431],{"class":420}," -m",[183,433,434],{"class":420}," venv",[183,436,437],{"class":420}," gpu-env",[183,439,441],{"class":440},"sMK4o"," &&",[183,443,445],{"class":444},"s2Zo4"," source",[183,447,448],{"class":420}," gpu-env/bin/activate\n",[183,450,451,454,457,460,463,466,469,472],{"class":185,"line":204},[183,452,453],{"class":416},"pip",[183,455,456],{"class":420}," install",[183,458,459],{"class":420}," h5py",[183,461,462],{"class":420}," opencv-python",[183,464,465],{"class":420}," numpy",[183,467,468],{"class":420}," matplotlib",[183,470,471],{"class":420}," scipy",[183,473,475],{"class":474},"sTEyZ"," \\\n",[183,477,479,482,485,488,491],{"class":185,"line":478},5,[183,480,481],{"class":440},"            \"",[183,483,484],{"class":420},"jax[cuda12]",[183,486,487],{"class":440},"\"",[183,489,490],{"class":420}," cupy-cuda12x",[183,492,493],{"class":420}," nvidia-cupynumeric\n",[76,495,496,497,500,501,312,504,507],{},"Alternatives officielles : image ",[79,498,499],{},"Docker"," du cours, ou ",[79,502,503],{},"apptainer sur le cluster MUST",[103,505,506],{},"Install_satellite_sites.md"," pour les sites comme la CINERI). AMD : CuPy et JAX ont des\nroutes ROCm expérimentales — cuPyNumeric non. Sur une petite carte locale (GTX 1650, 4 Go),\non réduit la grille : la leçon de résidence des données reste identique.",[112,509,511],{"id":510},"en-vidéo-le-replay-officiel","En vidéo — le replay officiel",[513,514],"yt-embed",{"caption":515,"id":516,"title":517},"Replay — Python On GPU (Gray Scott Thursdays)","4RsXXTCHzLo","Python On GPU",[112,519,521],{"id":520},"sources-matériel-officiel","Sources & matériel officiel",[523,524,525,542,564,574,584],"ul",{},[526,527,528,531,532,535,536],"li",{},[79,529,530],{},"Le dépôt du cours"," (tutoriels ",[103,533,534],{},"GPU/tutorial/",", solutions, benchmarks A100) :\n",[310,537,541],{"href":538,"rel":539},"https://gitlab.in2p3.fr/alice.faure/gray-scott-python",[540],"nofollow","gitlab.in2p3.fr/alice.faure/gray-scott-python",[526,543,544,547,548,553,554,553,559],{},[79,545,546],{},"Les bibliothèques"," :\n",[310,549,552],{"href":550,"rel":551},"https://docs.cupy.dev/",[540],"docs.cupy.dev"," ·\n",[310,555,558],{"href":556,"rel":557},"https://docs.nvidia.com/cupynumeric/latest/",[540],"docs.nvidia.com/cupynumeric",[310,560,563],{"href":561,"rel":562},"https://docs.jax.dev/",[540],"docs.jax.dev",[526,565,566,547,569],{},[79,567,568],{},"La plateforme MUST",[310,570,573],{"href":571,"rel":572},"https://jupyter.must-dc.cloud",[540],"jupyter.must-dc.cloud",[526,575,576,547,579],{},[79,577,578],{},"Replays vidéo (YouTube)",[310,580,583],{"href":581,"rel":582},"https://www.youtube.com/playlist?list=PLiZttWgOMudb6PsUoWtxY3G4Gv8f2lurG",[540],"Gray Scott Thursdays",[526,585,586,547,589],{},[79,587,588],{},"Site de l'école",[310,590,593],{"href":591,"rel":592},"https://cta-lapp.pages.in2p3.fr/COURS/GRAY_SCOTT_REVOLUTIONS/GrayScott2026/index.html",[540],"GrayScott2026",[595,596,597],"style",{},"html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .sHwdD, html code.shiki .sHwdD{--shiki-light:#90A4AE;--shiki-light-font-style:italic;--shiki-default:#546E7A;--shiki-default-font-style:italic;--shiki-dark:#676E95;--shiki-dark-font-style:italic}html pre.shiki code .sBMFI, html code.shiki .sBMFI{--shiki-light:#E2931D;--shiki-default:#FFCB6B;--shiki-dark:#FFCB6B}html pre.shiki code .sfazB, html code.shiki .sfazB{--shiki-light:#91B859;--shiki-default:#C3E88D;--shiki-dark:#C3E88D}html pre.shiki code .sMK4o, html code.shiki .sMK4o{--shiki-light:#39ADB5;--shiki-default:#89DDFF;--shiki-dark:#89DDFF}html pre.shiki code .s2Zo4, html code.shiki .s2Zo4{--shiki-light:#6182B8;--shiki-default:#82AAFF;--shiki-dark:#82AAFF}html pre.shiki code .sTEyZ, html code.shiki .sTEyZ{--shiki-light:#90A4AE;--shiki-default:#EEFFFF;--shiki-dark:#BABED8}",{"title":179,"searchDepth":192,"depth":192,"links":599},[600,601,602,603,604,605,606,608,609],{"id":114,"depth":192,"text":115},{"id":149,"depth":192,"text":150},{"id":156,"depth":192,"text":157},{"id":217,"depth":192,"text":218},{"id":239,"depth":192,"text":240},{"id":319,"depth":192,"text":320},{"id":395,"depth":192,"text":607},"Le TP — GrayScott2026/day-5/GPU/",{"id":510,"depth":192,"text":511},{"id":520,"depth":192,"text":521},"30 juin, quatre sessions avec Alice Faure, Jean-Marc Colley, Sébastien Valat et Nabil Garroum : CuPy, cuPyNumeric et JAX portent le Gray-Scott du Jour 5 sur l'accélérateur — chiffres A100 officiels à l'appui.","md",[613],{"label":614,"icon":615,"to":538,"target":616},"Dépôt du cours","i-lucide-git-branch","_blank",{"icon":618},"lucide:zap",true,{"title":39,"description":610},"pYVW6x7ndpwxaJCO_sSFpuObWJjTBVxBLYd59s0F4WA",[623,625],{"title":35,"path":36,"stem":37,"description":624,"children":-1},"29 juin, deux sessions : Joël Falcou ouvre la semaine avec EVE et Kiwaku (SIMD C++20 explicite et portable), Pierre Aubert enchaîne avec l'architecture GPU qui portera les trois derniers jours.",{"title":43,"path":44,"stem":45,"description":626,"children":-1},"1er juillet, avec Vincent Lafage : le Fortran standard sur GPU via do concurrent, comparé à OpenACC et OpenMP target — mesuré en local sur GTX 1650 — puis la session polyglotte Julia · Rust · C++ · pixi de Pierre Aubert.",1783172492865]