[{"data":1,"prerenderedAt":457},["ShallowReactive",2],{"navigation_docs_fr":3,"-fr-gray-scott-school-jour-9":66,"-fr-gray-scott-school-jour-9-surround":452},[4,50,60],{"title":5,"path":6,"stem":7,"children":8},"L'École Gray Scott","/fr/gray-scott-school","fr/1.gray-scott-school/01.index",[9,10,14,18,22,26,30,34,38,42,46],{"title":5,"path":6,"stem":7},{"title":11,"path":12,"stem":13},"Présentation CINERI","/fr/gray-scott-school/presentation-cineri","fr/1.gray-scott-school/02.presentation-cineri",{"title":15,"path":16,"stem":17},"Jour 1 — Fondations","/fr/gray-scott-school/jour-1","fr/1.gray-scott-school/03.jour-1",{"title":19,"path":20,"stem":21},"Jour 2 — C++ sur CPU","/fr/gray-scott-school/jour-2","fr/1.gray-scott-school/04.jour-2",{"title":23,"path":24,"stem":25},"Jour 3 — Fortran sur CPU","/fr/gray-scott-school/jour-3","fr/1.gray-scott-school/05.jour-3",{"title":27,"path":28,"stem":29},"Jour 4 — Kokkos sur CPU","/fr/gray-scott-school/jour-4","fr/1.gray-scott-school/06.jour-4",{"title":31,"path":32,"stem":33},"Jour 5 — Python sur CPU","/fr/gray-scott-school/jour-5","fr/1.gray-scott-school/07.jour-5",{"title":35,"path":36,"stem":37},"Jour 6 — SIMD avec EVE + architecture GPU","/fr/gray-scott-school/jour-6","fr/1.gray-scott-school/08.jour-6",{"title":39,"path":40,"stem":41},"Jour 7 — Python sur GPU","/fr/gray-scott-school/jour-7","fr/1.gray-scott-school/09.jour-7",{"title":43,"path":44,"stem":45},"Jour 8 — Fortran sur GPU","/fr/gray-scott-school/jour-8","fr/1.gray-scott-school/10.jour-8",{"title":47,"path":48,"stem":49},"Jour 9 — Kokkos sur GPU","/fr/gray-scott-school/jour-9","fr/1.gray-scott-school/11.jour-9",{"title":51,"path":52,"stem":53,"children":54},"Projets","/fr/projets","fr/2.projets/1.index",[55,56],{"title":51,"path":52,"stem":53},{"title":57,"path":58,"stem":59},"SenLand","/fr/projets/senland","fr/2.projets/2.senland",{"title":61,"path":62,"stem":63,"children":64},"À propos","/fr/a-propos","fr/3.a-propos/1.index",[65],{"title":61,"path":62,"stem":63},{"id":67,"title":47,"badge":68,"body":69,"category":68,"description":440,"extension":441,"links":442,"meta":447,"navigation":449,"path":48,"seo":450,"stem":49,"tags":68,"__hash__":451},"docs_fr/fr/1.gray-scott-school/11.jour-9.md",null,{"type":70,"value":71,"toc":425},"minimark",[72,128,133,138,153,157,164,196,207,211,240,243,253,307,310,314,318,325,329,340,344,349,353,421],[73,74,75],"blockquote",{},[76,77,78,82,83,86,87,90,91,94,95,99,100,103,104,107,108,111,112,116,117,120,121,120,124,127],"p",{},[79,80,81],"strong",{},"2 juillet 2026 — dernier jour"," · Matin : ",[79,84,85],{},"Paul Zehner",", ",[79,88,89],{},"Juan-José Silva Cuevas"," &\n",[79,92,93],{},"Thomas Padioleau"," (Maison de la Simulation, CEA) — ",[96,97,98],"em",{},"Kokkos on GPU"," · 17 h :\n",[79,101,102],{},"Vincent Lafage"," — ",[96,105,106],{},"A story about cubic root optimisation in C++ and Fortran"," · puis la\n",[79,109,110],{},"présentation de clôture",". Le TP reprend le dépôt du Jour 4 (",[113,114,115],"code",{},"day-4/exercises/","), côté\n",[113,118,119],{},"gpu"," → ",[113,122,123],{},"gpu_async",[113,125,126],{},"gpu_async_more",".",[129,130,132],"h2",{"id":131},"session-du-matin-kokkos-atteint-laccélérateur","Session du matin — Kokkos atteint l'accélérateur",[134,135,137],"h3",{"id":136},"_1-une-source-tous-les-backends","1. Une source, tous les backends",[76,139,140,141,144,145,148,149,152],{},"Le noyau Kokkos du Jour 4 atterrit sur le GPU : le backend est choisi à la compilation — OpenMP\npour les cœurs CPU, ",[79,142,143],{},"CUDA"," (NVIDIA), ",[79,146,147],{},"HIP"," (AMD), ",[79,150,151],{},"SYCL"," (Intel). Le même Gray-Scott tourne\nsur une station, sur un nœud Jean-Zay (GPU Volta), ou demain sur un accélérateur AMD.",[134,154,156],{"id":155},"_2-la-portabilité-nest-pas-gratuite","2. La portabilité n'est pas gratuite",[76,158,159,160,163],{},"La leçon honnête du jour. Le Kokkos « CPU » du Jour 4 portait un avertissement explicite dans le\ncode : ",[96,161,162],{},"il ne tourne que sur CPU, il n'est pas encore portable."," Atteindre le GPU a exigé deux\najustements :",[165,166,167,190],"ul",{},[168,169,170,175,176,179,180,185,186,189],"li",{},[79,171,172],{},[113,173,174],{},"Kokkos::LayoutRight"," sur les ",[113,177,178],{},"View"," et ",[79,181,182],{},[113,183,184],{},"Iterate::Right"," dans la politique, pour que des\nthreads voisins lisent des adresses voisines — la ",[79,187,188],{},"coalescence mémoire"," du Jour 6. Un layout\nqui l'ignore effondre la bande passante GPU.",[168,191,192,193],{},"La ",[79,194,195],{},"gestion explicite hôte↔device.",[76,197,198,199,202,203,206],{},"Kokkos garantit qu'un seul code ",[96,200,201],{},"compile et tourne"," partout, pas qu'il soit ",[96,204,205],{},"rapide"," partout.",[134,208,210],{"id":209},"_3-faire-voyager-les-données","3. Faire voyager les données",[212,213,218],"pre",{"className":214,"code":215,"language":216,"meta":217,"style":217},"language-cpp shiki shiki-themes material-theme-lighter material-theme material-theme-palenight","auto u_h = Kokkos::create_mirror_view(u);  // tampon hôte associé à la View device\nKokkos::deep_copy(u_h, u);                  // device → hôte, seulement au besoin\nKokkos::fence(\"wait for compute\");          // les noyaux sont asynchrones\n","cpp","",[113,219,220,228,234],{"__ignoreMap":217},[221,222,225],"span",{"class":223,"line":224},"line",1,[221,226,227],{},"auto u_h = Kokkos::create_mirror_view(u);  // tampon hôte associé à la View device\n",[221,229,231],{"class":223,"line":230},2,[221,232,233],{},"Kokkos::deep_copy(u_h, u);                  // device → hôte, seulement au besoin\n",[221,235,237],{"class":223,"line":236},3,[221,238,239],{},"Kokkos::fence(\"wait for compute\");          // les noyaux sont asynchrones\n",[76,241,242],{},"Minimiser ces transferts, c'est le « couper les allers-retours » du Jour 2, transposé au bus PCIe.",[134,244,246,247,120,249,120,251],{"id":245},"_4-recouvrir-gpu-gpu_async-gpu_async_more","4. Recouvrir : ",[113,248,119],{},[113,250,123],{},[113,252,126],{},[254,255,256,269],"table",{},[257,258,259],"thead",{},[260,261,262,266],"tr",{},[263,264,265],"th",{},"Variante",[263,267,268],{},"Idée",[270,271,272,282,294],"tbody",{},[260,273,274,279],{},[275,276,277],"td",{},[113,278,119],{},[275,280,281],{},"de base : calcul sur device, copies synchrones",[260,283,284,288],{},[275,285,286],{},[113,287,123],{},[275,289,290,293],{},[79,291,292],{},"écriture asynchrone"," — l'I/O recouvre le calcul suivant",[260,295,296,300],{},[275,297,298],{},[113,299,126],{},[275,301,302,303,306],{},"synchronisation ",[79,304,305],{},"et"," écriture asynchrones — transferts masqués au maximum",[308,309],"d9-async",{},[129,311,313],{"id":312},"fin-daprès-midi-17-h-clôture","Fin d'après-midi (17 h) & clôture",[134,315,317],{"id":316},"la-racine-cubique-le-dernier-mot","La racine cubique — le dernier mot",[76,319,320,321,324],{},"L'exposé final optimise une ",[79,322,323],{},"racine cubique"," en C++ et en Fortran — rebouclant sur le Jour 3, et\nnouant la semaine : le compilateur, le langage, la mesure, et ne jamais se fier à l'intuition\navant d'avoir chronométré.",[134,326,328],{"id":327},"larc-de-loptimisation-la-clôture","L'arc de l'optimisation — la clôture",[76,330,331,332,335,336,339],{},"Des fondations CPU (Jour 1) au noyau GPU portable (aujourd'hui), l'école a tracé un seul arc : le\n",[79,333,334],{},"même"," Gray-Scott, rendu plus rapide étape par étape — vectorisation, multicœur, Fortran, ",[113,337,338],{},"do concurrent",", Python, et enfin Kokkos — à résultat numérique constant. La leçon n'est ni un langage\nni une API : c'est une méthode. Mesurer, trouver le facteur limitant, exploiter le matériel.",[129,341,343],{"id":342},"en-vidéo-le-replay-officiel","En vidéo — le replay officiel",[345,346],"yt-embed",{"caption":347,"id":348,"title":98},"Replay — Kokkos on GPU (Gray Scott Thursdays)","_b4zn7r7jhI",[129,350,352],{"id":351},"sources-matériel-officiel","Sources & matériel officiel",[165,354,355,374,385,401,411],{},[168,356,357,360,361,368,369],{},[79,358,359],{},"Les slides du jour"," (PDF, wiki GitLab de l'école) :\n",[362,363,367],"a",{"href":364,"rel":365},"https://gitlab.in2p3.fr/CTA-LAPP/COURS/GRAY_SCOTT_REVOLUTIONS/GrayScott2026/-/wikis/uploads/GrayScottDay-9/kokkos_gpu.pdf",[366],"nofollow","kokkos_gpu.pdf"," ·\n",[362,370,373],{"href":371,"rel":372},"https://gitlab.in2p3.fr/CTA-LAPP/COURS/GRAY_SCOTT_REVOLUTIONS/GrayScott2026/-/wikis/uploads/GrayScottDay-9/Reprises_IJC_PSA_vectorisation_2026.pdf",[366],"la racine cubique — Reprises IJC/PSA vectorisation",[168,375,376,379,380],{},[79,377,378],{},"Le dépôt du cours"," (exercices gpu/gpu_async/gpu_async_more + cours LaTeX) :\n",[362,381,384],{"href":382,"rel":383},"https://github.com/Maison-de-la-Simulation/gray-scott-kokkos",[366],"github.com/Maison-de-la-Simulation/gray-scott-kokkos",[168,386,387,390,391,368,396],{},[79,388,389],{},"Kokkos"," :\n",[362,392,395],{"href":393,"rel":394},"https://kokkos.org/",[366],"kokkos.org",[362,397,400],{"href":398,"rel":399},"https://github.com/kokkos/kokkos",[366],"github.com/kokkos/kokkos",[168,402,403,390,406],{},[79,404,405],{},"Replays vidéo (YouTube)",[362,407,410],{"href":408,"rel":409},"https://www.youtube.com/playlist?list=PLiZttWgOMudb6PsUoWtxY3G4Gv8f2lurG",[366],"Gray Scott Thursdays",[168,412,413,390,416],{},[79,414,415],{},"Site de l'école",[362,417,420],{"href":418,"rel":419},"https://cta-lapp.pages.in2p3.fr/COURS/GRAY_SCOTT_REVOLUTIONS/GrayScott2026/index.html",[366],"GrayScott2026",[422,423,424],"style",{},"html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}",{"title":217,"searchDepth":230,"depth":230,"links":426},[427,434,438,439],{"id":131,"depth":230,"text":132,"children":428},[429,430,431,432],{"id":136,"depth":236,"text":137},{"id":155,"depth":236,"text":156},{"id":209,"depth":236,"text":210},{"id":245,"depth":236,"text":433},"4. Recouvrir : gpu → gpu_async → gpu_async_more",{"id":312,"depth":230,"text":313,"children":435},[436,437],{"id":316,"depth":236,"text":317},{"id":327,"depth":236,"text":328},{"id":342,"depth":230,"text":343},{"id":351,"depth":230,"text":352},"2 juillet, dernier jour : Kokkos sur GPU avec Paul Zehner, Juan-José Silva Cuevas et Thomas Padioleau — layout, transferts, recouvrement async — puis l'histoire de la racine cubique et la clôture.","md",[443],{"label":444,"icon":445,"to":382,"target":446},"Dépôt du cours","i-simple-icons-github","_blank",{"icon":448},"lucide:briefcase",true,{"title":47,"description":440},"yDL0rDaOX72vpGl--6vot7Sr3t2kUyPFxZW3UD16xH0",[453,455],{"title":43,"path":44,"stem":45,"description":454,"children":-1},"1er juillet, avec Vincent Lafage : le Fortran standard sur GPU via do concurrent, comparé à OpenACC et OpenMP target — mesuré en local sur GTX 1650 — puis la session polyglotte Julia · Rust · C++ · pixi de Pierre Aubert.",{"title":51,"path":52,"stem":53,"description":456,"children":-1},"Les projets réalisés par les apprenants de la Gray Scott School, appliquant à des problèmes réels les techniques HPC apprises à la CINERI.",1783172493046]