[{"data":1,"prerenderedAt":451},["ShallowReactive",2],{"navigation_docs_en":3,"-en-gray-scott-school-jour-9":66,"-en-gray-scott-school-jour-9-surround":446},[4,50,60],{"title":5,"path":6,"stem":7,"children":8},"The Gray Scott School","/en/gray-scott-school","en/1.gray-scott-school/01.index",[9,10,14,18,22,26,30,34,38,42,46],{"title":5,"path":6,"stem":7},{"title":11,"path":12,"stem":13},"CINERI Presentation","/en/gray-scott-school/presentation-cineri","en/1.gray-scott-school/02.presentation-cineri",{"title":15,"path":16,"stem":17},"Day 1 — Foundations","/en/gray-scott-school/jour-1","en/1.gray-scott-school/03.jour-1",{"title":19,"path":20,"stem":21},"Day 2 — C++ on CPU","/en/gray-scott-school/jour-2","en/1.gray-scott-school/04.jour-2",{"title":23,"path":24,"stem":25},"Day 3 — Fortran on CPU","/en/gray-scott-school/jour-3","en/1.gray-scott-school/05.jour-3",{"title":27,"path":28,"stem":29},"Day 4 — Kokkos on CPU","/en/gray-scott-school/jour-4","en/1.gray-scott-school/06.jour-4",{"title":31,"path":32,"stem":33},"Day 5 — Python on CPU","/en/gray-scott-school/jour-5","en/1.gray-scott-school/07.jour-5",{"title":35,"path":36,"stem":37},"Day 6 — SIMD with EVE + GPU architecture","/en/gray-scott-school/jour-6","en/1.gray-scott-school/08.jour-6",{"title":39,"path":40,"stem":41},"Day 7 — Python on GPU","/en/gray-scott-school/jour-7","en/1.gray-scott-school/09.jour-7",{"title":43,"path":44,"stem":45},"Day 8 — Fortran on GPU","/en/gray-scott-school/jour-8","en/1.gray-scott-school/10.jour-8",{"title":47,"path":48,"stem":49},"Day 9 — Kokkos on GPU","/en/gray-scott-school/jour-9","en/1.gray-scott-school/11.jour-9",{"title":51,"path":52,"stem":53,"children":54},"Projects","/en/projets","en/2.projets/1.index",[55,56],{"title":51,"path":52,"stem":53},{"title":57,"path":58,"stem":59},"SenLand","/en/projets/senland","en/2.projets/2.senland",{"title":61,"path":62,"stem":63,"children":64},"About","/en/a-propos","en/3.a-propos/1.index",[65],{"title":61,"path":62,"stem":63},{"id":67,"title":47,"badge":68,"body":69,"category":68,"description":439,"extension":440,"links":68,"meta":441,"navigation":443,"path":48,"seo":444,"stem":49,"tags":68,"__hash__":445},"docs_en/en/1.gray-scott-school/11.jour-9.md",null,{"type":70,"value":71,"toc":424},"minimark",[72,128,133,138,153,157,164,195,206,210,239,242,252,306,309,313,317,324,328,339,343,348,352,420],[73,74,75],"blockquote",{},[76,77,78,82,83,86,87,90,91,94,95,99,100,103,104,107,108,111,112,116,117,120,121,120,124,127],"p",{},[79,80,81],"strong",{},"July 2, 2026 — last day"," · Morning: ",[79,84,85],{},"Paul Zehner",", ",[79,88,89],{},"Juan-José Silva Cuevas"," &\n",[79,92,93],{},"Thomas Padioleau"," (Maison de la Simulation, CEA) — ",[96,97,98],"em",{},"Kokkos on GPU"," · 5 pm:\n",[79,101,102],{},"Vincent Lafage"," — ",[96,105,106],{},"A story about cubic root optimisation in C++ and Fortran"," · then the\n",[79,109,110],{},"closing presentation",". The hands-on reuses Day 4's repo (",[113,114,115],"code",{},"day-4/exercises/","), the\n",[113,118,119],{},"gpu"," → ",[113,122,123],{},"gpu_async",[113,125,126],{},"gpu_async_more"," side.",[129,130,132],"h2",{"id":131},"morning-session-kokkos-reaches-the-accelerator","Morning session — Kokkos reaches the accelerator",[134,135,137],"h3",{"id":136},"_1-one-source-all-backends","1. One source, all backends",[76,139,140,141,144,145,148,149,152],{},"The Day 4 Kokkos kernel now lands on the GPU: the backend is chosen at compile time — OpenMP for\nCPU cores, ",[79,142,143],{},"CUDA"," (NVIDIA), ",[79,146,147],{},"HIP"," (AMD), ",[79,150,151],{},"SYCL"," (Intel). The same Gray-Scott runs on a\nworkstation, on a Jean-Zay node (Volta GPU), or tomorrow on an AMD accelerator.",[134,154,156],{"id":155},"_2-portability-is-not-free","2. Portability is not free",[76,158,159,160,163],{},"The honest lesson of the day. The Day 4 \"CPU\" Kokkos carried an explicit warning in the code:\n",[96,161,162],{},"it only runs on CPU, it is not yet portable."," Reaching the GPU required two adjustments:",[165,166,167,190],"ul",{},[168,169,170,175,176,179,180,185,186,189],"li",{},[79,171,172],{},[113,173,174],{},"Kokkos::LayoutRight"," on the ",[113,177,178],{},"View","s and ",[79,181,182],{},[113,183,184],{},"Iterate::Right"," in the policy, so neighboring\nthreads read neighboring addresses — Day 6's ",[79,187,188],{},"memory coalescing",". A layout that ignores it\ncollapses GPU bandwidth.",[168,191,192],{},[79,193,194],{},"Explicit host↔device management.",[76,196,197,198,201,202,205],{},"Kokkos guarantees one code ",[96,199,200],{},"compiles and runs"," everywhere, not that it is ",[96,203,204],{},"fast"," everywhere.",[134,207,209],{"id":208},"_3-moving-the-data","3. Moving the data",[211,212,217],"pre",{"className":213,"code":214,"language":215,"meta":216,"style":216},"language-cpp shiki shiki-themes material-theme-lighter material-theme material-theme-palenight","auto u_h = Kokkos::create_mirror_view(u);  // host buffer paired with the device View\nKokkos::deep_copy(u_h, u);                  // device → host, only when needed\nKokkos::fence(\"wait for compute\");          // kernels are asynchronous\n","cpp","",[113,218,219,227,233],{"__ignoreMap":216},[220,221,224],"span",{"class":222,"line":223},"line",1,[220,225,226],{},"auto u_h = Kokkos::create_mirror_view(u);  // host buffer paired with the device View\n",[220,228,230],{"class":222,"line":229},2,[220,231,232],{},"Kokkos::deep_copy(u_h, u);                  // device → host, only when needed\n",[220,234,236],{"class":222,"line":235},3,[220,237,238],{},"Kokkos::fence(\"wait for compute\");          // kernels are asynchronous\n",[76,240,241],{},"Minimizing these transfers is Day 2's \"cut the round trips\", transposed to the PCIe bus.",[134,243,245,246,120,248,120,250],{"id":244},"_4-overlapping-gpu-gpu_async-gpu_async_more","4. Overlapping: ",[113,247,119],{},[113,249,123],{},[113,251,126],{},[253,254,255,268],"table",{},[256,257,258],"thead",{},[259,260,261,265],"tr",{},[262,263,264],"th",{},"Variant",[262,266,267],{},"Idea",[269,270,271,281,293],"tbody",{},[259,272,273,278],{},[274,275,276],"td",{},[113,277,119],{},[274,279,280],{},"baseline: compute on device, synchronous copies",[259,282,283,287],{},[274,284,285],{},[113,286,123],{},[274,288,289,292],{},[79,290,291],{},"asynchronous writing"," — I/O overlaps the next compute",[259,294,295,299],{},[274,296,297],{},[113,298,126],{},[274,300,301,302,305],{},"asynchronous sync ",[79,303,304],{},"and"," writing — transfers hidden as much as possible",[307,308],"d9-async",{},[129,310,312],{"id":311},"late-afternoon-5-pm-closing","Late afternoon (5 pm) & closing",[134,314,316],{"id":315},"the-cube-root-the-last-word","The cube root — the last word",[76,318,319,320,323],{},"The final talk optimises a ",[79,321,322],{},"cube root"," in C++ and Fortran — looping straight back to Day 3, and\ntying the week together: the compiler, the language, measurement, and never trusting intuition\nbefore timing.",[134,325,327],{"id":326},"_6-the-arc-of-optimization","6. The arc of optimization",[76,329,330,331,334,335,338],{},"From CPU foundations (Day 1) to a portable GPU kernel (today), the school traced one arc: the\n",[79,332,333],{},"same"," Gray-Scott, made faster step by step — vectorization, multicore, Fortran, ",[113,336,337],{},"do concurrent",", Python, and finally Kokkos — at a constant numerical result. The lesson is neither a\nlanguage nor an API: it is a method. Measure, find the limiting factor, exploit the hardware.",[129,340,342],{"id":341},"on-video-the-official-replay","On video — the official replay",[344,345],"yt-embed",{"caption":346,"id":347,"title":98},"Replay — Kokkos on GPU (Gray Scott Thursdays)","_b4zn7r7jhI",[129,349,351],{"id":350},"sources-official-material","Sources & official material",[165,353,354,373,384,400,410],{},[168,355,356,359,360,367,368],{},[79,357,358],{},"The day's slides"," (PDF, school GitLab wiki):\n",[361,362,366],"a",{"href":363,"rel":364},"https://gitlab.in2p3.fr/CTA-LAPP/COURS/GRAY_SCOTT_REVOLUTIONS/GrayScott2026/-/wikis/uploads/GrayScottDay-9/kokkos_gpu.pdf",[365],"nofollow","kokkos_gpu.pdf"," ·\n",[361,369,372],{"href":370,"rel":371},"https://gitlab.in2p3.fr/CTA-LAPP/COURS/GRAY_SCOTT_REVOLUTIONS/GrayScott2026/-/wikis/uploads/GrayScottDay-9/Reprises_IJC_PSA_vectorisation_2026.pdf",[365],"the cube root — Reprises IJC/PSA vectorisation",[168,374,375,378,379],{},[79,376,377],{},"The course repository"," (gpu/gpu_async/gpu_async_more exercises + LaTeX lecture):\n",[361,380,383],{"href":381,"rel":382},"https://github.com/Maison-de-la-Simulation/gray-scott-kokkos",[365],"github.com/Maison-de-la-Simulation/gray-scott-kokkos",[168,385,386,389,390,367,395],{},[79,387,388],{},"Kokkos",":\n",[361,391,394],{"href":392,"rel":393},"https://kokkos.org/",[365],"kokkos.org",[361,396,399],{"href":397,"rel":398},"https://github.com/kokkos/kokkos",[365],"github.com/kokkos/kokkos",[168,401,402,389,405],{},[79,403,404],{},"Video replays (YouTube)",[361,406,409],{"href":407,"rel":408},"https://www.youtube.com/playlist?list=PLiZttWgOMudb6PsUoWtxY3G4Gv8f2lurG",[365],"Gray Scott Thursdays",[168,411,412,389,415],{},[79,413,414],{},"School website",[361,416,419],{"href":417,"rel":418},"https://cta-lapp.pages.in2p3.fr/COURS/GRAY_SCOTT_REVOLUTIONS/GrayScott2026/index.html",[365],"GrayScott2026",[421,422,423],"style",{},"html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}",{"title":216,"searchDepth":229,"depth":229,"links":425},[426,433,437,438],{"id":131,"depth":229,"text":132,"children":427},[428,429,430,431],{"id":136,"depth":235,"text":137},{"id":155,"depth":235,"text":156},{"id":208,"depth":235,"text":209},{"id":244,"depth":235,"text":432},"4. Overlapping: gpu → gpu_async → gpu_async_more",{"id":311,"depth":229,"text":312,"children":434},[435,436],{"id":315,"depth":235,"text":316},{"id":326,"depth":235,"text":327},{"id":341,"depth":229,"text":342},{"id":350,"depth":229,"text":351},"The portable kernel reaches the accelerator. Portability is not free — layout, host↔device transfers and synchronization — then the school closes.","md",{"icon":442},"lucide:briefcase",true,{"title":47,"description":439},"fEh1TKsVbv5s35W4UeEbwGVBWdqn9CquL8OE3pft40E",[447,449],{"title":43,"path":44,"stem":45,"description":448,"children":-1},"Standard Fortran on the GPU via do concurrent, compared with OpenACC and OpenMP target from a single source — plus the closing polyglot session.",{"title":51,"path":52,"stem":53,"description":450,"children":-1},"Projects built by Gray Scott School learners, applying the HPC techniques taught at CINERI to real problems.",1783172490754]