[{"data":1,"prerenderedAt":637},["ShallowReactive",2],{"navigation_docs_en":3,"-en-gray-scott-school-jour-6":66,"-en-gray-scott-school-jour-6-surround":632},[4,50,60],{"title":5,"path":6,"stem":7,"children":8},"The Gray Scott School","/en/gray-scott-school","en/1.gray-scott-school/01.index",[9,10,14,18,22,26,30,34,38,42,46],{"title":5,"path":6,"stem":7},{"title":11,"path":12,"stem":13},"CINERI Presentation","/en/gray-scott-school/presentation-cineri","en/1.gray-scott-school/02.presentation-cineri",{"title":15,"path":16,"stem":17},"Day 1 — Foundations","/en/gray-scott-school/jour-1","en/1.gray-scott-school/03.jour-1",{"title":19,"path":20,"stem":21},"Day 2 — C++ on CPU","/en/gray-scott-school/jour-2","en/1.gray-scott-school/04.jour-2",{"title":23,"path":24,"stem":25},"Day 3 — Fortran on CPU","/en/gray-scott-school/jour-3","en/1.gray-scott-school/05.jour-3",{"title":27,"path":28,"stem":29},"Day 4 — Kokkos on CPU","/en/gray-scott-school/jour-4","en/1.gray-scott-school/06.jour-4",{"title":31,"path":32,"stem":33},"Day 5 — Python on CPU","/en/gray-scott-school/jour-5","en/1.gray-scott-school/07.jour-5",{"title":35,"path":36,"stem":37},"Day 6 — SIMD with EVE + GPU architecture","/en/gray-scott-school/jour-6","en/1.gray-scott-school/08.jour-6",{"title":39,"path":40,"stem":41},"Day 7 — Python on GPU","/en/gray-scott-school/jour-7","en/1.gray-scott-school/09.jour-7",{"title":43,"path":44,"stem":45},"Day 8 — Fortran on GPU","/en/gray-scott-school/jour-8","en/1.gray-scott-school/10.jour-8",{"title":47,"path":48,"stem":49},"Day 9 — Kokkos on GPU","/en/gray-scott-school/jour-9","en/1.gray-scott-school/11.jour-9",{"title":51,"path":52,"stem":53,"children":54},"Projects","/en/projets","en/2.projets/1.index",[55,56],{"title":51,"path":52,"stem":53},{"title":57,"path":58,"stem":59},"SenLand","/en/projets/senland","en/2.projets/2.senland",{"title":61,"path":62,"stem":63,"children":64},"About","/en/a-propos","en/3.a-propos/1.index",[65],{"title":61,"path":62,"stem":63},{"id":67,"title":35,"badge":68,"body":69,"category":68,"description":621,"extension":622,"links":623,"meta":628,"navigation":167,"path":36,"seo":630,"stem":37,"tags":68,"__hash__":631},"docs_en/en/1.gray-scott-school/08.jour-6.md",null,{"type":70,"value":71,"toc":602},"minimark",[72,105,110,115,130,138,145,181,187,190,194,275,279,289,314,329,333,348,352,356,374,377,381,400,404,423,426,432,439,495,509,513,519,524,529,533,598],[73,74,75],"blockquote",{},[76,77,78,82,83,86,87,91,92,95,96,99,100,104],"p",{},[79,80,81],"strong",{},"June 29, 2026"," — week 2 opens · Morning: ",[79,84,85],{},"Joël Falcou"," (LISN, CodeReckons) —\n",[88,89,90],"em",{},"C++ 20 Computing with EVE + Kiwaku"," · Afternoon (2 pm): ",[79,93,94],{},"Pierre Aubert"," (LAPP) —\n",[88,97,98],{},"GPU Architecture, massively parallel computing"," · Marcel Vivargent Auditorium + satellites\n(including CINERI). The hands-on lives in ",[101,102,103],"code",{},"GrayScott2026/day-6/"," — Falcou's exercise files +\nthe cloned EVE library.",[106,107,109],"h2",{"id":108},"morning-session-eve-kiwaku-simd-owned","Morning session — EVE + Kiwaku, SIMD owned",[111,112,114],"h3",{"id":113},"_1-the-problem-a-fragile-parallelism","1. The problem: a fragile parallelism",[76,116,117,118,121,122,125,126,129],{},"A single core already processes several floats per instruction (8 with AVX2, 16 with\nAVX-512) — orthogonal to multithreading. But this parallelism is ",[79,119,120],{},"fragile",": compiler\nauto-vectorization is not guaranteed (Day 3 showed ",[88,123,124],{},"refused"," loops), and hand-written\nintrinsics do not survive a change of instruction set. Falcou's answer: make it a ",[79,127,128],{},"type",".",[111,131,133,134,137],{"id":132},"_2-evewide-the-register-becomes-a-c-type","2. ",[101,135,136],{},"eve::wide"," — the register becomes a C++ type",[76,139,140,141,144],{},"First hands-on exercise, ",[101,142,143],{},"basic.cpp"," — seven lines that hold the whole thesis:",[146,147,152],"pre",{"className":148,"code":149,"language":150,"meta":151,"style":151},"language-cpp shiki shiki-themes material-theme-lighter material-theme material-theme-palenight","eve::wide\u003Cfloat, eve::fixed\u003C8>> x( [](auto i, auto) { return 1.f + i; } );\n\nstd::cout \u003C\u003C \"EVE is optimizing for: \" \u003C\u003C eve::current_api \u003C\u003C \"\\n\";\nstd::cout \u003C\u003C eve::sqrt(eve::abs(1 - x)) \u003C\u003C \"\\n\";   // all 8 lanes at once\n","cpp","",[101,153,154,162,169,175],{"__ignoreMap":151},[155,156,159],"span",{"class":157,"line":158},"line",1,[155,160,161],{},"eve::wide\u003Cfloat, eve::fixed\u003C8>> x( [](auto i, auto) { return 1.f + i; } );\n",[155,163,165],{"class":157,"line":164},2,[155,166,168],{"emptyLinePlaceholder":167},true,"\n",[155,170,172],{"class":157,"line":171},3,[155,173,174],{},"std::cout \u003C\u003C \"EVE is optimizing for: \" \u003C\u003C eve::current_api \u003C\u003C \"\\n\";\n",[155,176,178],{"class":157,"line":177},4,[155,179,180],{},"std::cout \u003C\u003C eve::sqrt(eve::abs(1 - x)) \u003C\u003C \"\\n\";   // all 8 lanes at once\n",[76,182,183,186],{},[101,184,185],{},"eve::current_api"," prints the ISA detected at compile time — the same source emits AVX2\nhere, AVX-512 or NEON elsewhere.",[188,189],"d6-wide",{},[111,191,193],{"id":192},"_3-the-hands-on-progression","3. The hands-on progression",[195,196,197,210],"table",{},[198,199,200],"thead",{},[201,202,203,207],"tr",{},[204,205,206],"th",{},"File",[204,208,209],{},"What it teaches",[211,212,213,229,239,253,263],"tbody",{},[201,214,215,220],{},[216,217,218],"td",{},[101,219,143],{},[216,221,222,225,226],{},[101,223,224],{},"wide",", vector math functions, ",[101,227,228],{},"current_api",[201,230,231,236],{},[216,232,233],{},[101,234,235],{},"math.cpp",[216,237,238],{},"EVE's function families over a real array",[201,240,241,246],{},[216,242,243],{},[101,244,245],{},"hypot.cpp",[216,247,248,249,252],{},"accuracy ",[88,250,251],{},"and"," performance: naive vs robust hypotenuse, vectorized",[201,254,255,260],{},[216,256,257],{},[101,258,259],{},"bilateral.cpp",[216,261,262],{},"a bilateral image filter — SIMD on a real algorithm",[201,264,265,270],{},[216,266,267],{},[101,268,269],{},"gray_scott.cpp",[216,271,272],{},[79,273,274],{},"the course stencil, hand-vectorized",[111,276,278],{"id":277},"_4-the-eve-gray-scott-guaranteed-vectorization","4. The EVE Gray-Scott — guaranteed vectorization",[76,280,281,282,284,285,288],{},"The final kernel loads the nine neighbors as ",[101,283,224],{}," and chains explicit ",[79,286,287],{},"FMAs",":",[146,290,292],{"className":148,"code":291,"language":150,"meta":151,"style":151},"auto u = eve::load(&su[i]);\nauto full_u1 = w00 * (eve::load(&su[i - W - 1]) - u);   // 8 cells at a time\nfull_u1 = eve::fma(w12, (eve::load(&su[i + 1]) - u), full_u1);\n// … the stencil's 9 terms, fused into FMA chains\n",[101,293,294,299,304,309],{"__ignoreMap":151},[155,295,296],{"class":157,"line":158},[155,297,298],{},"auto u = eve::load(&su[i]);\n",[155,300,301],{"class":157,"line":164},[155,302,303],{},"auto full_u1 = w00 * (eve::load(&su[i - W - 1]) - u);   // 8 cells at a time\n",[155,305,306],{"class":157,"line":171},[155,307,308],{},"full_u1 = eve::fma(w12, (eve::load(&su[i + 1]) - u), full_u1);\n",[155,310,311],{"class":157,"line":177},[155,312,313],{},"// … the stencil's 9 terms, fused into FMA chains\n",[76,315,316,317,320,321,324,325,328],{},"What Day 3 obtained by ",[88,318,319],{},"negotiating"," with ",[101,322,323],{},"-fopt-info-vec",", EVE obtains ",[79,326,327],{},"by construction"," —\nvectorization is no longer a hope, the type enforces it.",[111,330,332],{"id":331},"_5-kiwaku-the-matching-containers","5. Kiwaku — the matching containers",[76,334,335,336,339,340,343,344,347],{},"Same author, one level up: ",[79,337,338],{},"Kiwaku"," provides the multidimensional containers and views\n(shape, strides, hardware adaptation) designed to plug EVE in — the \"containers\" duo\nannounced by the session. Still young, but the direction is clear: ",[88,341,342],{},"one"," algorithm code,\n",[88,345,346],{},"many"," targets.",[106,349,351],{"id":350},"afternoon-session-gpu-architecture-2-pm","Afternoon session — GPU architecture (2 pm)",[111,353,355],{"id":354},"_6-switching-philosophy","6. Switching philosophy",[76,357,358,359,362,363,366,367,370,371,129],{},"At 2 pm Pierre Aubert flips the perspective: the CPU ",[79,360,361],{},"hides latency"," (big caches, few\npowerful cores); the GPU ",[79,364,365],{},"drowns it in numbers"," — dozens of ",[88,368,369],{},"Streaming Multiprocessors",",\nhundreds of simple cores each, threads advancing in ",[79,372,373],{},"warps of 32",[375,376],"d6-gpu",{},[111,378,380],{"id":379},"_7-the-gpu-memory-hierarchy","7. The GPU memory hierarchy",[76,382,383,384,387,388,391,392,395,396,399],{},"The session details the three levels that command the rest of the week: ",[79,385,386],{},"registers"," (per\nthread), ",[79,389,390],{},"shared memory"," (per SM — the \"hand-managed cache\"), and ",[79,393,394],{},"global memory"," (HBM)\nwhose accesses must be ",[79,397,398],{},"coalesced",": the 32 threads of a warp read neighboring addresses, or\nthroughput collapses. It is the layout lesson of Days 2-4, transposed to 32 lanes.",[111,401,403],{"id":402},"_8-nvc-as-a-scout-then-days-7-9","8. nvc++ as a scout, then Days 7-9",[76,405,406,407,410,411,414,415,418,419,422],{},"The day closes (3 pm, Pierre Aubert again) with a scout: ",[79,408,409],{},"nvc++ compiles standard C++17\ndirectly for the GPU"," since 2020 — ",[101,412,413],{},"std::transform"," + parallel execution policies, no\nexternal library. It is the ",[101,416,417],{},"stdpar"," spirit we will meet again in Fortran on Day 8\n(",[101,420,421],{},"do concurrent",").",[76,424,425],{},"Everything is in place: Python on GPU tomorrow (Day 7), Fortran on GPU (Day 8), Kokkos on GPU\n(Day 9) — three languages, one single target architecture: this afternoon's.",[106,427,429,430],{"id":428},"the-hands-on-grayscott2026day-6","The hands-on — ",[101,431,103],{},[76,433,434,435,438],{},"EVE is ",[79,436,437],{},"header-only",": clone and compile.",[146,440,444],{"className":441,"code":442,"language":443,"meta":151,"style":151},"language-bash shiki shiki-themes material-theme-lighter material-theme material-theme-palenight","cd GrayScott2026/day-6\ng++ -std=c++20 -O3 -march=native -I eve/include basic.cpp -o basic\n./basic          # → \"EVE is optimizing for: X86 AVX2\" (per your machine)\n","bash",[101,445,446,456,486],{"__ignoreMap":151},[155,447,448,452],{"class":157,"line":158},[155,449,451],{"class":450},"s2Zo4","cd",[155,453,455],{"class":454},"sfazB"," GrayScott2026/day-6\n",[155,457,458,462,465,468,471,474,477,480,483],{"class":157,"line":164},[155,459,461],{"class":460},"sBMFI","g++",[155,463,464],{"class":454}," -std=c++20",[155,466,467],{"class":454}," -O3",[155,469,470],{"class":454}," -march=native",[155,472,473],{"class":454}," -I",[155,475,476],{"class":454}," eve/include",[155,478,479],{"class":454}," basic.cpp",[155,481,482],{"class":454}," -o",[155,484,485],{"class":454}," basic\n",[155,487,488,491],{"class":157,"line":171},[155,489,490],{"class":460},"./basic",[155,492,494],{"class":493},"sHwdD","          # → \"EVE is optimizing for: X86 AVX2\" (per your machine)\n",[76,496,497,498,500,501,500,503,505,506,508],{},"Then unroll ",[101,499,235],{},", ",[101,502,245],{},[101,504,259],{}," and ",[101,507,269],{}," in the same mold.\nOn the local hands-on machine: AVX2 detected, 8 floats per instruction.",[106,510,512],{"id":511},"on-video-the-official-replays","On video — the official replays",[514,515],"yt-embed",{"caption":516,"id":517,"title":518},"Replay — EVE, a C++20 computing library on CPU (Gray Scott Thursdays)","eu3k3U52KUA","EVE a C++ 20 computing library on CPU",[514,520],{"caption":521,"id":522,"title":523},"Replay — GPU Architecture (Gray Scott Thursdays)","jsZrGSrs5ZM","GPU Architecture",[514,525],{"caption":526,"id":527,"title":528},"Replay — Modern C++ GPU computing with std::algorithm and CUDA (Gray Scott Thursdays)","G8u8nb4WGss","Modern C++ GPU Computing with std::algorithm and Cuda",[106,530,532],{"id":531},"sources-official-material","Sources & official material",[534,535,536,550,561,578,588],"ul",{},[537,538,539,542,543],"li",{},[79,540,541],{},"The EVE slides"," (Joël Falcou's online lecture):\n",[544,545,549],"a",{"href":546,"rel":547},"https://events.codereckons.com/eve/",[548],"nofollow","events.codereckons.com/eve",[537,551,552,555,556],{},[79,553,554],{},"The day's PDF"," (school GitLab wiki):\n",[544,557,560],{"href":558,"rel":559},"https://gitlab.in2p3.fr/CTA-LAPP/COURS/GRAY_SCOTT_REVOLUTIONS/GrayScott2026/-/wikis/uploads/GrayScottDay-6/gray_scott.pdf",[548],"gray_scott.pdf",[537,562,563,566,567,572,573],{},[79,564,565],{},"The libraries",":\n",[544,568,571],{"href":569,"rel":570},"https://github.com/jfalcou/eve",[548],"github.com/jfalcou/eve"," ·\n",[544,574,577],{"href":575,"rel":576},"https://github.com/jfalcou/kiwaku",[548],"github.com/jfalcou/kiwaku",[537,579,580,566,583],{},[79,581,582],{},"Video replays (YouTube)",[544,584,587],{"href":585,"rel":586},"https://www.youtube.com/playlist?list=PLiZttWgOMudb6PsUoWtxY3G4Gv8f2lurG",[548],"Gray Scott Thursdays",[537,589,590,566,593],{},[79,591,592],{},"School website",[544,594,597],{"href":595,"rel":596},"https://cta-lapp.pages.in2p3.fr/COURS/GRAY_SCOTT_REVOLUTIONS/GrayScott2026/index.html",[548],"GrayScott2026",[599,600,601],"style",{},"html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .s2Zo4, html code.shiki .s2Zo4{--shiki-light:#6182B8;--shiki-default:#82AAFF;--shiki-dark:#82AAFF}html pre.shiki code .sfazB, html code.shiki .sfazB{--shiki-light:#91B859;--shiki-default:#C3E88D;--shiki-dark:#C3E88D}html pre.shiki code .sBMFI, html code.shiki .sBMFI{--shiki-light:#E2931D;--shiki-default:#FFCB6B;--shiki-dark:#FFCB6B}html pre.shiki code .sHwdD, html code.shiki .sHwdD{--shiki-light:#90A4AE;--shiki-light-font-style:italic;--shiki-default:#546E7A;--shiki-default-font-style:italic;--shiki-dark:#676E95;--shiki-dark-font-style:italic}",{"title":151,"searchDepth":164,"depth":164,"links":603},[604,612,617,619,620],{"id":108,"depth":164,"text":109,"children":605},[606,607,609,610,611],{"id":113,"depth":171,"text":114},{"id":132,"depth":171,"text":608},"2. eve::wide — the register becomes a C++ type",{"id":192,"depth":171,"text":193},{"id":277,"depth":171,"text":278},{"id":331,"depth":171,"text":332},{"id":350,"depth":164,"text":351,"children":613},[614,615,616],{"id":354,"depth":171,"text":355},{"id":379,"depth":171,"text":380},{"id":402,"depth":171,"text":403},{"id":428,"depth":164,"text":618},"The hands-on — GrayScott2026/day-6/",{"id":511,"depth":164,"text":512},{"id":531,"depth":164,"text":532},"June 29, two sessions: Joël Falcou opens the week with EVE and Kiwaku (explicit, portable C++20 SIMD), Pierre Aubert follows with the GPU architecture that carries the last three days.","md",[624],{"label":625,"icon":626,"to":569,"target":627},"EVE (GitHub)","i-simple-icons-github","_blank",{"icon":629},"lucide:cpu",{"title":35,"description":621},"uBDWk17ZE4TyRsUr6VbSOASIJ_yaeQbCmhK29vYBBU4",[633,635],{"title":31,"path":32,"stem":33,"description":634,"children":-1},"June 26, with Alice Faure, Jean-Marc Colley, Sébastien Valat and Nabil Garroum: profile Python, vectorize with NumPy, compile with Numba, then trace with JAX — up to ×18 without leaving Python.",{"title":39,"path":40,"stem":41,"description":636,"children":-1},"June 30, four sessions with Alice Faure, Jean-Marc Colley, Sébastien Valat and Nabil Garroum: CuPy, cuPyNumeric and JAX port Day 5's Gray-Scott to the accelerator — official A100 numbers included.",1783172490754]