[{"data":1,"prerenderedAt":574},["ShallowReactive",2],{"navigation_docs_en":3,"-en-gray-scott-school-jour-8":66,"-en-gray-scott-school-jour-8-surround":569},[4,50,60],{"title":5,"path":6,"stem":7,"children":8},"The Gray Scott School","/en/gray-scott-school","en/1.gray-scott-school/01.index",[9,10,14,18,22,26,30,34,38,42,46],{"title":5,"path":6,"stem":7},{"title":11,"path":12,"stem":13},"CINERI Presentation","/en/gray-scott-school/presentation-cineri","en/1.gray-scott-school/02.presentation-cineri",{"title":15,"path":16,"stem":17},"Day 1 — Foundations","/en/gray-scott-school/jour-1","en/1.gray-scott-school/03.jour-1",{"title":19,"path":20,"stem":21},"Day 2 — C++ on CPU","/en/gray-scott-school/jour-2","en/1.gray-scott-school/04.jour-2",{"title":23,"path":24,"stem":25},"Day 3 — Fortran on CPU","/en/gray-scott-school/jour-3","en/1.gray-scott-school/05.jour-3",{"title":27,"path":28,"stem":29},"Day 4 — Kokkos on CPU","/en/gray-scott-school/jour-4","en/1.gray-scott-school/06.jour-4",{"title":31,"path":32,"stem":33},"Day 5 — Python on CPU","/en/gray-scott-school/jour-5","en/1.gray-scott-school/07.jour-5",{"title":35,"path":36,"stem":37},"Day 6 — SIMD with EVE + GPU architecture","/en/gray-scott-school/jour-6","en/1.gray-scott-school/08.jour-6",{"title":39,"path":40,"stem":41},"Day 7 — Python on GPU","/en/gray-scott-school/jour-7","en/1.gray-scott-school/09.jour-7",{"title":43,"path":44,"stem":45},"Day 8 — Fortran on GPU","/en/gray-scott-school/jour-8","en/1.gray-scott-school/10.jour-8",{"title":47,"path":48,"stem":49},"Day 9 — Kokkos on 
GPU","/en/gray-scott-school/jour-9","en/1.gray-scott-school/11.jour-9",{"title":51,"path":52,"stem":53,"children":54},"Projects","/en/projets","en/2.projets/1.index",[55,56],{"title":51,"path":52,"stem":53},{"title":57,"path":58,"stem":59},"SenLand","/en/projets/senland","en/2.projets/2.senland",{"title":61,"path":62,"stem":63,"children":64},"About","/en/a-propos","en/3.a-propos/1.index",[65],{"title":61,"path":62,"stem":63},{"id":67,"title":43,"badge":68,"body":69,"category":68,"description":562,"extension":563,"links":68,"meta":564,"navigation":566,"path":44,"seo":567,"stem":45,"tags":68,"__hash__":568},"docs_en/en/1.gray-scott-school/10.jour-8.md",null,{"type":70,"value":71,"toc":550},"minimark",[72,113,118,127,139,180,191,195,296,306,309,313,328,332,339,426,436,444,448,463,467,473,478,482,546],[73,74,75],"blockquote",{},[76,77,78,82,83,86,87,91,92,95,96,99,100,104,105,108,109,112],"p",{},[79,80,81],"strong",{},"July 1, 2026"," · Morning: ",[79,84,85],{},"Vincent Lafage"," (IJCLab) — ",[88,89,90],"em",{},"Fortran 2018 on GPU"," ·\n5 pm: ",[79,93,94],{},"Pierre Aubert"," (LAPP) — ",[88,97,98],{},"Julia … in Rust … and C++ … with pixi ?"," · Marcel\nVivargent Auditorium + satellites (including CINERI). The hands-on reuses Day 3's repo\n(",[101,102,103],"code",{},"day-3-a/","), the ",[101,106,107],{},"GPU/"," side this time — every measurement below is ",[79,110,111],{},"reproduced\nlocally"," (nvfortran + GTX 1650).",[114,115,117],"h2",{"id":116},"morning-session-standard-fortran-on-the-gpu","Morning session — standard Fortran on the GPU",[119,120,122,123,126],"h3",{"id":121},"_1-do-concurrent-from-cpu-to-gpu-without-changing-a-line","1. ",[101,124,125],{},"do concurrent",": from CPU to GPU without changing a line",[76,128,129,130,133,134,138],{},"The Day 3 bridge is crossed. 
The nested ",[101,131,132],{},"do"," loops of the Laplacian become a ",[79,135,136],{},[101,137,125],{}," —\nan ISO standard construct that asserts the iterations are independent, so the compiler may run\nthem in any order.",[140,141,146],"pre",{"className":142,"code":143,"language":144,"meta":145,"style":145},"language-fortran shiki shiki-themes material-theme-lighter material-theme material-theme-palenight","do concurrent (i = 2:nx-1, j = 2:ny-1)\n   lap_u = sum(stencil * U0(i-1:i+1, j-1:j+1))\n   U1(i,j) = U0(i,j) + dt * (Diffusivity_u*lap_u - U0(i,j)*V0(i,j)**2 &\n                             + Feed_Rate*(1.0_pr - U0(i,j)))\nend do\n","fortran","",[101,147,148,156,162,168,174],{"__ignoreMap":145},[149,150,153],"span",{"class":151,"line":152},"line",1,[149,154,155],{},"do concurrent (i = 2:nx-1, j = 2:ny-1)\n",[149,157,159],{"class":151,"line":158},2,[149,160,161],{},"   lap_u = sum(stencil * U0(i-1:i+1, j-1:j+1))\n",[149,163,165],{"class":151,"line":164},3,[149,166,167],{},"   U1(i,j) = U0(i,j) + dt * (Diffusivity_u*lap_u - U0(i,j)*V0(i,j)**2 &\n",[149,169,171],{"class":151,"line":170},4,[149,172,173],{},"                             + Feed_Rate*(1.0_pr - U0(i,j)))\n",[149,175,177],{"class":151,"line":176},5,[149,178,179],{},"end do\n",[76,181,182,183,186,187,190],{},"No directive, no API — plain Fortran. With ",[101,184,185],{},"-stdpar=gpu",", ",[79,188,189],{},"nvfortran"," generates a kernel,\nallocates the arrays in unified memory and moves the data itself.",[119,192,194],{"id":193},"_2-three-offload-back-ends-one-source","2. 
Three offload back-ends, one source",[196,197,198,217],"table",{},[199,200,201],"thead",{},[202,203,204,208,211,214],"tr",{},[205,206,207],"th",{},"Variant",[205,209,210],{},"Mechanism",[205,212,213],{},"Key flag",[205,215,216],{},"Target",[218,219,220,239,257,277],"tbody",{},[202,221,222,228,232,236],{},[223,224,225],"td",{},[101,226,227],{},"stdpar",[223,229,230],{},[101,231,125],{},[223,233,234],{},[101,235,185],{},[223,237,238],{},"GPU",[202,240,241,245,249,254],{},[223,242,243],{},[101,244,227],{},[223,246,247],{},[101,248,125],{},[223,250,251],{},[101,252,253],{},"-stdpar=multicore",[223,255,256],{},"CPU cores",[202,258,259,264,270,275],{},[223,260,261],{},[101,262,263],{},"openacc",[223,265,266,269],{},[101,267,268],{},"!$acc"," directives",[223,271,272],{},[101,273,274],{},"-acc",[223,276,238],{},[202,278,279,284,289,294],{},[223,280,281],{},[101,282,283],{},"openmp_offload",[223,285,286],{},[101,287,288],{},"!$omp target",[223,290,291],{},[101,292,293],{},"-mp=gpu",[223,295,238],{},[76,297,298,301,302,305],{},[101,299,300],{},"-gpu=ccnative"," targets the compute capability of the GPU present; ",[101,303,304],{},"-Minfo=accel"," makes the\ncompiler report which loops it offloaded — the first diagnostic reflex.",[307,308],"d8-offload",{},[119,310,312],{"id":311},"_3-the-hdf5-pitfall","3. The HDF5 pitfall",[76,314,315,316,319,320,323,324,327],{},"Fortran module files (",[101,317,318],{},".mod",") are ",[79,321,322],{},"not portable across compilers",": an HDF5 built with gfortran\nis unreadable by nvfortran. Either rebuild HDF5 with nvfortran, or disable output\n(",[101,325,326],{},"do_write = .false.",") to time the pure GPU kernel.",[119,329,331],{"id":330},"_4-measure-then-compare","4. 
Measure, then compare",[76,333,334,335,338],{},"Measured on a ",[79,336,337],{},"GeForce GTX 1650"," (1024×1024 grid, 4000 steps, HDF5 off):",[196,340,341,356],{},[199,342,343],{},[202,344,345,347,350,353],{},[205,346,207],{},[205,348,349],{},"Flag",[205,351,352],{},"Time",[205,354,355],{},"Speedup",[218,357,358,379,394,409],{},[202,359,360,365,369,374],{},[223,361,362,364],{},[101,363,125],{}," → GPU",[223,366,367],{},[101,368,185],{},[223,370,371],{},[79,372,373],{},"1.12 s",[223,375,376],{},[79,377,378],{},"48×",[202,380,381,384,388,391],{},[223,382,383],{},"OpenACC → GPU",[223,385,386],{},[101,387,274],{},[223,389,390],{},"1.46 s",[223,392,393],{},"37×",[202,395,396,399,403,406],{},[223,397,398],{},"OpenMP target → GPU",[223,400,401],{},[101,402,293],{},[223,404,405],{},"1.55 s",[223,407,408],{},"35×",[202,410,411,416,420,423],{},[223,412,413,415],{},[101,414,125],{}," → CPU (≈ 7 cores)",[223,417,418],{},[101,419,253],{},[223,421,422],{},"53.85 s",[223,424,425],{},"baseline",[76,427,428,429,431,432,435],{},"The GPU crushes the multicore CPU (~48×), and the three offload back-ends sit within half a\nsecond of each other — standard ",[101,430,125],{}," ",[79,433,434],{},"matches the directives"," while staying plain Fortran.",[437,438],"gs-bar-chart",{":categories":439,":series":440,"note":441,"title":442,"unit":443},"[\"do concurrent → GPU\",\"OpenACC → GPU\",\"OpenMP target → GPU\",\"do concurrent → CPU (~7c)\"]","[{\"name\":\"Time\",\"values\":[1.12,1.46,1.55,53.85]}]","The CPU bar dwarfs the three GPU bars — offload cuts the time by ~48×.","Gray-Scott: GPU vs CPU (GTX 1650, 1024×1024, 4000 steps)"," s",[114,445,447],{"id":446},"closing-session-5-pm-julia-rust-c-united-by-pixi","Closing session (5 pm) — Julia · Rust · C++, united by pixi",[76,449,450,451,454,455,458,459,462],{},"The day ends with an open-ended session: ",[88,452,453],{},"can Julia, Rust and C++ coexist for the same\ncomputation, in a single reproducible toolchain?"," No single language owns HPC — 
Julia for\nhigh-level prototyping, Rust for safe systems performance, C++ the incumbent. ",[79,456,457],{},"pixi"," is the\nanswer: it pins compilers, CUDA and the Julia/Rust runtimes in one lockfile, so the three worlds\nrun side by side. The lesson outlives Gray-Scott: it is the ",[79,460,461],{},"method"," that transfers, not the\nsyntax.",[114,464,466],{"id":465},"on-video-the-official-replays","On video — the official replays",[468,469],"yt-embed",{"caption":470,"id":471,"title":472},"Replay — Fortran On GPU (Gray Scott Thursdays)","jOAn5cM-26U","Fortran On GPU",[468,474],{"caption":475,"id":476,"title":477},"Replay — Introduction to Rust (Gray Scott Thursdays), echoing the polyglot session","rjEqukhkj0c","Introduction to Rust",[114,479,481],{"id":480},"sources-official-material","Sources & official material",[483,484,485,505,516,526,536],"ul",{},[486,487,488,491,492,499,500],"li",{},[79,489,490],{},"The day's slides"," (PDF, school GitLab wiki):\n",[493,494,498],"a",{"href":495,"rel":496},"https://gitlab.in2p3.fr/CTA-LAPP/COURS/GRAY_SCOTT_REVOLUTIONS/GrayScott2026/-/wikis/uploads/GrayScottDay-8/FortranFuriousGPU_IJC_dual_GS_2026.pdf",[497],"nofollow","FortranFuriousGPU — IJC dual GS 2026"," ·\n",[493,501,504],{"href":502,"rel":503},"https://gitlab.in2p3.fr/CTA-LAPP/COURS/GRAY_SCOTT_REVOLUTIONS/GrayScott2026/-/wikis/uploads/GrayScottDay-8/8-webinar_julia_in_rust_and_cpp_with_pixi.pdf",[497],"Julia in Rust and C++ with pixi",[486,506,507,510,511],{},[79,508,509],{},"The course repository",":\n",[493,512,515],{"href":513,"rel":514},"https://gitlab.in2p3.fr/lafage/GrayScottFortranTuto",[497],"gitlab.in2p3.fr/lafage/GrayScottFortranTuto",[486,517,518,510,521],{},[79,519,520],{},"The compiler",[493,522,525],{"href":523,"rel":524},"https://developer.nvidia.com/hpc-sdk",[497],"NVIDIA HPC SDK (nvfortran)",[486,527,528,510,531],{},[79,529,530],{},"Video replays 
(YouTube)",[493,532,535],{"href":533,"rel":534},"https://www.youtube.com/playlist?list=PLiZttWgOMudb6PsUoWtxY3G4Gv8f2lurG",[497],"Gray Scott Thursdays",[486,537,538,510,541],{},[79,539,540],{},"School website",[493,542,545],{"href":543,"rel":544},"https://cta-lapp.pages.in2p3.fr/COURS/GRAY_SCOTT_REVOLUTIONS/GrayScott2026/index.html",[497],"GrayScott2026",[547,548,549],"style",{},"html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}",{"title":145,"searchDepth":158,"depth":158,"links":551},[552,559,560,561],{"id":116,"depth":158,"text":117,"children":553},[554,556,557,558],{"id":121,"depth":164,"text":555},"1. 
do concurrent: from CPU to GPU without changing a line",{"id":193,"depth":164,"text":194},{"id":311,"depth":164,"text":312},{"id":330,"depth":164,"text":331},{"id":446,"depth":158,"text":447},{"id":465,"depth":158,"text":466},{"id":480,"depth":158,"text":481},"Standard Fortran on the GPU via do concurrent, compared with OpenACC and OpenMP target from a single source — plus the closing polyglot session.","md",{"icon":565},"lucide:flame",true,{"title":43,"description":562},"QDz4V4S06dg1w7oixrMTU0IWUjwIlbM4ZNppXt7iRtU",[570,572],{"title":39,"path":40,"stem":41,"description":571,"children":-1},"June 30, four sessions with Alice Faure, Jean-Marc Colley, Sébastien Valat and Nabil Garroum: CuPy, cuPyNumeric and JAX port Day 5's Gray-Scott to the accelerator — official A100 numbers included.",{"title":47,"path":48,"stem":49,"description":573,"children":-1},"The portable kernel reaches the accelerator. Portability is not free — layout, host↔device transfers and synchronization — then the school closes.",1783172490754]