@@ -14,9 +14,7 @@ As <a href="https://github.com/ORNL/cpp-proposals-pub/issues/464">pointed out by
1414(CSCS Swiss National Supercomputing Centre), some of the <i >Mandates</i >, <i >Preconditions</i >, and
1515<i >Complexity</i > elements of some BLAS 2 and BLAS 3 algorithms in [linalg] are incorrect.
1616</p >
17- </discussion >
18-
19- <resolution >
17+ <superseded >
2018<p >
2119This wording is relative to <paper num =" N4988" />.
2220</p >
@@ -309,6 +307,341 @@ template<class ExecutionPolicy,
309307</li >
310308
311309
310+ <li ><p >Modify <sref ref =" [linalg.algs.blas3.inplacetrsm]" /> as indicated:</p >
311+
312+ <blockquote class =" note" >
313+ <p >
314+ [<i >Drafting note</i >: Nothing is wrong here, but it's nice to make the complexity clauses depend
315+ only on input if possible]
316+ </p >
317+ </blockquote >
318+
319+ <blockquote >
320+ <pre >
321+ template< <i >in-matrix</i > InMat, class Triangle, class DiagonalStorage,
322+ <i >inout-matrix</i > InOutMat, class BinaryDivideOp>
323+ void triangular_matrix_matrix_right_solve(InMat A, Triangle t, DiagonalStorage d,
324+ InOutMat B, BinaryDivideOp divide);
325+ template< class ExecutionPolicy,
326+ <i >in-matrix</i > InMat, class Triangle, class DiagonalStorage,
327+ <i >inout-matrix</i > InOutMat, class BinaryDivideOp>
328+ void triangular_matrix_matrix_right_solve(ExecutionPolicy&& exec,
329+ InMat A, Triangle t, DiagonalStorage d,
330+ InOutMat B, BinaryDivideOp divide);
331+ </pre >
332+ <blockquote >
333+ <p >
334+ [… ]
335+ <p />
336+ -13- <i >Complexity</i >: 𝒪 (<tt ><ins >B</ins ><del >A</del >.extent(0)</tt > ×
337+ <tt >A.extent(<ins >0</ins ><del >1</del >)</tt > × <tt ><ins >A</ins ><del >B</del >.extent(1)</tt >).
338+ </p >
339+ </blockquote >
340+
341+ </blockquote >
342+
343+ </li >
344+ </ol >
345+ </superseded >
346+
347+ <note >LWG telecon 2025-10-10; Fix proposed resolution after review</note >
348+ <p >Add missing `()` in one place and change `(0, 1)` to `()` in another.</p >
349+
350+ </discussion >
351+
352+ <resolution >
353+ <p >
354+ This wording is relative to <paper num =" N5014" />.
355+ </p >
356+
357+ <ol >
358+
359+ <li ><p >Modify <sref ref =" [linalg.algs.blas2.gemv]" /> as indicated:</p >
360+
361+ <blockquote class =" note" >
362+ <p >
363+ [<i >Drafting note</i >: This change is needed because the matrix <tt >A</tt > does not need to be square.
364+ <tt >x.extent(0)</tt > must equal <tt >A.extent(1)</tt >, while <tt >y.extent(0)</tt > must equal
365+ <tt >A.extent(0)</tt >.]
366+ </p >
367+ </blockquote >
368+
369+ <blockquote >
370+ <p >
371+ -3- <i >Mandates</i >:
372+ </p >
373+ <ol style =" list-style-type: none" >
374+ <li ><p >(3.1) — <tt ><i >possibly-multipliable</i >< decltype(A), decltype(x), decltype(y)> ()</tt >
375+ is <tt >true</tt >, and</p ></li >
376+ <li ><p >(3.2) — <tt ><i >possibly-addable</i >< decltype(<ins >y</ins ><del >x</del >), decltype(y),
377+ decltype(z)> ()</tt > is <tt >true</tt > for those overloads that take a <tt >z</tt > parameter.</p ></li >
378+ </ol >
379+ <p >
380+ -4- <i >Preconditions</i >:
381+ </p >
382+ <ol style =" list-style-type: none" >
383+ <li ><p >(4.1) — <tt ><i >multipliable</i >(A, x, y)</tt > is <tt >true</tt >, and</p ></li >
384+ <li ><p >(4.2) — <tt ><i >addable</i >(<ins >y</ins ><del >x</del >, y, z)</tt > is <tt >true</tt >
385+ for those overloads that take a <tt >z</tt > parameter.</p ></li >
386+ </ol >
387+ <p >
388+ -5- <i >Complexity</i >: 𝒪 (<tt ><ins >A</ins ><del >x</del >.extent(0)</tt > ×
389+ <tt ><ins >x</ins ><del >A</del >.extent(<ins >0</ins ><del >1</del >)</tt >).
390+ </p >
391+ </blockquote >
392+
393+ </li >
394+
395+ <li ><p >Modify <sref ref =" [linalg.algs.blas2.symv]" /> as indicated:</p >
396+
397+ <blockquote >
398+ <p >
399+ -3- <i >Mandates</i >:
400+ </p >
401+ <ol style =" list-style-type: none" >
402+ <li ><p >(3.1) — [… ]</p ></li >
403+ <li ><p >(3.2) — [… ]</p ></li >
404+ <li ><p >(3.3) — <tt ><i >possibly-multipliable</i >< decltype(A), decltype(x), decltype(y)> ()</tt >
405+ is <tt >true</tt >, and</p ></li >
406+ <li ><p >(3.4) — <tt ><i >possibly-addable</i >< decltype(<ins >y</ins ><del >x</del >), decltype(y),
407+ decltype(z)> ()</tt > is <tt >true</tt > for those overloads that take a <tt >z</tt > parameter.</p ></li >
408+ </ol >
409+ <p >
410+ -4- <i >Preconditions</i >:
411+ </p >
412+ <ol style =" list-style-type: none" >
413+ <li ><p >(4.1) — <tt >A.extent(0)</tt > equals <tt >A.extent(1)</tt >,</p ></li >
414+ <li ><p >(4.2) — <tt ><i >multipliable</i >(A, x, y)</tt > is <tt >true</tt >, and</p ></li >
415+ <li ><p >(4.3) — <tt ><i >addable</i >(<ins >y</ins ><del >x</del >, y, z)</tt > is <tt >true</tt >
416+ for those overloads that take a <tt >z</tt > parameter.</p ></li >
417+ </ol >
418+ <p >
419+ -5- <i >Complexity</i >: 𝒪 (<tt ><ins >A</ins ><del >x</del >.extent(0)</tt > ×
420+ <tt ><ins >x</ins ><del >A</del >.extent(<ins >0</ins ><del >1</del >)</tt >).
421+ </p >
422+ </blockquote >
423+
424+ </li >
425+
426+ <li ><p >Modify <sref ref =" [linalg.algs.blas2.hemv]" /> as indicated:</p >
427+
428+ <blockquote >
429+ <p >
430+ -3- <i >Mandates</i >:
431+ </p >
432+ <ol style =" list-style-type: none" >
433+ <li ><p >(3.1) — [… ]</p ></li >
434+ <li ><p >(3.2) — [… ]</p ></li >
435+ <li ><p >(3.3) — <tt ><i >possibly-multipliable</i >< decltype(A), decltype(x), decltype(y)> ()</tt >
436+ is <tt >true</tt >, and</p ></li >
437+ <li ><p >(3.4) — <tt ><i >possibly-addable</i >< decltype(<ins >y</ins ><del >x</del >), decltype(y),
438+ decltype(z)> ()</tt > is <tt >true</tt > for those overloads that take a <tt >z</tt > parameter.</p ></li >
439+ </ol >
440+ <p >
441+ -4- <i >Preconditions</i >:
442+ </p >
443+ <ol style =" list-style-type: none" >
444+ <li ><p >(4.1) — <tt >A.extent(0)</tt > equals <tt >A.extent(1)</tt >,</p ></li >
445+ <li ><p >(4.2) — <tt ><i >multipliable</i >(A, x, y)</tt > is <tt >true</tt >, and</p ></li >
446+ <li ><p >(4.3) — <tt ><i >addable</i >(<ins >y</ins ><del >x</del >, y, z)</tt > is <tt >true</tt >
447+ for those overloads that take a <tt >z</tt > parameter.</p ></li >
448+ </ol >
449+ <p >
450+ -5- <i >Complexity</i >: 𝒪 (<tt ><ins >A</ins ><del >x</del >.extent(0)</tt > ×
451+ <tt ><ins >x</ins ><del >A</del >.extent(<ins >0</ins ><del >1</del >)</tt >).
452+ </p >
453+ </blockquote >
454+
455+ </li >
456+
457+ <li ><p >Modify <sref ref =" [linalg.algs.blas2.trmv]" /> as indicated:</p >
458+
459+ <blockquote class =" note" >
460+ <p >
461+ [<i >Drafting note</i >: The extents compatibility conditions are expressed differently than in the
462+ above matrix-vector multiply sections, perhaps more for consistency with the TRSV section below.
463+ They look correct here. The original <i >Complexity</i > elements adjusted below are technically correct,
464+ since <math ><mi >A</mi ></math > is square, but changing this would improve consistency with
465+ <sref ref =" [linalg.algs.blas2.gemv]" />]
466+ </p >
467+ </blockquote >
468+
469+ <blockquote >
470+ <pre >
471+ template< <i >in-matrix</i > InMat, class Triangle, class DiagonalStorage, <i >in-vector</i > InVec,
472+ <i >out-vector</i > OutVec>
473+ void triangular_matrix_vector_product(InMat A, Triangle t, DiagonalStorage d, InVec x, OutVec y);
474+ template< class ExecutionPolicy,
475+ <i >in-matrix</i > InMat, class Triangle, class DiagonalStorage, <i >in-vector</i > InVec,
476+ <i >out-vector</i > OutVec>
477+ void triangular_matrix_vector_product(ExecutionPolicy&& exec,
478+ InMat A, Triangle t, DiagonalStorage d, InVec x, OutVec y);
479+ </pre >
480+ <blockquote >
481+ <p >
482+ -5- [… ]
483+ <p />
484+ -6- <i >Effects</i >: Computes <math ><mi >y</mi > <mo >=</mo > <mi >A</mi ><mi >x</mi ></math >.
485+ <p />
486+ -7- <i >Complexity</i >: 𝒪 (<tt ><ins >A</ins ><del >x</del >.extent(0)</tt > ×
487+ <tt ><ins >x</ins ><del >A</del >.extent(<ins >0</ins ><del >1</del >)</tt >).
488+ </p >
489+ </blockquote >
490+ <pre >
491+ template< <i >in-matrix</i > InMat, class Triangle, class DiagonalStorage, <i >inout-vector</i > InOutVec>
492+ void triangular_matrix_vector_product(InMat A, Triangle t, DiagonalStorage d, InOutVec y);
493+ template< class ExecutionPolicy,
494+ <i >in-matrix</i > InMat, class Triangle, class DiagonalStorage, <i >inout-vector</i > InOutVec>
495+ void triangular_matrix_vector_product(ExecutionPolicy&& exec,
496+ InMat A, Triangle t, DiagonalStorage d, InOutVec y);
497+ </pre >
498+ <blockquote >
499+ <p >
500+ -8- [… ]
501+ <p />
502+ -9- <i >Effects</i >: [… ]
503+ <p />
504+ -10- <i >Complexity</i >: 𝒪 (<tt ><ins >A</ins ><del >y</del >.extent(0)</tt > ×
505+ <tt ><ins >y</ins ><del >A</del >.extent(<ins >0</ins ><del >1</del >)</tt >).
506+ </p >
507+ </blockquote >
508+ <pre >
509+ template< <i >in-matrix</i > InMat, class Triangle, class DiagonalStorage,
510+ <i >in-vector</i > InVec1, <i >in-vector</i > InVec2, <i >out-vector</i > OutVec>
511+ void triangular_matrix_vector_product(InMat A, Triangle t, DiagonalStorage d,
512+ InVec1 x, InVec2 y, OutVec z);
513+ template< class ExecutionPolicy,
514+ <i >in-matrix</i > InMat, class Triangle, class DiagonalStorage,
515+ <i >in-vector</i > InVec1, <i >in-vector</i > InVec2, <i >out-vector</i > OutVec>
516+ void triangular_matrix_vector_product(ExecutionPolicy&& exec,
517+ InMat A, Triangle t, DiagonalStorage d,
518+ InVec1 x, InVec2 y, OutVec z);
519+ </pre >
520+ <blockquote >
521+ <p >
522+ -11- [… ]
523+ <p />
524+ -12- <i >Effects</i >: Computes <math ><mi >z</mi > <mo >=</mo > <mi >y</mi > <mo >+</mo > <mi >A</mi ><mi >x</mi ></math >.
525+ <p />
526+ -13- <i >Complexity</i >: 𝒪 (<tt ><ins >A</ins ><del >x</del >.extent(0)</tt > ×
527+ <tt ><ins >x</ins ><del >A</del >.extent(<ins >0</ins ><del >1</del >)</tt >).
528+ </p >
529+ </blockquote >
530+ </blockquote >
531+
532+ </li >
533+
534+ <li ><p >Modify <sref ref =" [linalg.algs.blas3.rankk]" /> as indicated:</p >
535+
536+ <blockquote class =" note" >
537+ <p >
538+ [<i >Drafting note</i >: <paper num =" P3371R0" />, to be submitted in the August 15 mailing for
539+ LEWG review, contains the same wording changes to <sref ref =" [linalg.algs.blas3.rankk]" />
540+ and <sref ref =" [linalg.algs.blas3.rank2k]" /> as proposed here, with additional changes
541+ corresponding to that proposal. Please apply this LWG issue's changes first, before P3371 merges]
542+ </p >
543+ </blockquote >
544+
545+ <blockquote >
546+ <p >
547+ -3- <i >Mandates</i >:
548+ </p >
549+ <ol style =" list-style-type: none" >
550+ <li ><p >(3.1) — If <tt >InOutMat</tt > has <tt >layout_blas_packed</tt > layout, then the
551+ layout's <tt >Triangle</tt > template argument has the same type as the function's
552+ <tt >Triangle</tt > template argument; <ins >and</ins ></p ></li >
553+ <li ><p >(3.2) — <tt ><ins ><i >possibly-multipliable</i >< decltype(A),
554+ decltype(transposed(A)), decltype(C)> ()</ins > <del ><i >compatible-static-extents</i >< decltype(A),
555+ decltype(A)> (0, 1)</del ></tt > is <tt >true</tt ><ins >.</ins ><del >;</del ></p ></li >
556+ <li ><p ><del >(3.3) — <tt ><i >compatible-static-extents</i >< decltype(C), decltype(C)> (0, 1)</tt >
557+ is <tt >true</tt >; and</del ></p ></li >
558+ <li ><p ><del >(3.4) — <tt ><i >compatible-static-extents</i >< decltype(A), decltype(C)> (0, 0)</tt >
559+ is <tt >true</tt >.</del ></p ></li >
560+ </ol >
561+ <p >
562+ -4- <i >Preconditions</i >: <ins ><tt ><i >multipliable</i >(A, transposed(A), C)</tt > is <tt >true</tt >.</ins >
563+ </p >
564+ <ol style =" list-style-type: none" >
565+ <li ><p ><del >(4.1) — <tt >A.extent(0)</tt > equals <tt >A.extent(1)</tt >,</del ></p ></li >
566+ <li ><p ><del >(4.2) — <tt >C.extent(0)</tt > equals <tt >C.extent(1)</tt >, and</del ></p ></li >
567+ <li ><p ><del >(4.3) — <tt >A.extent(0)</tt > equals <tt >C.extent(0)</tt >.</del ></p ></li >
568+ </ol >
569+ <p >
570+ -5- <i >Complexity</i >: 𝒪 (<tt >A.extent(0)</tt > × <tt >A.extent(1)</tt > × <tt ><ins >A</ins ><del >C</del >.extent(0)</tt >).
571+ </p >
572+ </blockquote >
573+
574+ </li >
575+
576+ <li ><p >Modify <sref ref =" [linalg.algs.blas3.rank2k]" /> as indicated:</p >
577+
578+ <blockquote >
579+ <p >
580+ -3- <i >Mandates</i >:
581+ </p >
582+ <ol style =" list-style-type: none" >
583+ <li ><p >(3.1) — If <tt >InOutMat</tt > has <tt >layout_blas_packed</tt > layout, then the
584+ layout's <tt >Triangle</tt > template argument has the same type as the function's
585+ <tt >Triangle</tt > template argument;</p ></li >
586+ <li ><p >(3.2) — <tt ><ins ><i >possibly-multipliable</i >< decltype(A),
587+ decltype(transposed(B)), decltype(C)> ()</ins > <del ><i >possibly-addable</i >< decltype(A),
588+ decltype(B), decltype(C)> ()</del ></tt >
589+ is <tt >true</tt >; and</p ></li >
590+ <li ><p >(3.3) — <tt ><ins ><i >possibly-multipliable</i >< decltype(B),
591+ decltype(transposed(A)), decltype(C)> ()</ins > <del ><i >compatible-static-extents</i >< decltype(A),
592+ decltype(A)> (0, 1)</del ></tt > is <tt >true</tt >.</p ></li >
593+ </ol >
594+ <p >
595+ -4- <i >Preconditions</i >:
596+ </p >
597+ <ol style =" list-style-type: none" >
598+ <li ><p >(4.1) — <tt ><ins ><i >multipliable</i >(A, transposed(B), C)</ins >
599+ <del ><i >addable</i >(A, B, C)</del ></tt > is <tt >true</tt >, and</p ></li >
600+ <li ><p >(4.2) — <ins ><tt ><i >multipliable</i >(B, transposed(A), C)</tt > is <tt >true</tt ></ins >
601+ <del ><tt >A.extent(0)</tt > equals <tt >A.extent(1)</tt ></del >.</p ></li >
602+ </ol >
603+ <p >
604+ -5- <i >Complexity</i >: 𝒪 (<tt >A.extent(0)</tt > × <tt >A.extent(1)</tt > × <tt ><ins >B</ins ><del >C</del >.extent(0)</tt >).
605+ </p >
606+ </blockquote >
607+
608+ </li >
609+
610+ <li ><p >Modify <sref ref =" [linalg.algs.blas3.trsm]" /> as indicated:</p >
611+
612+ <blockquote class =" note" >
613+ <p >
614+ [<i >Drafting note</i >: Nothing is wrong here, but it's nice to make the complexity clauses depend
615+ only on input if possible.]
616+ </p >
617+ </blockquote >
618+
619+ <blockquote >
620+ <pre >
621+ template< <i >in-matrix</i > InMat1, class Triangle, class DiagonalStorage,
622+ <i >in-matrix</i > InMat2, <i >out-matrix</i > OutMat, class BinaryDivideOp>
623+ void triangular_matrix_matrix_left_solve(InMat1 A, Triangle t, DiagonalStorage d,
624+ InMat2 B, OutMat X, BinaryDivideOp divide);
625+ template< class ExecutionPolicy,
626+ <i >in-matrix</i > InMat1, class Triangle, class DiagonalStorage,
627+ <i >in-matrix</i > InMat2, <i >out-matrix</i > OutMat, class BinaryDivideOp>
628+ void triangular_matrix_matrix_left_solve(ExecutionPolicy&& exec,
629+ InMat1 A, Triangle t, DiagonalStorage d,
630+ InMat2 B, OutMat X, BinaryDivideOp divide);
631+ </pre >
632+ <blockquote >
633+ <p >
634+ [… ]
635+ <p />
636+ -6- <i >Complexity</i >: 𝒪 (<tt >A.extent(0)</tt > × <tt ><ins >B</ins ><del >X</del >.extent(1)</tt > × <tt ><ins >B</ins ><del >X</del >.extent(1)</tt >).
637+ </p >
638+ </blockquote >
639+
640+ </blockquote >
641+
642+ </li >
643+
644+
312645<li ><p >Modify <sref ref =" [linalg.algs.blas3.inplacetrsm]" /> as indicated:</p >
313646
314647<blockquote class =" note" >
0 commit comments