From 9308808920d4f9c5281f03a477ebad251d3d0cc5 Mon Sep 17 00:00:00 2001
From: Garcia Orozco <david.garcia.orozco@intel.com>
Date: Mon, 23 Sep 2024 07:34:05 -0700
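Subject: [PATCH 1/3] Introduce %fp-model-precise expansion

Add a `%fp-model-precise` lit substitution to sycl/test-e2e/lit.cfg.py and
use it in the Matrix e2e tests in place of the hard-coded
`-ffp-model=precise` flag.

The substitution expands to `/fp:precise` in the configuration branch that
uses MSVC-style options (`/LD`, `/Od`) and to `-ffp-model=precise` in the
other branch (`-shared`, `-O0`).
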
---
 sycl/test-e2e/Matrix/SG32/joint_matrix_apply_two_matrices.cpp | 2 +-
 sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache.cpp  | 2 +-
 .../Matrix/SG32/joint_matrix_bf16_fill_k_cache_SLM.cpp        | 2 +-
 .../Matrix/SG32/joint_matrix_bf16_fill_k_cache_init.cpp       | 2 +-
 .../Matrix/SG32/joint_matrix_bf16_fill_k_cache_unroll.cpp     | 2 +-
 .../SG32/joint_matrix_bf16_fill_k_cache_unroll_init.cpp       | 2 +-
 .../SG32/joint_matrix_apply_two_matrices.cpp                  | 2 +-
 .../SG32/joint_matrix_bf16_fill_k_cache.cpp                   | 2 +-
 .../SG32/joint_matrix_bf16_fill_k_cache_SLM.cpp               | 2 +-
 .../SG32/joint_matrix_bf16_fill_k_cache_init.cpp              | 2 +-
 .../SG32/joint_matrix_bf16_fill_k_cache_unroll.cpp            | 2 +-
 .../SG32/joint_matrix_bf16_fill_k_cache_unroll_init.cpp       | 2 +-
 .../SPVCooperativeMatrix/joint_matrix_apply_two_matrices.cpp  | 2 +-
 .../SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache.cpp   | 4 ++--
 .../joint_matrix_bf16_fill_k_cache_OOB.cpp                    | 4 ++--
 .../joint_matrix_bf16_fill_k_cache_SLM.cpp                    | 4 ++--
 .../joint_matrix_bf16_fill_k_cache_init.cpp                   | 2 +-
 .../joint_matrix_bf16_fill_k_cache_unroll.cpp                 | 2 +-
 .../joint_matrix_bf16_fill_k_cache_unroll_init.cpp            | 2 +-
 sycl/test-e2e/Matrix/joint_matrix_apply_two_matrices.cpp      | 2 +-
 sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp       | 4 ++--
 sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_OOB.cpp   | 4 ++--
 sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_SLM.cpp   | 4 ++--
 sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_init.cpp  | 2 +-
 .../Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp        | 4 ++--
 .../test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll.cpp | 2 +-
 .../Matrix/joint_matrix_bf16_fill_k_cache_unroll_init.cpp     | 2 +-
 sycl/test-e2e/lit.cfg.py                                      | 2 ++
 28 files changed, 36 insertions(+), 34 deletions(-)

diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_apply_two_matrices.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_apply_two_matrices.cpp
index 8e5ebeed1c222..e0aa84e460731 100644
--- a/sycl/test-e2e/Matrix/SG32/joint_matrix_apply_two_matrices.cpp
+++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_apply_two_matrices.cpp
@@ -10,7 +10,7 @@
 // SG size = 32 is not currently supported for SYCL Joint Matrix by IGC on DG2
 // UNSUPPORTED: gpu-intel-dg2
 
-// RUN: %{build} -ffp-model=precise -o %t.out
+// RUN: %{build} %fp-model-precise -o %t.out
 // RUN: %{run} %t.out
 
 #include "../common.hpp"
diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache.cpp
index 8bb170606c9c1..10e01af93d239 100644
--- a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache.cpp
+++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache.cpp
@@ -10,7 +10,7 @@
 // REQUIRES: aspect-ext_intel_matrix
 // REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943
 
-// RUN: %{build} -o %t_vnni.out -DVNNI -ffp-model=precise
+// RUN: %{build} -o %t_vnni.out -DVNNI %fp-model-precise
 // RUN: %{run} %t_vnni.out
 
 // TODO: add row major compilation and run once Sub-group size 32
diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_SLM.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_SLM.cpp
index bd22fa19354b1..40f5576c0042b 100644
--- a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_SLM.cpp
+++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_SLM.cpp
@@ -9,7 +9,7 @@
 // UNSUPPORTED: gpu-intel-dg2
 // REQUIRES: aspect-ext_intel_matrix, gpu
 
-// RUN: %{build} -o %t_gpu_vnni.out -ffp-model=precise -DSLM -DVNNI
+// RUN: %{build} -o %t_gpu_vnni.out %fp-model-precise -DSLM -DVNNI
 // RUN: %{run} %t_gpu_vnni.out
 
 // TODO: add row major compilation and run once Sub-group size 32
diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_init.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_init.cpp
index 16e620f167457..db358793a39f7 100644
--- a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_init.cpp
+++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_init.cpp
@@ -10,7 +10,7 @@
 // REQUIRES: aspect-ext_intel_matrix, gpu
 // REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943
 
-// RUN: %{build} -o %t.out -DINIT_LIST -DVNNI -ffp-model=precise
+// RUN: %{build} -o %t.out -DINIT_LIST -DVNNI %fp-model-precise
 // RUN: %{run} %t.out
 
 // -ffp-model=precise is added to not depend on compiler defaults.
diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_unroll.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_unroll.cpp
index 268bf3edbc69b..79b50f2d88a5d 100644
--- a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_unroll.cpp
+++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_unroll.cpp
@@ -10,7 +10,7 @@
 // REQUIRES: aspect-ext_intel_matrix
 // REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943
 
-// RUN: %{build} -mllvm -inline-threshold=5000 -ffp-model=precise -o %t.out -DMANUAL_UNROLL -DVNNI
+// RUN: %{build} -mllvm -inline-threshold=5000 %fp-model-precise -o %t.out -DMANUAL_UNROLL -DVNNI
 // RUN: %{run} %t.out
 
 // -mllvm -inline-threshold added as a workaround,
diff --git a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_unroll_init.cpp b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_unroll_init.cpp
index 9abfda91c6237..42c04e480b9ae 100644
--- a/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_unroll_init.cpp
+++ b/sycl/test-e2e/Matrix/SG32/joint_matrix_bf16_fill_k_cache_unroll_init.cpp
@@ -10,7 +10,7 @@
 // REQUIRES: aspect-ext_intel_matrix, gpu
 // REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943
 
-// RUN: %{build} -mllvm -inline-threshold=5000 -ffp-model=precise -o %t_gpu.out -DINIT_LIST -DMANUAL_UNROLL -DVNNI
+// RUN: %{build} -mllvm -inline-threshold=5000 %fp-model-precise -o %t_gpu.out -DINIT_LIST -DMANUAL_UNROLL -DVNNI
 // RUN: %{run} %t_gpu.out
 
 // -mllvm -inline-threshold added as a workaround,
diff --git a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_apply_two_matrices.cpp b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_apply_two_matrices.cpp
index 5fa27fc66df1c..84a2bc54d791f 100644
--- a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_apply_two_matrices.cpp
+++ b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_apply_two_matrices.cpp
@@ -10,7 +10,7 @@
 // SG size = 32 is not currently supported for SYCL Joint Matrix by IGC on DG2
 // UNSUPPORTED: gpu-intel-dg2
 
-// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -ffp-model=precise -o %t.out
+// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX %fp-model-precise -o %t.out
 // RUN: %{run} %t.out
 
 // XFAIL: cpu
diff --git a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache.cpp b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache.cpp
index 91175103be6cc..8420948046337 100644
--- a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache.cpp
+++ b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache.cpp
@@ -10,7 +10,7 @@
 // REQUIRES: aspect-ext_intel_matrix
 // REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943
 
-// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t_vnni.out -DVNNI -ffp-model=precise
+// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t_vnni.out -DVNNI %fp-model-precise
 // RUN: %{run} %t_vnni.out
 
 // TODO: add row major compilation and run once Sub-group size 32
diff --git a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache_SLM.cpp b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache_SLM.cpp
index 62cef33b3beb7..fb6eeed328995 100644
--- a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache_SLM.cpp
+++ b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache_SLM.cpp
@@ -9,7 +9,7 @@
 // UNSUPPORTED: gpu-intel-dg2
 // REQUIRES: aspect-ext_intel_matrix, gpu
 
-// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t_gpu_vnni.out -ffp-model=precise -DSLM -DVNNI
+// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t_gpu_vnni.out %fp-model-precise -DSLM -DVNNI
 // RUN: %{run} %t_gpu_vnni.out
 
 // TODO: add row major compilation and run once Sub-group size 32
diff --git a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache_init.cpp b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache_init.cpp
index eda198c7b2a41..6bcdcbcb79e17 100644
--- a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache_init.cpp
+++ b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache_init.cpp
@@ -10,7 +10,7 @@
 // REQUIRES: aspect-ext_intel_matrix, gpu
 // REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943
 
-// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t.out -DINIT_LIST -DVNNI -ffp-model=precise
+// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t.out -DINIT_LIST -DVNNI %fp-model-precise
 // RUN: %{run} %t.out
 
 // -ffp-model=precise is added to not depend on compiler defaults.
diff --git a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache_unroll.cpp b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache_unroll.cpp
index df0b207c0605b..c55158d5717e8 100644
--- a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache_unroll.cpp
+++ b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache_unroll.cpp
@@ -10,7 +10,7 @@
 // REQUIRES: aspect-ext_intel_matrix
 // REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943
 
-// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -mllvm -inline-threshold=5000 -ffp-model=precise -o %t.out -DMANUAL_UNROLL -DVNNI
+// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -mllvm -inline-threshold=5000 %fp-model-precise -o %t.out -DMANUAL_UNROLL -DVNNI
 // RUN: %{run} %t.out
 
 // XFAIL: cpu
diff --git a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache_unroll_init.cpp b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache_unroll_init.cpp
index 7fdd5e62736bc..b428a505f0e90 100644
--- a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache_unroll_init.cpp
+++ b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/SG32/joint_matrix_bf16_fill_k_cache_unroll_init.cpp
@@ -10,7 +10,7 @@
 // REQUIRES: aspect-ext_intel_matrix, gpu
 // REQUIRES-INTEL-DRIVER: lin: 27501, win: 101.4943
 
-// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -mllvm -inline-threshold=5000 -ffp-model=precise -o %t_gpu.out -DINIT_LIST -DMANUAL_UNROLL -DVNNI
+// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -mllvm -inline-threshold=5000 %fp-model-precise -o %t_gpu.out -DINIT_LIST -DMANUAL_UNROLL -DVNNI
 // RUN: %{run} %t_gpu.out
 
 // -mllvm -inline-threshold added as a workaround,
diff --git a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_apply_two_matrices.cpp b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_apply_two_matrices.cpp
index 827b0f4ced181..debf296c010b0 100644
--- a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_apply_two_matrices.cpp
+++ b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_apply_two_matrices.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: aspect-ext_intel_matrix
 
-// RUN: %{build} -ffp-model=precise -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t.out
+// RUN: %{build} %fp-model-precise -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t.out
 // RUN: %{run} %t.out
 
 // XFAIL: cpu
diff --git a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache.cpp b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache.cpp
index eb68e332eaa7c..7a3631fb2f05b 100644
--- a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache.cpp
+++ b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache.cpp
@@ -7,10 +7,10 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: aspect-ext_intel_matrix
 
-// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t_vnni.out -DVNNI -ffp-model=precise
+// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t_vnni.out -DVNNI %fp-model-precise
 // RUN: %{run} %t_vnni.out
 
-// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t.out -ffp-model=precise
+// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t.out %fp-model-precise
 // RUN: %{run} %t.out
 
 // XFAIL: cpu
diff --git a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_OOB.cpp b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_OOB.cpp
index af4b28e090b4c..e657f16ec2664 100644
--- a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_OOB.cpp
+++ b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_OOB.cpp
@@ -8,10 +8,10 @@
 // REQUIRES: aspect-ext_intel_matrix, gpu
 // UNSUPPORTED: gpu-intel-dg2
 
-// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t_gpu_vnni.out -ffp-model=precise -DOOB -DVNNI
+// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t_gpu_vnni.out %fp-model-precise -DOOB -DVNNI
 // RUN: %{run} %t_gpu_vnni.out
 
-// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t_gpu.out -ffp-model=precise -DOOB
+// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t_gpu.out %fp-model-precise -DOOB
 // RUN: %{run} %t_gpu.out
 
 // -ffp-model=precise is added to not depend on compiler defaults.
diff --git a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_SLM.cpp b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_SLM.cpp
index d81e7dbd685ba..c6c33cc3c4632 100644
--- a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_SLM.cpp
+++ b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_SLM.cpp
@@ -7,10 +7,10 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: aspect-ext_intel_matrix, gpu
 
-// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t_gpu_vnni.out -ffp-model=precise -DSLM -DVNNI
+// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t_gpu_vnni.out %fp-model-precise -DSLM -DVNNI
 // RUN: %{run} %t_gpu_vnni.out
 
-// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t_gpu.out -ffp-model=precise -DSLM
+// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t_gpu.out %fp-model-precise -DSLM
 // RUN: %{run} %t_gpu.out
 
 // -ffp-model=precise is added to not depend on compiler defaults.
diff --git a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_init.cpp b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_init.cpp
index f4c77bb50c5da..60df4c0a6192e 100644
--- a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_init.cpp
+++ b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_init.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: aspect-ext_intel_matrix, gpu
 
-// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t.out -DINIT_LIST -DVNNI -ffp-model=precise
+// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -o %t.out -DINIT_LIST -DVNNI %fp-model-precise
 // RUN: %{run} %t.out
 
 // -ffp-model=precise is added to not depend on compiler defaults.
diff --git a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_unroll.cpp b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_unroll.cpp
index 89e9048170837..ac4bbaec52169 100644
--- a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_unroll.cpp
+++ b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_unroll.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: aspect-ext_intel_matrix
 
-// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -mllvm -inline-threshold=2000 -ffp-model=precise -o %t.out -DMANUAL_UNROLL -DVNNI
+// RUN: %{build} -D__SPIRV_USE_COOPERATIVE_MATRIX -mllvm -inline-threshold=2000 %fp-model-precise -o %t.out -DMANUAL_UNROLL -DVNNI
 // RUN: %{run} %t.out
 
 // XFAIL: cpu
diff --git a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_unroll_init.cpp b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_unroll_init.cpp
index d653268345dcc..ded7492812912 100644
--- a/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_unroll_init.cpp
+++ b/sycl/test-e2e/Matrix/SPVCooperativeMatrix/joint_matrix_bf16_fill_k_cache_unroll_init.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: aspect-ext_intel_matrix, gpu
 
-// RUN: %{build} -mllvm -inline-threshold=2000 -ffp-model=precise -o %t_gpu.out -DINIT_LIST -DMANUAL_UNROLL -DVNNI
+// RUN: %{build} -mllvm -inline-threshold=2000 %fp-model-precise -o %t_gpu.out -DINIT_LIST -DMANUAL_UNROLL -DVNNI
 // RUN: %{run} %t_gpu.out
 
 // -mllvm -inline-threshold=2000 added as a workaround,
diff --git a/sycl/test-e2e/Matrix/joint_matrix_apply_two_matrices.cpp b/sycl/test-e2e/Matrix/joint_matrix_apply_two_matrices.cpp
index e61448852710b..704afb50279de 100644
--- a/sycl/test-e2e/Matrix/joint_matrix_apply_two_matrices.cpp
+++ b/sycl/test-e2e/Matrix/joint_matrix_apply_two_matrices.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: aspect-ext_intel_matrix
 
-// RUN: %{build} -ffp-model=precise -o %t.out
+// RUN: %{build} %fp-model-precise -o %t.out
 // RUN: %{run} %t.out
 
 #include "common.hpp"
diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp
index 349197641a428..9eddd18fae5e4 100644
--- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp
+++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp
@@ -7,10 +7,10 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: aspect-ext_intel_matrix
 
-// RUN: %{build} -o %t_vnni.out -DVNNI -ffp-model=precise
+// RUN: %{build} -o %t_vnni.out -DVNNI%fp-model-precise
 // RUN: %{run} %t_vnni.out
 
-// RUN: %{build} -o %t.out -ffp-model=precise
+// RUN: %{build} -o %t.out %fp-model-precise
 // RUN: %{run} %t.out
 
 // -ffp-model=precise is added to not depend on compiler defaults.
diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_OOB.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_OOB.cpp
index 0f2c8c71bd223..3c38f9740c932 100644
--- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_OOB.cpp
+++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_OOB.cpp
@@ -8,10 +8,10 @@
 // REQUIRES: aspect-ext_intel_matrix, gpu
 // UNSUPPORTED: gpu-intel-dg2
 
-// RUN: %{build} -o %t_gpu_vnni.out -ffp-model=precise -DOOB -DVNNI
+// RUN: %{build} -o %t_gpu_vnni.out %fp-model-precise -DOOB -DVNNI
 // RUN: %{run} %t_gpu_vnni.out
 
-// RUN: %{build} -o %t_gpu.out -ffp-model=precise -DOOB
+// RUN: %{build} -o %t_gpu.out %fp-model-precise -DOOB
 // RUN: %{run} %t_gpu.out
 
 // -ffp-model=precise is added to not depend on compiler defaults.
diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_SLM.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_SLM.cpp
index a30d6320038a8..75a22323ceaef 100644
--- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_SLM.cpp
+++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_SLM.cpp
@@ -7,10 +7,10 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: aspect-ext_intel_matrix, gpu
 
-// RUN: %{build} -o %t_gpu_vnni.out -ffp-model=precise -DSLM -DVNNI
+// RUN: %{build} -o %t_gpu_vnni.out %fp-model-precise -DSLM -DVNNI
 // RUN: %{run} %t_gpu_vnni.out
 
-// RUN: %{build} -o %t_gpu.out -ffp-model=precise -DSLM
+// RUN: %{build} -o %t_gpu.out %fp-model-precise -DSLM
 // RUN: %{run} %t_gpu.out
 
 // -ffp-model=precise is added to not depend on compiler defaults.
diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_init.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_init.cpp
index 9355b0cd56115..d490e52b2bbd9 100644
--- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_init.cpp
+++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_init.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: aspect-ext_intel_matrix, gpu
 
-// RUN: %{build} -o %t.out -DINIT_LIST -DVNNI -ffp-model=precise
+// RUN: %{build} -o %t.out -DINIT_LIST -DVNNI %fp-model-precise
 // RUN: %{run} %t.out
 
 // -ffp-model=precise is added to not depend on compiler defaults.
diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp
index 8893c7e638ef2..191bd6731ba8b 100644
--- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp
+++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_prefetch.cpp
@@ -8,10 +8,10 @@
 // REQUIRES: aspect-ext_intel_matrix
 // UNSUPPORTED: gpu-intel-dg2
 
-// RUN: %{build} -o %t_vnni.out -DPREFETCH -DVNNI -ffp-model=precise
+// RUN: %{build} -o %t_vnni.out -DPREFETCH -DVNNI %fp-model-precise
 // RUN: %{run} %t_vnni.out
 
-// RUN: %{build} -o %t.out -DPREFETCH -ffp-model=precise
+// RUN: %{build} -o %t.out -DPREFETCH %fp-model-precise
 // RUN: %{run} %t.out
 
 // -ffp-model=precise is added to not depend on compiler defaults.
diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll.cpp
index 9aa8923bfeea4..aae0bbb03adc2 100644
--- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll.cpp
+++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: aspect-ext_intel_matrix
 
-// RUN: %{build} -mllvm -inline-threshold=2000 -ffp-model=precise -o %t.out -DMANUAL_UNROLL -DVNNI
+// RUN: %{build} -mllvm -inline-threshold=2000 %fp-model-precise -o %t.out -DMANUAL_UNROLL -DVNNI
 // RUN: %{run} %t.out
 
 // -mllvm -inline-threshold=2000 added as a workaround,
diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll_init.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll_init.cpp
index 5e97bda664481..38fe66c59a53c 100644
--- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll_init.cpp
+++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll_init.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: aspect-ext_intel_matrix, gpu
 
-// RUN: %{build} -mllvm -inline-threshold=2000 -ffp-model=precise -o %t_gpu.out -DINIT_LIST -DMANUAL_UNROLL -DVNNI
+// RUN: %{build} -mllvm -inline-threshold=2000 %fp-model-precise -o %t_gpu.out -DINIT_LIST -DMANUAL_UNROLL -DVNNI
 // RUN: %{run} %t_gpu.out
 
 // -mllvm -inline-threshold=2000 added as a workaround,
diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py
index c2582467dd4cf..a7c9ca230401e 100644
--- a/sycl/test-e2e/lit.cfg.py
+++ b/sycl/test-e2e/lit.cfg.py
@@ -358,6 +358,7 @@ def open_check_file(file_name):
     config.substitutions.append(("%fPIC", ""))
     config.substitutions.append(("%shared_lib", "/LD"))
     config.substitutions.append(("%no_opt", "/Od"))
+    config.substitutions.append(("%fp-model-precise", "/fp:precise"))
 else:
     config.substitutions.append(
         (
@@ -383,6 +384,7 @@ def open_check_file(file_name):
     )
     config.substitutions.append(("%shared_lib", "-shared"))
     config.substitutions.append(("%no_opt", "-O0"))
+    config.substitutions.append(("%fp-model-precise", "-ffp-model=precise"))
 
 # Check if user passed verbose-print parameter, if yes, add VERBOSE_PRINT macro
 if "verbose-print" in lit_config.params:

From f6ffca7b8c11bcbbcc5b99a0b116d6c05d1c89c6 Mon Sep 17 00:00:00 2001
From: "Garcia Orozco, David" <david.garcia.orozco@intel.com>
Date: Mon, 23 Sep 2024 08:11:04 -0700
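Subject: [PATCH 2/3] Change %fp-model expansion to allow using other models

Replace the `%fp-model-precise` substitution with the prefix substitution
`%fp-model-`, which expands to `/fp:` or `-ffp-model=`. The model name that
follows the prefix in a RUN line is left in place, so `%fp-model-precise`
keeps its previous expansion and other models such as `%fp-model-fast`
become available.

Use `%fp-model-fast` in Regression/optimization_level_debug_info_specopt.cpp
and drop its per-test `%{ffpflags}` definition. Note for reviewers: the
removed definition passed `/clang:-ffp-model=fast` under `cl_options`, while
the new substitution yields `/fp:fast` in the MSVC-style branch.
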
---
 .../Regression/optimization_level_debug_info_specopt.cpp      | 3 +--
 sycl/test-e2e/lit.cfg.py                                      | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/sycl/test-e2e/Regression/optimization_level_debug_info_specopt.cpp b/sycl/test-e2e/Regression/optimization_level_debug_info_specopt.cpp
index 57246c6cf4542..8127c1883879c 100644
--- a/sycl/test-e2e/Regression/optimization_level_debug_info_specopt.cpp
+++ b/sycl/test-e2e/Regression/optimization_level_debug_info_specopt.cpp
@@ -1,7 +1,6 @@
-// DEFINE: %{ffpflags} = %if cl_options %{/clang:-ffp-model=fast%} %else %{-ffp-model=fast%}
 // DEFINE: %{Oz} = %if cl_options %{/clang:-Oz%} %else %{-Oz%}
 // DEFINE: %{O} = %if cl_options %{/clang:-O%} %else %{-O%}
-// RUN: %{build} %debug_option %{ffpflags} -o %t.out
+// RUN: %{build} %debug_option %fp-model-fast -o %t.out
 // RUN: %{build} %debug_option -Os -o %t.out
 // RUN: %{build} %debug_option %{Oz} -o %t.out
 // RUN: %{build} %debug_option -Og -o %t.out
diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py
index a7c9ca230401e..d1e77abd7e51f 100644
--- a/sycl/test-e2e/lit.cfg.py
+++ b/sycl/test-e2e/lit.cfg.py
@@ -358,7 +358,7 @@ def open_check_file(file_name):
     config.substitutions.append(("%fPIC", ""))
     config.substitutions.append(("%shared_lib", "/LD"))
     config.substitutions.append(("%no_opt", "/Od"))
-    config.substitutions.append(("%fp-model-precise", "/fp:precise"))
+    config.substitutions.append(("%fp-model-", "/fp:"))
 else:
     config.substitutions.append(
         (
@@ -384,7 +384,7 @@ def open_check_file(file_name):
     )
     config.substitutions.append(("%shared_lib", "-shared"))
     config.substitutions.append(("%no_opt", "-O0"))
-    config.substitutions.append(("%fp-model-precise", "-ffp-model=precise"))
+    config.substitutions.append(("%fp-model-", "-ffp-model="))
 
 # Check if user passed verbose-print parameter, if yes, add VERBOSE_PRINT macro
 if "verbose-print" in lit_config.params:

From cfa3a2596cf3a5a96b23a89b5d2161fd635f1fab Mon Sep 17 00:00:00 2001
From: "Garcia Orozco, David" <david.garcia.orozco@intel.com>
Date: Mon, 23 Sep 2024 11:38:10 -0700
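Subject: [PATCH 3/3] Add missing space between arguments

The first RUN line of Matrix/joint_matrix_bf16_fill_k_cache.cpp had
`-DVNNI` and `%fp-model-precise` fused into a single token
(`-DVNNI%fp-model-precise`). Separate them with a space so they are passed
as two arguments.
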
---
 sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp
index 9eddd18fae5e4..b4c742383e4ab 100644
--- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp
+++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 // REQUIRES: aspect-ext_intel_matrix
 
-// RUN: %{build} -o %t_vnni.out -DVNNI%fp-model-precise
+// RUN: %{build} -o %t_vnni.out -DVNNI %fp-model-precise
 // RUN: %{run} %t_vnni.out
 
 // RUN: %{build} -o %t.out %fp-model-precise