Skip to content

Commit 33a4f7b

Browse files
committed
Rework compiler flags (should be correct for Windows now)
1 parent d9ac8d2 commit 33a4f7b

File tree

2 files changed

+28
-15
lines changed

2 files changed

+28
-15
lines changed

exllamav3/ext.py

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -80,19 +80,23 @@ def find_msvc():
80 80
else:
81 81
print(" !! Unable to find cl.exe; compilation will probably fail", file = sys.stderr)
82 82

83-
# gcc / cl.exe flags
83+
# compiler flags
84+
85+
extra_cflags = []
86+
extra_cuda_cflags = ["-lineinfo", "-O3"]
84 87

85 88
if windows:
86-
extra_cflags = ["/Ox"]
89+
extra_cflags += ["/Ox", "/arch:AVX2"]
90+
extra_cuda_cflags += ["-Xcompiler", "/O2", "-Xcompiler", "/arch:AVX2"]
91+
if ext_debug:
92+
extra_cflags += ["/Zi"]
93+
extra_cuda_cflags += ["-Xcompiler", "/Zi"]
87 94
else:
88-
extra_cflags = ["-Ofast"]
89-
90-
if ext_debug:
91-
extra_cflags += ["-ftime-report", "-DTORCH_USE_CUDA_DSA"]
92-
93-
# nvcc flags
94-
95-
extra_cuda_cflags = ["-lineinfo", "-O3", "-Xcompiler \"-O3 -mavx2\""]
95+
extra_cflags += ["-Ofast", "-mavx2"]
96+
extra_cuda_cflags += ["-Xcompiler=-O3", "-Xcompiler=-mavx2"]
97+
if ext_debug:
98+
extra_cflags += ["-ftime-report", "-DTORCH_USE_CUDA_DSA"]
99+
extra_cuda_cflags += ["-Xcompiler=-g"]
96 100

97 101
if torch.version.hip:
98 102
extra_cuda_cflags += ["-DHIPBLAS_USE_HIP_HALF"]

setup.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,12 +17,21 @@
17 17

18 18
windows = os.name == "nt"
19 19

20-
extra_cflags = ["/Ox"] if windows else ["-O3"]
20+
extra_cflags = []
21+
extra_cuda_cflags = ["-lineinfo", "-O3"]
21 22

22-
if ext_debug:
23-
extra_cflags += ["-ftime-report", "-DTORCH_USE_CUDA_DSA"]
24-
25-
extra_cuda_cflags = ["-lineinfo", "-O3", "-Xcompiler \"-O3 -mavx2\""]
23+
if windows:
24+
extra_cflags += ["/Ox", "/arch:AVX2"]
25+
extra_cuda_cflags += ["-Xcompiler", "/O2", "-Xcompiler", "/arch:AVX2"]
26+
if ext_debug:
27+
extra_cflags += ["/Zi"]
28+
extra_cuda_cflags += ["-Xcompiler", "/Zi"]
29+
else:
30+
extra_cflags += ["-Ofast", "-mavx2"]
31+
extra_cuda_cflags += ["-Xcompiler=-O3", "-Xcompiler=-mavx2"]
32+
if ext_debug:
33+
extra_cflags += ["-ftime-report", "-DTORCH_USE_CUDA_DSA"]
34+
extra_cuda_cflags += ["-Xcompiler=-g"]
26 35

27 36
if torch and torch_version.hip:
28 37
extra_cuda_cflags += ["-DHIPBLAS_USE_HIP_HALF"]

0 commit comments

Comments
 (0)