Skip to content

Commit ea39f26

Browse files
committed
Upgrade to Cosmopolitan v3.8.0
The latest cosmocc allows us to use Clang for compilation, which reduces build latency for this project by 3x.
1 parent c7c4d65 commit ea39f26

File tree

14 files changed

+22
-26
lines changed

14 files changed

+22
-26
lines changed

build/config.mk

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#── vi: set noet ft=make ts=8 sw=8 fenc=utf-8 :vi ────────────────────┘
33

44
PREFIX = /usr/local
5-
COSMOCC = .cosmocc/3.7.1
5+
COSMOCC = .cosmocc/3.8.0
66
TOOLCHAIN = $(COSMOCC)/bin/cosmo
77

88
CC = $(TOOLCHAIN)cc
@@ -14,7 +14,7 @@ INSTALL = install
1414

1515
ARFLAGS = rcsD
1616
CXXFLAGS = -frtti -std=gnu++23
17-
CCFLAGS = -O2 -fexceptions -fsignaling-nans -ffunction-sections -fdata-sections
17+
CCFLAGS = -O2 -g -fexceptions -ffunction-sections -fdata-sections -mclang
1818
CPPFLAGS_ = -iquote. -mcosmo -DGGML_MULTIPLATFORM -Wno-attributes -DLLAMAFILE_DEBUG
1919
TARGET_ARCH = -Xx86_64-mtune=znver4
2020

@@ -52,5 +52,5 @@ clean:; rm -rf o
5252
.PHONY: distclean
5353
distclean:; rm -rf o .cosmocc
5454

55-
.cosmocc/3.7.1:
56-
build/download-cosmocc.sh $@ 3.7.1 13b65b0e659b493bd82f3d0a319d0265d66f849839e484aa2a54191024711e85
55+
.cosmocc/3.8.0:
56+
build/download-cosmocc.sh $@ 3.8.0 813c6b2f95062d2e0a845307a79505424cb98cb038e8013334f8a22e3b92a474

llama.cpp/BUILD.mk

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ o/$(MODE)/llama.cpp/ggml-vector-amd-f16c.o \
4747
o/$(MODE)/llama.cpp/ggml-vector-amd-fma.o \
4848
o/$(MODE)/llama.cpp/ggml-vector-arm80.o \
4949
o/$(MODE)/llama.cpp/ggml-vector-arm82.o: \
50-
private CCFLAGS += -O3
50+
private CCFLAGS += -O3 -mgcc
5151

5252
o/$(MODE)/llama.cpp/ggml-alloc.o \
5353
o/$(MODE)/llama.cpp/ggml-backend.o \
@@ -60,6 +60,9 @@ o/$(MODE)/llama.cpp/ggml-alloc.o \
6060
o/$(MODE)/llama.cpp/common.o: \
6161
private CCFLAGS += -Os
6262

63+
o/$(MODE)/llama.cpp/unicode-data.o: \
64+
private CCFLAGS += -mgcc
65+
6366
o/$(MODE)/llama.cpp/ggml-quants.o: private CXXFLAGS += -Os
6467
o/$(MODE)/llama.cpp/ggml-quants-amd-k8.o: private TARGET_ARCH += -Xx86_64-mtune=k8
6568
o/$(MODE)/llama.cpp/ggml-quants-amd-ssse3.o: private TARGET_ARCH += -Xx86_64-mtune=core2 -Xx86_64-mssse3

llama.cpp/ggml-backend.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,7 @@ GGML_CALL static void ggml_backend_registry_init(void) {
425425

426426
// add forward decls here to avoid including the backend headers
427427
/* #ifdef GGML_USE_CUDA [jart] */
428-
/* extern GGML_CALL void ggml_backend_cuda_reg_devices(void); */
428+
extern GGML_CALL void ggml_backend_cuda_reg_devices(void);
429429
ggml_backend_cuda_reg_devices();
430430
/* #endif */
431431

llama.cpp/ggml.c

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -144,13 +144,10 @@ inline static void * ggml_aligned_malloc(size_t size) {
144144
if (result != 0) {
145145
// Handle allocation failure
146146
const char *error_desc = "unknown allocation error";
147-
switch (result) {
148-
case EINVAL:
149-
error_desc = "invalid alignment value";
150-
break;
151-
case ENOMEM:
152-
error_desc = "insufficient memory";
153-
break;
147+
if (result == EINVAL) {
148+
error_desc = "invalid alignment value";
149+
} else if (result == ENOMEM) {
150+
error_desc = "insufficient memory";
154151
}
155152
GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
156153
GGML_ABORT("fatal error");

llama.cpp/llama-bench/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
#include <string>
1616
#include <unistd.h>
1717
#include <vector>
18-
#include <tool/args/args.h>
18+
#include <cosmo.h>
1919
#include <stdatomic.h>
2020
#include <third_party/nsync/futex.internal.h>
2121

llama.cpp/llava/BUILD.mk

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@ LLAMA_CPP_LLAVA_HDRS = $(filter %.h,$(LLAMA_CPP_LLAVA_FILES))
88
LLAMA_CPP_LLAVA_SRCS = $(filter %.cpp,$(LLAMA_CPP_LLAVA_FILES))
99
LLAMA_CPP_LLAVA_OBJS = $(LLAMA_CPP_LLAVA_SRCS:%.cpp=o/$(MODE)/%.o)
1010

11-
.PHONY: tool/args/args.h
12-
1311
o/$(MODE)/llama.cpp/llava/llava.a: \
1412
$(LLAMA_CPP_LLAVA_OBJS)
1513

llama.cpp/main/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
#include <string>
1616
#include <unistd.h>
1717
#include <vector>
18-
#include <tool/args/args.h>
18+
#include <cosmo.h>
1919

2020
#include "llamafile/version.h"
2121
#include "llama.cpp/llama.h"

llama.cpp/server/server.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,7 @@
2424
#include <condition_variable>
2525
#include <atomic>
2626
#include <signal.h>
27-
#include <libc/calls/pledge.h>
28-
#include <tool/args/args.h>
27+
#include <cosmo.h>
2928
#include <libc/dce.h>
3029

3130
double g_prompt_per_second_jart;

llamafile/BUILD.mk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ o/$(MODE)/llamafile/tinyblas_cpu_sgemm_amd_fma.o \
143143
o/$(MODE)/llamafile/tinyblas_cpu_sgemm_amd_zen4.o \
144144
o/$(MODE)/llamafile/tinyblas_cpu_sgemm_arm80.o \
145145
o/$(MODE)/llamafile/tinyblas_cpu_sgemm_arm82.o: \
146-
private CCFLAGS += -O3 -fopenmp
146+
private CCFLAGS += -O3 -fopenmp -mgcc
147147

148148
################################################################################
149149
# testing

llamafile/ansiblas.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ namespace ansiBLAS {
2727

2828
static constexpr int KN = 8;
2929

30-
alignas(64) union Vector {
30+
union alignas(64) Vector {
3131
double v[KN];
3232
};
3333

@@ -94,7 +94,7 @@ struct ansiBLAS {
9494
}
9595

9696
template <int RM, int RN>
97-
__target_clones("avx512f,fma") void gemm(int m0, int m, int n0, int n) {
97+
void gemm(int m0, int m, int n0, int n) {
9898
int ytiles = (m - m0) / RM;
9999
int xtiles = (n - n0) / RN;
100100
int tiles = xtiles * ytiles;

0 commit comments

Comments (0)