@@ -158,6 +158,12 @@ typedef sycl::half2 ggml_half2;
158158
159159#endif // GGML_COMMON_DECL_CUDA || GGML_COMMON_DECL_HIP
160160
// GGML_EXTENSION is placed in front of the anonymous unions inside the
// quantization block structs below.
//
// On GNU-compatible compilers it expands to `__extension__`, which marks the
// following declaration as an intentional language extension so pedantic
// builds do not warn about it. `__extension__` is a GNU keyword, so it is
// emitted only when the compiler advertises GNU compatibility; on MSVC and on
// any other compiler the macro expands to nothing and the declaration is
// left as-is.
#if !defined(_MSC_VER) && (defined(__GNUC__) || defined(__clang__))
#define GGML_EXTENSION __extension__
#else // MSVC or a compiler without GNU extensions
#define GGML_EXTENSION
#endif // !_MSC_VER && (__GNUC__ || __clang__)
166+
161167#define QK4_0 32
162168typedef struct {
163169 ggml_half d; // delta
@@ -167,7 +173,7 @@ static_assert(sizeof(block_q4_0) == sizeof(ggml_half) + QK4_0 / 2, "wrong q4_0 b
167173
168174#define QK4_1 32
169175typedef struct {
170- __extension__ union {
176+ GGML_EXTENSION union {
171177 struct {
172178 ggml_half d; // delta
173179 ggml_half m; // min
@@ -188,7 +194,7 @@ static_assert(sizeof(block_q5_0) == sizeof(ggml_half) + sizeof(uint32_t) + QK5_0
188194
189195#define QK5_1 32
190196typedef struct {
191- __extension__ union {
197+ GGML_EXTENSION union {
192198 struct {
193199 ggml_half d; // delta
194200 ggml_half m; // min
@@ -209,7 +215,7 @@ static_assert(sizeof(block_q8_0) == sizeof(ggml_half) + QK8_0, "wrong q8_0 block
209215
210216#define QK8_1 32
211217typedef struct {
212- __extension__ union {
218+ GGML_EXTENSION union {
213219 struct {
214220 ggml_half d; // delta
215221 ggml_half s; // d * sum(qs[i])
@@ -250,7 +256,7 @@ static_assert(sizeof(block_tq2_0) == sizeof(ggml_half) + QK_K / 4, "wrong tq2_0
250256typedef struct {
251257 uint8_t scales[QK_K/16 ]; // scales and mins, quantized with 4 bits
252258 uint8_t qs[QK_K/4 ]; // quants
253- __extension__ union {
259+ GGML_EXTENSION union {
254260 struct {
255261 ggml_half d; // super-block scale for quantized scales
256262 ggml_half dmin; // super-block scale for quantized mins
@@ -277,7 +283,7 @@ static_assert(sizeof(block_q3_K) == sizeof(ggml_half) + QK_K / 4 + QK_K / 8 + 12
277283// weight is represented as x = a * q + b
278284// Effectively 4.5 bits per weight
279285typedef struct {
280- __extension__ union {
286+ GGML_EXTENSION union {
281287 struct {
282288 ggml_half d; // super-block scale for quantized scales
283289 ggml_half dmin; // super-block scale for quantized mins
@@ -294,7 +300,7 @@ static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_half) + K_SCALE_SIZE + QK_K/2,
294300// weight is represented as x = a * q + b
295301// Effectively 5.5 bits per weight
296302typedef struct {
297- __extension__ union {
303+ GGML_EXTENSION union {
298304 struct {
299305 ggml_half d; // super-block scale for quantized scales
300306 ggml_half dmin; // super-block scale for quantized mins
0 commit comments