Commit 78de773
CUDA: faster prompt processing for 4-bit quants (#713)
* Use __byte_perm in get_int_from_table_16
* Use get_int_from_table_16 everywhere for 4-bit quants
---------
Co-authored-by: Iwan Kawrakow <[email protected]>
1 parent 0cb6696 commit 78de773
File tree
4 files changed
+76
-63
lines changed
- ggml/src/ggml-cuda
- template-instances
4 files changed
+76
-63
lines changed

| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
246 | 246 | | |
247 | 247 | | |
248 | 248 | | |
| 249 | + | |
| 250 | + | |
| 251 | + | |
| 252 | + | |
| 253 | + | |
| 254 | + | |
| 255 | + | |
| 256 | + | |
| 257 | + | |
| 258 | + | |
| 259 | + | |
| 260 | + | |
| 261 | + | |
| 262 | + | |
| 263 | + | |
| 264 | + | |
| 265 | + | |
| 266 | + | |
| 267 | + | |
249 | 268 | | |
250 | 269 | | |
251 | 270 | | |
| |||
255 | 274 | | |
256 | 275 | | |
257 | 276 | | |
| 277 | + | |
258 | 278 | | |
259 | 279 | | |
260 | 280 | | |
| |||
389 | 409 | | |
390 | 410 | | |
391 | 411 | | |
392 | | - | |
393 | 412 | | |
394 | 413 | | |
395 | 414 | | |
396 | 415 | | |
397 | 416 | | |
398 | 417 | | |
399 | | - | |
| 418 | + | |
400 | 419 | | |
401 | 420 | | |
402 | | - | |
403 | | - | |
404 | | - | |
| 421 | + | |
| 422 | + | |
| 423 | + | |
405 | 424 | | |
406 | 425 | | |
407 | 426 | | |
| |||
560 | 579 | | |
561 | 580 | | |
562 | 581 | | |
563 | | - | |
564 | 582 | | |
565 | 583 | | |
566 | 584 | | |
| |||
569 | 587 | | |
570 | 588 | | |
571 | 589 | | |
572 | | - | |
| 590 | + | |
573 | 591 | | |
574 | 592 | | |
575 | 593 | | |
576 | 594 | | |
577 | | - | |
578 | | - | |
579 | | - | |
| 595 | + | |
| 596 | + | |
| 597 | + | |
580 | 598 | | |
581 | 599 | | |
582 | 600 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
2509 | 2509 | | |
2510 | 2510 | | |
2511 | 2511 | | |
2512 | | - | |
2513 | | - | |
2514 | | - | |
2515 | 2512 | | |
2516 | 2513 | | |
2517 | 2514 | | |
| |||
2523 | 2520 | | |
2524 | 2521 | | |
2525 | 2522 | | |
2526 | | - | |
2527 | | - | |
| 2523 | + | |
2528 | 2524 | | |
2529 | 2525 | | |
2530 | | - | |
2531 | | - | |
| 2526 | + | |
| 2527 | + | |
2532 | 2528 | | |
2533 | | - | |
2534 | | - | |
| 2529 | + | |
| 2530 | + | |
2535 | 2531 | | |
2536 | 2532 | | |
2537 | 2533 | | |
| |||
2842 | 2838 | | |
2843 | 2839 | | |
2844 | 2840 | | |
2845 | | - | |
2846 | | - | |
2847 | | - | |
2848 | 2841 | | |
2849 | 2842 | | |
2850 | 2843 | | |
| |||
2857 | 2850 | | |
2858 | 2851 | | |
2859 | 2852 | | |
2860 | | - | |
| 2853 | + | |
2861 | 2854 | | |
2862 | 2855 | | |
2863 | 2856 | | |
2864 | 2857 | | |
2865 | | - | |
2866 | | - | |
| 2858 | + | |
2867 | 2859 | | |
2868 | | - | |
2869 | | - | |
| 2860 | + | |
| 2861 | + | |
2870 | 2862 | | |
2871 | | - | |
2872 | | - | |
| 2863 | + | |
| 2864 | + | |
2873 | 2865 | | |
2874 | 2866 | | |
2875 | 2867 | | |
| |||
2896 | 2888 | | |
2897 | 2889 | | |
2898 | 2890 | | |
2899 | | - | |
2900 | | - | |
2901 | | - | |
2902 | 2891 | | |
2903 | 2892 | | |
2904 | 2893 | | |
| |||
2913 | 2902 | | |
2914 | 2903 | | |
2915 | 2904 | | |
2916 | | - | |
| 2905 | + | |
| 2906 | + | |
2917 | 2907 | | |
2918 | 2908 | | |
2919 | 2909 | | |
2920 | | - | |
2921 | | - | |
| 2910 | + | |
2922 | 2911 | | |
2923 | 2912 | | |
2924 | | - | |
2925 | | - | |
| 2913 | + | |
| 2914 | + | |
2926 | 2915 | | |
2927 | | - | |
2928 | | - | |
| 2916 | + | |
| 2917 | + | |
2929 | 2918 | | |
2930 | 2919 | | |
2931 | 2920 | | |
| |||
Lines changed: 6 additions & 10 deletions
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
14 | 14 | | |
15 | 15 | | |
16 | 16 | | |
17 | | - | |
18 | | - | |
19 | | - | |
20 | 17 | | |
21 | 18 | | |
22 | 19 | | |
| |||
31 | 28 | | |
32 | 29 | | |
33 | 30 | | |
34 | | - | |
| 31 | + | |
35 | 32 | | |
36 | 33 | | |
37 | 34 | | |
38 | 35 | | |
39 | 36 | | |
40 | | - | |
41 | | - | |
| 37 | + | |
42 | 38 | | |
43 | | - | |
44 | | - | |
| 39 | + | |
| 40 | + | |
45 | 41 | | |
46 | | - | |
47 | | - | |
| 42 | + | |
| 43 | + | |
48 | 44 | | |
49 | 45 | | |
50 | 46 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
1126 | 1126 | | |
1127 | 1127 | | |
1128 | 1128 | | |
1129 | | - | |
1130 | | - | |
1131 | | - | |
1132 | | - | |
1133 | | - | |
1134 | | - | |
1135 | | - | |
1136 | | - | |
1137 | | - | |
1138 | | - | |
1139 | | - | |
1140 | | - | |
1141 | | - | |
1142 | | - | |
1143 | 1129 | | |
| 1130 | + | |
| 1131 | + | |
| 1132 | + | |
| 1133 | + | |
| 1134 | + | |
| 1135 | + | |
| 1136 | + | |
| 1137 | + | |
| 1138 | + | |
| 1139 | + | |
| 1140 | + | |
| 1141 | + | |
| 1142 | + | |
| 1143 | + | |
| 1144 | + | |
| 1145 | + | |
| 1146 | + | |
| 1147 | + | |
| 1148 | + | |
1144 | 1149 | | |
1145 | 1150 | | |
1146 | 1151 | | |
| |||
1150 | 1155 | | |
1151 | 1156 | | |
1152 | 1157 | | |
| 1158 | + | |
| 1159 | + | |
| 1160 | + | |
| 1161 | + | |
| 1162 | + | |
1153 | 1163 | | |
1154 | 1164 | | |
1155 | 1165 | | |
| |||
0 commit comments