 #!/usr/bin/env node
 
-import { GGMLQuantizationType, gguf } from ".";
+import { GGMLQuantizationType, gguf, ggufAllShards, GGUFParseOutput } from ".";
+import { GGML_QUANT_SIZES } from "./quant-descriptions";
 
 interface PrintColumnHeader {
 	name: string;
@@ -10,11 +11,44 @@ interface PrintColumnHeader {
 
 const mapDtypeToName = Object.fromEntries(Object.entries(GGMLQuantizationType).map(([name, value]) => [value, name]));
 
+function showHelp(exitCode: number) {
+	console.error("Usage: gguf-view [--help|-h] [--show-tensor] [--context|-c N] <path/to/gguf>");
+	console.error("  --help, -h       Show this help message");
+	console.error("  --show-tensor    Show tensor information");
+	console.error("  --context, -c N  Number of tokens in context (default: 4096)");
+	process.exit(exitCode);
+}
+
 async function main() {
-	const ggufPath = process.argv[2];
-	const { metadata, tensorInfos } = await gguf(ggufPath, {
+	let ggufPath = "";
+	let showTensors = false;
+	let nCtx = 4096;
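+	// minimal flag parser: the last non-flag argument is taken as the gguf path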
+	for (let i = 2; i < process.argv.length; i++) {
+		if (process.argv[i] === "--help" || process.argv[i] === "-h") {
+			showHelp(0);
+		} else if (process.argv[i] === "--show-tensor") {
+			showTensors = true;
+		} else if (process.argv[i] === "--context" || process.argv[i] === "-c") {
+			nCtx = Number(process.argv[++i]);
+		} else {
+			ggufPath = process.argv[i];
+		}
+	}
+
+	if (!ggufPath.length) {
+		console.error("Error: Missing path to gguf file");
+		showHelp(1);
+	}
+
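+	// parse every shard of a (possibly multi-part) gguf; a single-file model yields one shard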
+	const { shards } = await ggufAllShards(ggufPath, {
 		allowLocalFile: true,
 	});
+	const { metadata, tensorInfos } = shards[0];
+
+	// merge tensor infos from all shards (metadata is taken from the first shard)
+	for (let i = 1; i < shards.length; i++) {
+		tensorInfos.push(...shards[i].tensorInfos);
+	}
 
 	// TODO: print info about endianness
 	console.log(`* Dumping ${Object.keys(metadata).length} key/value pair(s)`);
@@ -43,29 +77,110 @@ async function main() {
 	);
 
 	console.log();
-	console.log(`* Dumping ${tensorInfos.length} tensor(s)`);
-	printTable(
-		[
-			{ name: "Idx", alignRight: true },
-			{ name: "Num Elements", alignRight: true },
-			{ name: "Shape" },
-			{ name: "Data Type" },
-			{ name: "Name" },
-		],
-		tensorInfos.map((tensorInfo, i) => {
-			const shape = [1n, 1n, 1n, 1n];
-			tensorInfo.shape.forEach((dim, i) => {
-				shape[i] = dim;
-			});
-			return [
-				(i + 1).toString(),
-				shape.reduce((acc, n) => acc * n, 1n).toString(),
-				shape.map((n) => n.toString().padStart(6)).join(", "),
-				mapDtypeToName[tensorInfo.dtype],
-				tensorInfo.name,
-			];
-		})
-	);
+	console.log(`* Memory usage estimation (with context length of ${nCtx} tokens)`);
+	try {
+		const kvUsage = calcMemoryUsage(metadata as GGUFParseOutput<{ strict: false }>["metadata"], nCtx);
+		let modelWeightInBytes = 0;
+		for (const tensorInfo of tensorInfos) {
+			const nElem = Number(tensorInfo.shape.reduce((a, b) => a * b, 1n));
+			const tensorSizeInBytes = nElem * (GGML_QUANT_SIZES[tensorInfo.dtype] / 8);
+			modelWeightInBytes += tensorSizeInBytes;
+		}
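+		// rough overhead estimate: KV cache for an extra 256 tokens plus 5% of the weight size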
+		const overhead =
+			calcMemoryUsage(metadata as GGUFParseOutput<{ strict: false }>["metadata"], 256).totalBytes +
+			modelWeightInBytes * 0.05;
+		const totalMemoryUsage = kvUsage.totalBytes + overhead + modelWeightInBytes;
+		printTable(
+			[{ name: "Item" }, { name: "Memory usage", alignRight: true }],
+			[
+				["K cache", (kvUsage.totalBytesK / 1e9).toFixed(2) + " GB"],
+				["V cache", (kvUsage.totalBytesV / 1e9).toFixed(2) + " GB"],
+				["Weight", (modelWeightInBytes / 1e9).toFixed(2) + " GB"],
+				["Overhead", (overhead / 1e9).toFixed(2) + " GB"],
+				["", "---"],
+				["TOTAL", (totalMemoryUsage / 1e9).toFixed(2) + " GB"],
+			]
+		);
+	} catch (e) {
+		console.error(`Error: ${(e as Error).message}`);
+	}
+
+	if (showTensors) {
+		console.log();
+		console.log(`* Dumping ${tensorInfos.length} tensor(s)`);
+		printTable(
+			[
+				{ name: "Idx", alignRight: true },
+				{ name: "Num Elements", alignRight: true },
+				{ name: "Shape" },
+				{ name: "Data Type" },
+				{ name: "Name" },
+			],
+			tensorInfos.map((tensorInfo, i) => {
+				const shape = [1n, 1n, 1n, 1n];
+				tensorInfo.shape.forEach((dim, i) => {
+					shape[i] = dim;
+				});
+				return [
+					(i + 1).toString(),
+					shape.reduce((acc, n) => acc * n, 1n).toString(),
+					shape.map((n) => n.toString().padStart(6)).join(", "),
+					mapDtypeToName[tensorInfo.dtype],
+					tensorInfo.name,
+				];
+			})
+		);
+	} else {
+		console.log();
+		console.log(`* Use --show-tensor to display tensor information`);
+	}
+}
+
+function calcMemoryUsage(
+	metadata: GGUFParseOutput<{ strict: false }>["metadata"],
+	kvSize: number,
+	kvTypeK: GGMLQuantizationType = GGMLQuantizationType.F16,
+	kvTypeV: GGMLQuantizationType = GGMLQuantizationType.F16
+) {
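+	// Per layer, K stores n_embd_head_k * n_head_kv elements per token and V stores
+	// n_embd_head_v * n_head_kv. Worked example (hypothetical Llama-3-8B-like config:
+	// 32 layers, 8 KV heads, head size 128, 4096 tokens): 32 * 8 * 128 * 4096 ≈ 134M
+	// elements per cache, i.e. ~0.27 GB each at F16.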
+	const arch = metadata["general.architecture"] ?? "unknown";
+	const n_embd = (metadata[`${arch}.embedding_length`] as number) ?? 0;
+	const n_head = (metadata[`${arch}.attention.head_count`] as number) ?? 0;
+	const n_embd_head_k = (metadata[`${arch}.attention.key_length`] as number) ?? n_embd / n_head;
+	const n_embd_head_v = (metadata[`${arch}.attention.value_length`] as number) ?? n_embd / n_head;
+	const n_head_kv = (metadata[`${arch}.attention.head_count_kv`] as number[] | number) ?? [];
+	const n_layer = (metadata[`${arch}.block_count`] as number) ?? 0;
+
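+	// recurrent architectures keep a fixed-size state instead of a per-token KV cache,
+	// so the estimation below does not apply to them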
+	if (arch.startsWith("mamba") || arch.startsWith("rwkv")) {
+		throw new Error(`Memory usage estimation for arch "${arch}" is not supported`);
+	}
+
+	const n_head_kv_arr = Array(n_layer).fill(n_head);
+	if (Array.isArray(n_head_kv)) {
+		for (let i = 0; i < n_layer; i++) {
+			if (n_head_kv[i]) {
+				n_head_kv_arr[i] = n_head_kv[i];
+			}
+		}
+	} else {
+		for (let i = 0; i < n_layer; i++) {
+			n_head_kv_arr[i] = n_head_kv;
+		}
+	}
+
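+	// with grouped-query attention (GQA), K/V are stored once per KV head, not per query head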
+	let totalElemsK = 0;
+	let totalElemsV = 0;
+	for (let i = 0; i < n_layer; i++) {
+		const n_embd_k_gqa = n_embd_head_k * n_head_kv_arr[i];
+		const n_embd_v_gqa = n_embd_head_v * n_head_kv_arr[i];
+		totalElemsK += n_embd_k_gqa * kvSize;
+		totalElemsV += n_embd_v_gqa * kvSize;
+	}
+
+	const totalBytesK = totalElemsK * (GGML_QUANT_SIZES[kvTypeK] / 8);
+	const totalBytesV = totalElemsV * (GGML_QUANT_SIZES[kvTypeV] / 8);
+	return {
+		totalBytesK,
+		totalBytesV,
+		totalBytes: totalBytesK + totalBytesV,
+	};
 }
 
 function printTable(header: PrintColumnHeader[], rows: string[][], leftPad = 2) {