@@ -355,6 +355,7 @@ export class GgufInsights {
355355 gpu : GgufTensorInfo [ ]
356356 } {
357357 const tensorInfo = this . _ggufFileInfo . fullTensorInfo ?? [ ] ;
358+ const architecture = this . _ggufFileInfo . metadata ?. general ?. architecture ;
358359
359360 if ( gpuLayers === 0 ) {
360361 return {
@@ -369,7 +370,15 @@ export class GgufInsights {
369370 const gpuTensors : GgufTensorInfo [ ] = [ ] ;
370371 const cpuTensors : GgufTensorInfo [ ] = [ ] ;
371372
373+ let tokenEmbedLayer : GgufTensorInfo | undefined ;
374+ let mainOutputLayer : GgufTensorInfo | undefined ;
375+
372376 for ( const singleTensorInfo of tensorInfo ) {
377+ if ( isMainOutputLayer ( singleTensorInfo . name ) )
378+ mainOutputLayer = singleTensorInfo ;
379+ else if ( isTokenEmbedLayer ( singleTensorInfo . name ) )
380+ tokenEmbedLayer = singleTensorInfo ;
381+
373382 // in the implementation of `llm_load_tensors`, layers with `LLM_TENSOR_LAYER_INPUT` are always
374383 // loaded with `model.dev_input`, which is always set to the CPU
375384 if ( isInputLayer ( singleTensorInfo . name ) ) {
@@ -391,8 +400,6 @@ export class GgufInsights {
391400 const { layerNumber} = parseTensorName ( singleTensorInfo . name ) ;
392401
393402 if ( gpuLayers !== this . totalLayers ) {
394- const architecture = this . _ggufFileInfo . metadata ?. general ?. architecture ;
395-
396403 if ( architecture === GgufArchitectureType . qwen2 || architecture === GgufArchitectureType . gemma ) {
397404 if ( layerNumber != null && layerNumber >= startGpuLayer )
398405 gpuTensors . push ( singleTensorInfo ) ;
@@ -409,6 +416,9 @@ export class GgufInsights {
409416 cpuTensors . push ( singleTensorInfo ) ;
410417 }
411418
419+ if ( mainOutputLayer == null && tokenEmbedLayer != null && gpuLayers === this . totalLayers && ! gpuTensors . includes ( tokenEmbedLayer ) )
420+ gpuTensors . push ( tokenEmbedLayer ) ;
421+
412422 return {
413423 cpu : cpuTensors ,
414424 gpu : gpuTensors
@@ -626,3 +636,15 @@ function isOutputLayer(layerName: string) {
626636
627637 return false ;
628638}
639+
/**
 * Checks whether a tensor name belongs to what this file treats as the main output layer.
 *
 * Only the segment before the first `.` is compared, so `output.weight` matches
 * while `output_norm.weight` and `blk.0.output.weight` do not.
 * @param layerName - The full tensor name
 * @returns `true` when the first dot-separated segment is exactly `"output"`
 */
function isMainOutputLayer(layerName: string): boolean {
    // a name without any `.` is compared as a whole, since `split` returns it as the only segment
    const [firstPart] = layerName.split(".");

    return firstPart === "output";
}
645+
/**
 * Checks whether a tensor name belongs to what this file treats as the token embedding layer.
 *
 * Only the segment before the first `.` is compared, so `token_embd.weight` matches
 * while `token_embd_norm.weight` and `blk.0.token_embd.weight` do not.
 * @param layerName - The full tensor name
 * @returns `true` when the first dot-separated segment is exactly `"token_embd"`
 */
function isTokenEmbedLayer(layerName: string): boolean {
    // a name without any `.` is compared as a whole, since `split` returns it as the only segment
    const [firstPart] = layerName.split(".");

    return firstPart === "token_embd";
}
0 commit comments