This repository was archived by the owner on Sep 10, 2025. It is now read-only.
  
  
  
  
    
    
    
      
    
  
  
    
File tree: 1 file changed, +7 −2 lines changed
lines changed Original file line number Diff line number Diff line change @@ -292,8 +292,13 @@ jobs:
292292        echo "::endgroup::" 
293293
294294        echo "::group::Run inference with quantize file" 
295-         python3 torchchat.py export --output-snap model.tc --dtype bfloat16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
296-         python3 torchchat.py generate --snap model.tc --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
295+         for DEVICE in cpu; do # cuda  
296+           # cuda - fails because `AttributeError: 'Linear' object has no attribute '_linear_extra_repr'` 
297+           # follow up with torchao as a separate PR 
298+           echo "saving snapshot for device ${DEVICE} and dtype bfloat16, and reloading as snapshot" 
299+           python3 torchchat.py export --device ${DEVICE} --output-snap model.tc --dtype bfloat16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
300+           python3 torchchat.py generate --device ${DEVICE} --snap model.tc --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth" 
301+         done 
297302        echo "::endgroup::" 
298303
299304test-gpu-aoti-float32 :
 
 
   
 
     
   
   
          
    
    
     
    
      
     
     
    You can’t perform that action at this time.
  
 
    
  
    
      
        
     
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments