Preserve original rope scaling type in export due to transformers library AutoConfig issue #452
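For context on the issue being worked around: when a checkpoint such as Qwen2.5-VL is loaded through the transformers AutoConfig machinery, its rope_scaling entry gets rewritten (the "mrope" type becomes "default" and extra fields are added), so the config written out at export time no longer matches the original. A minimal sketch of the mutation, with illustrative values rather than ones taken from a real checkpoint:

# Illustrative values only; the shape of the change follows the PR description and the checks in the diff below.
original_rope_scaling = {"type": "mrope", "mrope_section": [16, 24, 24]}  # as stored in the source config.json

# After a transformers AutoConfig round-trip, the type comes back as "default"
# and an additional "rope_type" field appears, which is what this PR undoes on export.
mutated_rope_scaling = {"type": "default", "rope_type": "default", "mrope_section": [16, 24, 24]}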
@@ -227,6 +227,54 @@ def model_config_from_dict(d: dict) -> ModelConfig:
    return _from_dict(config_type, d)


def restore_original_rope_scaling(config_data: dict, original_model_path: str) -> dict:
    """Restore original rope_scaling configuration if it was modified by transformers.

    Some VLM models like Qwen2.5-VL have their rope_scaling configuration modified
    by the transformers library during loading (e.g., from "mrope" to "default" with
    additional fields). This function restores the original configuration.

    Args:
        config_data: The model configuration dictionary to restore
        original_model_path: Path to the original model directory

    Returns:
        The config_data dictionary with restored rope_scaling (modified in-place)
    """
    import json
    import warnings
    from pathlib import Path

    try:
Review comment: Can we just do a copy of the config.json and keep it the same as before?

Reply: I prefer the current solution, as we can't keep it the same because we need to add the quantization config into it.
        original_config_file = Path(original_model_path) / "config.json"
        if original_config_file.exists():
            with open(original_config_file) as f:
                raw_original_config = json.load(f)

            # Check if rope_scaling was modified from mrope to default
            if (
                "rope_scaling" in raw_original_config
                and "rope_scaling" in config_data
                and raw_original_config["rope_scaling"].get("type") == "mrope"
                and config_data["rope_scaling"].get("type") == "default"
                and "rope_type" in config_data["rope_scaling"]
            ):
                print(f"Restoring original rope_scaling configuration from {original_model_path}")
                config_data["rope_scaling"] = raw_original_config["rope_scaling"]

                # Also restore rope_scaling in text_config if it exists
                if (
                    "text_config" in config_data
                    and "rope_scaling" in config_data["text_config"]
                    and config_data["text_config"]["rope_scaling"].get("type") == "default"
                ):
                    config_data["text_config"]["rope_scaling"] = raw_original_config["rope_scaling"]
    except Exception as e:
        warnings.warn(f"Could not restore original rope_scaling configuration: {e}")

    return config_data


def pad_weights(weights, tp_size):
    """Returns the padded weights to tp_size."""
    assert len(weights.shape) > 1
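For reference, a minimal sketch of how an export path might apply the new helper right before the config is written out; the surrounding variables (config_data, original_model_path, export_dir) and the write step are assumptions for illustration, not part of this diff:

import json
from pathlib import Path

# Hypothetical export step: config_data is the exported model config (including any added
# quantization config) and original_model_path is the source Hugging Face model directory.
config_data = restore_original_rope_scaling(config_data, original_model_path)

with open(Path(export_dir) / "config.json", "w") as f:
    json.dump(config_data, f, indent=2)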
Review comment: We can move this above the if/else instead of having it in both the if and else branches.

Reply: Good catch, moved it below the if/else.
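The exchange above refers to a call site that is not shown in this hunk. A hypothetical illustration of the refactor being suggested, assuming the duplicated code is the new restore call and with invented branch contents:

# Before (hypothetical): the same call duplicated in both branches.
if is_vlm_export:
    config_data = build_vlm_config(model)
    config_data = restore_original_rope_scaling(config_data, original_model_path)
else:
    config_data = build_llm_config(model)
    config_data = restore_original_rope_scaling(config_data, original_model_path)

# After: hoisted so it runs once below the if/else.
if is_vlm_export:
    config_data = build_vlm_config(model)
else:
    config_data = build_llm_config(model)
config_data = restore_original_rope_scaling(config_data, original_model_path)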