Skip to content

Commit 71a575d

Browse files
authored
Merge pull request #28 from GreenBitAI/dev/haojin
Dev/haojin
2 parents 23be85d + f957880 commit 71a575d

File tree

9 files changed

+100
-8
lines changed

9 files changed

+100
-8
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
44
The format is based on [Keep a Changelog](http://keepachangelog.com/)
55
and this project adheres to [Semantic Versioning](http://semver.org/).
66

7+
## [0.2.6] - 2025/6/4
8+
9+
### Updated
10+
11+
- Fixed missing RoPE type problem in the deepseek-r1-qwen3-8B model
12+
- Updated README with Qwen3 model notes and Transformers compatibility details
713

814
## [0.2.5] - 2025/5/30
915

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,12 @@ cd green-bit-llm
7272
pip install -r requirements.txt
7373
```
7474

75+
**Note: For Qwen3 model support, you need to install the development version of transformers:**
76+
```bash
77+
pip install git+https://github.com/huggingface/transformers.git
78+
```
79+
This installs transformers version 4.53.0.dev0, which includes the necessary Qwen3 model support.
80+
7581
5. Install [Flash Attention](https://github.com/Dao-AILab/flash-attention) (`flash-attn`) according to their [official instructions](https://github.com/Dao-AILab/flash-attention?tab=readme-ov-file#installation-and-features).
7682
```bash
7783
pip install flash-attn --no-build-isolation

green_bit_llm/common/model.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@
2727
find_layers,
2828
apply_dtype_to,
2929
apply_quant_strategy,
30-
detect_moe_model_type
30+
detect_moe_model_type,
31+
load_config_with_rope_fix
3132
)
3233

3334
from green_bit_llm.common.utils import (
@@ -379,7 +380,9 @@ def load(
379380
raise Exception("Error: Bitorch Engine layers are not available.")
380381

381382
model_path = get_model_path(path_or_hf_repo)
382-
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
383+
384+
# NOTE: fix RoPE type missing problem in deepseek-r1-qwen3-8B model.
385+
config = load_config_with_rope_fix(model_path)
383386

384387
layer_mode, bits, group_size = get_layer_mode(path_or_hf_repo, config)
385388

green_bit_llm/common/utils.py

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import os
12
import re
3+
import json
24
from pathlib import Path
35
from typing import Dict
46

@@ -364,4 +366,78 @@ def detect_moe_model_type(config):
364366
'has_shared_experts': getattr(config, 'n_shared_experts', 0) > 0
365367
})
366368

367-
return model_info
369+
return model_info
370+
371+
372+
def fix_qwen3_rope_config(config_dict):
    """Normalize the ``rope_scaling`` entry of a raw model configuration.

    Some Qwen3-based checkpoints ship a ``rope_scaling`` dictionary that
    uses the legacy ``type`` key (or omits the type entirely), which newer
    transformers releases reject.  This renames ``type`` to ``rope_type``
    when ``rope_type`` is absent, falls back to ``'default'`` when neither
    key exists, and nulls out any non-dict ``rope_scaling`` value.

    Args:
        config_dict: Raw configuration dictionary (mutated in place).

    Returns:
        The same dictionary with a normalized ``rope_scaling`` entry.
    """
    if 'rope_scaling' not in config_dict:
        return config_dict

    scaling = config_dict['rope_scaling']
    if scaling is None:
        return config_dict

    if not isinstance(scaling, dict):
        # Anything other than a dict (or None) is malformed; drop it.
        config_dict['rope_scaling'] = None
        return config_dict

    if 'rope_type' not in scaling:
        # Prefer the legacy 'type' key when present; otherwise default.
        scaling['rope_type'] = scaling.pop('type') if 'type' in scaling else 'default'

    config_dict['rope_scaling'] = scaling
    return config_dict
405+
406+
def load_config_with_rope_fix(model_path):
    """Load a model configuration, repairing broken RoPE settings if needed.

    First tries the normal ``AutoConfig.from_pretrained`` path.  If that
    fails with a ``KeyError`` mentioning ``rope_type`` or ``rope_scaling``
    (e.g. deepseek-r1-qwen3-8B checkpoints whose ``rope_scaling`` dict
    lacks a ``rope_type`` entry), the raw ``config.json`` is read,
    normalized via ``fix_qwen3_rope_config`` and rebuilt into a config
    object.

    Args:
        model_path: Local directory containing ``config.json``.

    Returns:
        A transformers ``PretrainedConfig`` instance.

    Raises:
        FileNotFoundError: If the repair path is taken but ``config.json``
            does not exist under ``model_path``.
        KeyError: Re-raised unchanged when the failure is unrelated to RoPE.
    """
    try:
        # Try to load normally first.
        return AutoConfig.from_pretrained(model_path, trust_remote_code=True)
    except KeyError as e:
        # BUG FIX: str(KeyError) is just the repr of the missing key
        # (e.g. "'rope_type'"), so requiring BOTH substrings made the
        # repair path unreachable.  Test for either RoPE-related key.
        if "rope_type" not in str(e) and "rope_scaling" not in str(e):
            raise  # unrelated failure; bare raise preserves the traceback

        print("RoPE configuration problem detected, trying to repair...")

        # Read and repair the raw configuration file.
        config_path = os.path.join(model_path, "config.json")
        if not os.path.exists(config_path):
            raise FileNotFoundError(f"Configuration file does not exist: {config_path}")

        with open(config_path, 'r', encoding='utf-8') as f:
            config_dict = json.load(f)

        # Fix the RoPE configuration.
        config_dict = fix_qwen3_rope_config(config_dict)

        # Create a config object from the repaired dictionary.
        from transformers.models.auto.configuration_auto import CONFIG_MAPPING
        model_type = config_dict.get('model_type', 'qwen3')

        if model_type in CONFIG_MAPPING:
            config = CONFIG_MAPPING[model_type].from_dict(config_dict)
        else:
            # BUG FIX: AutoConfig has no from_dict() (it is a factory
            # class); fall back to the generic base class instead.
            from transformers import PretrainedConfig
            config = PretrainedConfig.from_dict(config_dict)

        print("Successfully repaired RoPE configuration")
        return config

green_bit_llm/evaluation/evaluate.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,9 +212,10 @@ def main(args):
212212

213213
# Building configs
214214
tokenizer_config = {"trust_remote_code": True if args.trust_remote_code else None}
215+
215216
pretrain_model_config = {
216217
"trust_remote_code": True if args.trust_remote_code else None,
217-
"use_flash_attention_2": True if args.use_flash_attention_2 else None
218+
"attn_implementation": "flash_attention_2" if args.use_flash_attention_2 else None
218219
}
219220

220221
if args.eos_token is not None:

green_bit_llm/routing/libra_router

green_bit_llm/sft/finetune.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def main(args):
8787
tokenizer_config = {"trust_remote_code": True if args.trust_remote_code else None}
8888
pretrain_model_config = {
8989
"trust_remote_code": True if args.trust_remote_code else None,
90-
"use_flash_attention_2": True if args.use_flash_attention_2 else None
90+
"attn_implementation": "flash_attention_2" if args.use_flash_attention_2 else None
9191
}
9292

9393
model, tokenizer, config = load(

green_bit_llm/sft/peft_lora.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def main(args):
6161
tokenizer_config = {"trust_remote_code": True if args.trust_remote_code else None}
6262
pretrain_model_config = {
6363
"trust_remote_code": True if args.trust_remote_code else None,
64-
"use_flash_attention_2": True if args.use_flash_attention_2 else None
64+
"attn_implementation": "flash_attention_2" if args.use_flash_attention_2 else None
6565
}
6666

6767
model, tokenizer, config = load(

green_bit_llm/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.2.5"
1+
__version__ = "0.2.6"

0 commit comments

Comments
 (0)