Skip to content

Commit 71a575d

Browse files
authored
Merge pull request #28 from GreenBitAI/dev/haojin
Dev/haojin
2 parents 23be85d + f957880 commit 71a575d

File tree

9 files changed

+100
-8
lines changed

9 files changed

+100
-8
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
44
The format is based on [Keep a Changelog](http://keepachangelog.com/)
55
and this project adheres to [Semantic Versioning](http://semver.org/).
66

7+
## [0.2.6] - 2025/6/4
8+
9+
### Updated
10+
11+
- Fixed missing RoPE type problem in the deepseek-r1-qwen3-8B model
12+
- Updated README with Qwen3 model notes and Transformers compatibility details
713

814
## [0.2.5] - 2025/5/30
915

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,12 @@ cd green-bit-llm
7272
pip install -r requirements.txt
7373
```
7474

75+
**Note: For Qwen3 model support, you need to install the development version of transformers:**
76+
```bash
77+
pip install git+https://github.com/huggingface/transformers.git
78+
```
79+
This installs transformers version 4.53.0.dev0, which includes the necessary Qwen3 model support.
80+
7581
5. Install [Flash Attention](https://github.com/Dao-AILab/flash-attention) (`flash-attn`) according to their [official instructions](https://github.com/Dao-AILab/flash-attention?tab=readme-ov-file#installation-and-features).
7682
```bash
7783
pip install flash-attn --no-build-isolation

green_bit_llm/common/model.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@
2727
find_layers,
2828
apply_dtype_to,
2929
apply_quant_strategy,
30-
detect_moe_model_type
30+
detect_moe_model_type,
31+
load_config_with_rope_fix
3132
)
3233

3334
from green_bit_llm.common.utils import (
@@ -379,7 +380,9 @@ def load(
379380
raise Exception("Error: Bitorch Engine layers are not available.")
380381

381382
model_path = get_model_path(path_or_hf_repo)
382-
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
383+
384+
# NOTE: fix RoPE type missing problem in deepseek-r1-qwen3-8B model.
385+
config = load_config_with_rope_fix(model_path)
383386

384387
layer_mode, bits, group_size = get_layer_mode(path_or_hf_repo, config)
385388

green_bit_llm/common/utils.py

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import os
12
import re
3+
import json
24
from pathlib import Path
35
from typing import Dict
46

@@ -364,4 +366,78 @@ def detect_moe_model_type(config):
364366
'has_shared_experts': getattr(config, 'n_shared_experts', 0) > 0
365367
})
366368

367-
return model_info
369+
return model_info
370+
371+
372+
def fix_qwen3_rope_config(config_dict):
    """Normalize the ``rope_scaling`` entry of a raw model configuration.

    Some Qwen3-based checkpoints ship a ``rope_scaling`` dictionary that
    uses the legacy ``type`` key (or omits the type entirely), which newer
    transformers releases reject.  This renames ``type`` to ``rope_type``
    when ``rope_type`` is absent, falls back to ``'default'`` when neither
    key exists, and nulls out any non-dict ``rope_scaling`` value.

    Args:
        config_dict: Raw configuration dictionary (mutated in place).

    Returns:
        The same dictionary with a normalized ``rope_scaling`` entry.
    """
    if 'rope_scaling' not in config_dict:
        return config_dict

    scaling = config_dict['rope_scaling']
    if scaling is None:
        return config_dict

    if not isinstance(scaling, dict):
        # Anything other than a dict (or None) is malformed; drop it.
        config_dict['rope_scaling'] = None
        return config_dict

    if 'rope_type' not in scaling:
        # Prefer the legacy 'type' key when present; otherwise default.
        scaling['rope_type'] = scaling.pop('type') if 'type' in scaling else 'default'

    config_dict['rope_scaling'] = scaling
    return config_dict
405+
406+
def load_config_with_rope_fix(model_path):
    """Load a model configuration, repairing broken RoPE settings if needed.

    First tries the normal ``AutoConfig.from_pretrained`` path.  If that
    fails with a ``KeyError`` mentioning ``rope_type`` or ``rope_scaling``
    (e.g. deepseek-r1-qwen3-8B checkpoints whose ``rope_scaling`` dict
    lacks a ``rope_type`` entry), the raw ``config.json`` is read,
    normalized via ``fix_qwen3_rope_config`` and rebuilt into a config
    object.

    Args:
        model_path: Local directory containing ``config.json``.

    Returns:
        A transformers ``PretrainedConfig`` instance.

    Raises:
        FileNotFoundError: If the repair path is taken but ``config.json``
            does not exist under ``model_path``.
        KeyError: Re-raised unchanged when the failure is unrelated to RoPE.
    """
    try:
        # Try to load normally first.
        return AutoConfig.from_pretrained(model_path, trust_remote_code=True)
    except KeyError as e:
        # BUG FIX: str(KeyError) is just the repr of the missing key
        # (e.g. "'rope_type'"), so requiring BOTH substrings made the
        # repair path unreachable.  Test for either RoPE-related key.
        if "rope_type" not in str(e) and "rope_scaling" not in str(e):
            raise  # unrelated failure; bare raise preserves the traceback

        print("RoPE configuration problem detected, trying to repair...")

        # Read and repair the raw configuration file.
        config_path = os.path.join(model_path, "config.json")
        if not os.path.exists(config_path):
            raise FileNotFoundError(f"Configuration file does not exist: {config_path}")

        with open(config_path, 'r', encoding='utf-8') as f:
            config_dict = json.load(f)

        # Fix the RoPE configuration.
        config_dict = fix_qwen3_rope_config(config_dict)

        # Create a config object from the repaired dictionary.
        from transformers.models.auto.configuration_auto import CONFIG_MAPPING
        model_type = config_dict.get('model_type', 'qwen3')

        if model_type in CONFIG_MAPPING:
            config = CONFIG_MAPPING[model_type].from_dict(config_dict)
        else:
            # BUG FIX: AutoConfig has no from_dict() (it is a factory
            # class); fall back to the generic base class instead.
            from transformers import PretrainedConfig
            config = PretrainedConfig.from_dict(config_dict)

        print("Successfully repaired RoPE configuration")
        return config

green_bit_llm/evaluation/evaluate.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,9 +212,10 @@ def main(args):
212212

213213
# Building configs
214214
tokenizer_config = {"trust_remote_code": True if args.trust_remote_code else None}
215+
215216
pretrain_model_config = {
216217
"trust_remote_code": True if args.trust_remote_code else None,
217-
"use_flash_attention_2": True if args.use_flash_attention_2 else None
218+
"attn_implementation": "flash_attention_2" if args.use_flash_attention_2 else None
218219
}
219220

220221
if args.eos_token is not None:

green_bit_llm/routing/libra_router

green_bit_llm/sft/finetune.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def main(args):
8787
tokenizer_config = {"trust_remote_code": True if args.trust_remote_code else None}
8888
pretrain_model_config = {
8989
"trust_remote_code": True if args.trust_remote_code else None,
90-
"use_flash_attention_2": True if args.use_flash_attention_2 else None
90+
"attn_implementation": "flash_attention_2" if args.use_flash_attention_2 else None
9191
}
9292

9393
model, tokenizer, config = load(

green_bit_llm/sft/peft_lora.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def main(args):
6161
tokenizer_config = {"trust_remote_code": True if args.trust_remote_code else None}
6262
pretrain_model_config = {
6363
"trust_remote_code": True if args.trust_remote_code else None,
64-
"use_flash_attention_2": True if args.use_flash_attention_2 else None
64+
"attn_implementation": "flash_attention_2" if args.use_flash_attention_2 else None
6565
}
6666

6767
model, tokenizer, config = load(

green_bit_llm/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.2.5"
1+
__version__ = "0.2.6"

0 commit comments

Comments
 (0)