|
71 | 71 | ) |
72 | 72 |
|
73 | 73 | # Import common LoRA utilities |
74 | | -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| 74 | +# Note: Using sys.path for standalone script compatibility. |
| 75 | +# For package installations, use: from semantic_router.training.common_lora_utils import ... |
| 76 | +_parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) |
| 77 | +if _parent_dir not in sys.path: |
| 78 | + sys.path.insert(0, _parent_dir) |
| 79 | + |
75 | 80 | from common_lora_utils import ( |
76 | 81 | clear_gpu_memory, |
77 | 82 | get_device_info, |
@@ -435,8 +440,9 @@ def main( |
435 | 440 | num_train_epochs=num_epochs, |
436 | 441 | per_device_train_batch_size=batch_size, # Configurable via parameter |
437 | 442 | per_device_eval_batch_size=batch_size, |
438 | | - gradient_accumulation_steps=16 |
439 | | - // batch_size, # Maintain effective batch size of 16 |
| 443 | + gradient_accumulation_steps=max( |
| 444 | + 1, 16 // batch_size |
| 445 | + ), # Target an effective batch size of 16; never fewer than 1 accumulation step |
440 | 446 | learning_rate=learning_rate, |
441 | 447 | weight_decay=0.01, |
442 | 448 | logging_dir=f"{output_dir}/logs", |
@@ -589,7 +595,7 @@ def demo_inference(model_path: str, model_name: str = "Qwen/Qwen3-0.6B"): |
589 | 595 | use_fp16 = ( |
590 | 596 | compute_capability[0] >= 7 |
591 | 597 | ) # Volta and newer support efficient FP16 |
592 | | - except: |
| 598 | + except Exception: |
593 | 599 | use_fp16 = False |
594 | 600 |
|
595 | 601 | base_model = AutoModelForCausalLM.from_pretrained( |
|
0 commit comments