diff --git a/.github/workflows/pr-io-build.yaml b/.github/workflows/pr-io-build.yaml
index 7531ab329d1..321560b8663 100644
--- a/.github/workflows/pr-io-build.yaml
+++ b/.github/workflows/pr-io-build.yaml
@@ -29,4 +29,4 @@ jobs:
       run: |
         git config --local --get remote.origin.url
         cd docs/build_docs
-        bash build.sh latest
\ No newline at end of file
+        bash build.sh latest
diff --git a/neural_compressor/common/base_tuning.py b/neural_compressor/common/base_tuning.py
index 88f6be5b188..50cdbc2af68 100644
--- a/neural_compressor/common/base_tuning.py
+++ b/neural_compressor/common/base_tuning.py
@@ -18,7 +18,7 @@
 from typing import Any, Callable, Dict, Generator, Iterator, List, Optional, Sized, Tuple, Union
 
 from neural_compressor.common.base_config import BaseConfig
-from neural_compressor.common.utils import TuningLogger, logger
+from neural_compressor.common.utils import Statistics, TuningLogger, logger
 
 __all__ = [
     "Evaluator",
@@ -423,6 +423,47 @@ def add_trial_result(self, trial_index: int, trial_result: Union[int, float], qu
         trial_record = _TrialRecord(trial_index, trial_result, quant_config)
         self.tuning_history.append(trial_record)
 
+        # Print tuning results table
+        self._print_trial_results_table(trial_index, trial_result)
+
+    def _print_trial_results_table(self, trial_index: int, trial_result: Union[int, float]) -> None:
+        """Print trial results in a formatted table using Statistics class."""
+        baseline_val = self.baseline if self.baseline is not None else 0.0
+        baseline_str = f"{baseline_val:.4f}" if self.baseline is not None else "N/A"
+        target_threshold_str = (
+            f"{baseline_val * (1 - self.tuning_config.tolerable_loss):.4f}" if self.baseline is not None else "N/A"
+        )
+
+        # Calculate relative loss if baseline is available
+        relative_loss_val = 0.0
+        relative_loss_str = "N/A"
+        if self.baseline is not None:
+            relative_loss_val = (baseline_val - trial_result) / baseline_val
+            relative_loss_str = f"{relative_loss_val*100:.2f}%"
+
+        # Get best result so far
+        best_result = max(record.trial_result for record in self.tuning_history)
+
+        # Status indicator with emoji
+        if self.baseline is not None and trial_result >= (baseline_val * (1 - self.tuning_config.tolerable_loss)):
+            status = "✅ PASSED"
+        else:
+            status = "❌ FAILED"
+
+        # Prepare data for Statistics table with combined fields
+        field_names = ["📊 Metric", "📈 Value"]
+        output_data = [
+            ["Trial / Progress", f"{len(self.tuning_history)}/{self.tuning_config.max_trials}"],
+            ["Baseline / Target", f"{baseline_str} / {target_threshold_str}"],
+            ["Current / Status", f"{trial_result:.4f} | {status}"],
+            ["Best / Relative Loss", f"{best_result:.4f} / {relative_loss_str}"],
+        ]
+
+        # Use Statistics class to print the table
+        Statistics(
+            output_data, header=f"🎯 Auto-Tune Trial #{trial_index} Results", field_names=field_names
+        ).print_stat()
+
     def set_baseline(self, baseline: float):
         """Set the baseline value for auto-tune.
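A minimal sketch of how the table added above could render in isolation. The numbers here are made up for illustration; only the Statistics constructor and print_stat() usage are taken from the hunk above:

    from neural_compressor.common.utils import Statistics

    # Illustrative values only: a baseline of 0.7590 and one trial at 0.7534.
    baseline, trial_result, tolerable_loss = 0.7590, 0.7534, 0.01
    target = baseline * (1 - tolerable_loss)              # lowest acceptable accuracy (0.7514)
    relative_loss = (baseline - trial_result) / baseline  # fraction of accuracy lost (~0.74%)
    status = "✅ PASSED" if trial_result >= target else "❌ FAILED"

    output_data = [
        ["Trial / Progress", "1/4"],
        ["Baseline / Target", f"{baseline:.4f} / {target:.4f}"],
        ["Current / Status", f"{trial_result:.4f} | {status}"],
        ["Best / Relative Loss", f"{trial_result:.4f} / {relative_loss * 100:.2f}%"],
    ]
    Statistics(
        output_data, header="🎯 Auto-Tune Trial #0 Results", field_names=["📊 Metric", "📈 Value"]
    ).print_stat()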
@@ -488,4 +529,10 @@ def init_tuning(tuning_config: TuningConfig) -> Tuple[ConfigLoader, TuningLogger
     config_loader = ConfigLoader(config_set=tuning_config.config_set, sampler=tuning_config.sampler)
     tuning_logger = TuningLogger()
     tuning_monitor = TuningMonitor(tuning_config)
+
+    # Update max_trials based on actual number of available configurations
+    actual_config_count = len(config_loader.config_set)
+    if tuning_config.max_trials > actual_config_count:
+        tuning_config.max_trials = actual_config_count
+
     return config_loader, tuning_logger, tuning_monitor
diff --git a/test/3x/torch/quantization/weight_only/test_autoround.py b/test/3x/torch/quantization/weight_only/test_autoround.py
index 3eb5d52850a..aeb749dad8a 100644
--- a/test/3x/torch/quantization/weight_only/test_autoround.py
+++ b/test/3x/torch/quantization/weight_only/test_autoround.py
@@ -370,6 +370,7 @@ def test_scheme(self, scheme):
 
     @pytest.mark.skipif(not ct_installed, reason="The compressed-tensors module is not installed.")
+    @pytest.mark.skipif(Version(auto_round.__version__) < Version("0.9.0"), reason="target bits is not supported.")
     def test_target_bits(self):
         fp32_model = AutoModelForCausalLM.from_pretrained(
             "facebook/opt-125m",
@@ -401,6 +402,8 @@ def test_target_bits(self):
         "model is not quantized correctly, please check."
 
+    @pytest.mark.skipif(not ct_installed, reason="The compressed-tensors module is not installed.")
+    @pytest.mark.skipif(Version(auto_round.__version__) < Version("0.9.0"), reason="target bits is not supported.")
     def test_target_bits_autotune(self):
         from neural_compressor.torch.quantization import TuningConfig, autotune
 
         baseline = 1
diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py
index ba7465871f8..bac4a86bbd9 100644
--- a/test/3x/torch/test_autotune.py
+++ b/test/3x/torch/test_autotune.py
@@ -299,11 +299,11 @@ def eval_acc_fn(model):
         self.assertIsNotNone(best_model)
 
         # case 4
-        # Where tolerable_loss is 0.01 and accuracy meets the goal, we expect best model is None.
+        # Where tolerable_loss is 0.01 and accuracy doesn't meet the goal, best_model is the best model among the trials.
         acc_res_lst = baseline + [0.9] * 2 + [0.9] + [0.9]
         custom_tune_config = TuningConfig(config_set=[RTNConfig(bits=[4, 6, 5, 8])], tolerable_loss=0.01)
         best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn)
-        self.assertIsNone(best_model)
+        self.assertIsNotNone(best_model)
 
     @reset_tuning_target
     def test_rtn_double_quant_config_set(self) -> None:
@@ -335,7 +335,7 @@ def eval_acc_fn(model) -> float:
             config_set=get_rtn_double_quant_config_set(), max_trials=10, tolerable_loss=-1
         )
         best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn)
-        self.assertIsNone(best_model)
+        self.assertIsNotNone(best_model)
 
     @patch("neural_compressor.torch.utils.constants.DOUBLE_QUANT_CONFIGS", FAKE_DOUBLE_QUANT_CONFIGS)
     def test_rtn_double_quant_config_set3(self) -> None:
@@ -350,7 +350,7 @@ def eval_acc_fn(model) -> float:
         custom_tune_config = TuningConfig(config_set=get_rtn_double_quant_config_set(), tolerable_loss=-1)
         best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn)
-        self.assertIsNone(best_model)
+        self.assertIsNotNone(best_model)
 
     def test_woq_tuning(self):
         from neural_compressor.torch.quantization import autotune, get_woq_tuning_config
@@ -374,7 +374,7 @@ def eval_acc_fn(model):
             run_args=(dataloader, True),  # run_args should be a tuple,
             example_inputs=example_inputs,
         )
-        self.assertIsNone(best_model)
+        self.assertIsNotNone(best_model)
 
     @reset_tuning_target
     def test_autotune_mixed_precision_default(self):
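A minimal end-to-end sketch of the behavior these updated assertions pin down: when no trial meets the accuracy goal, autotune now returns the best model found across trials rather than None. The eval function below replays canned accuracies, mirroring the pattern in test_autotune.py; the inline model builder is a stand-in for the build_simple_torch_model helper used by those tests:

    import torch
    from neural_compressor.torch.quantization import RTNConfig, TuningConfig, autotune

    def build_simple_torch_model():
        # Stand-in for the helper of the same name in test_autotune.py.
        return torch.nn.Sequential(torch.nn.Linear(30, 50), torch.nn.ReLU(), torch.nn.Linear(50, 30))

    # Replay a baseline of 1.0, then four trials that all miss the 1% tolerance.
    acc_res_lst = [1.0, 0.9, 0.9, 0.9, 0.9]
    acc_iter = iter(acc_res_lst)

    def eval_acc_fn(model) -> float:
        return next(acc_iter)

    custom_tune_config = TuningConfig(config_set=[RTNConfig(bits=[4, 6, 5, 8])], tolerable_loss=0.01)
    best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fn=eval_acc_fn)
    assert best_model is not None  # best-effort model; before this change, a missed goal returned None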