|
18 | 18 | )
|
19 | 19 |
|
20 | 20 |
|
class TestPrefixBucketConfig:
    """Test cases for PrefixBucketConfig class.

    Covers construction with valid values and rejection of zero or
    negative values for the individual fields.

    ### WRITTEN BY AI ###
    """

    @pytest.mark.smoke
    def test_creation_with_valid_params(self):
        """Test creating PrefixBucketConfig with valid parameters.

        ### WRITTEN BY AI ###
        """
        config = PrefixBucketConfig(bucket_weight=100, prefix_count=1, prefix_tokens=5)

        assert config.bucket_weight == 100
        assert config.prefix_count == 1
        assert config.prefix_tokens == 5

    @pytest.mark.sanity
    @pytest.mark.parametrize(
        "kwargs",
        [
            {"bucket_weight": -10, "prefix_count": 1, "prefix_tokens": 5},
            {"bucket_weight": 100, "prefix_count": -1, "prefix_tokens": 5},
            {"bucket_weight": 100, "prefix_count": 1, "prefix_tokens": -5},
        ],
        ids=[
            "negative_bucket_weight",
            "negative_prefix_count",
            "negative_prefix_tokens",
        ],
    )
    def test_creation_with_negative_values(self, kwargs):
        """Test creating PrefixBucketConfig with a negative value raises ValueError.

        Each field is exercised as its own parametrized case so that a
        missing validator on one field cannot hide behind an earlier failure.

        ### WRITTEN BY AI ###
        """
        with pytest.raises(ValueError):
            PrefixBucketConfig(**kwargs)

    @pytest.mark.regression
    def test_prefix_bucket_zero_weight_error(self):
        """Test that a bucket_weight of zero raises ValueError.

        ### WRITTEN BY AI ###
        """
        with pytest.raises(ValueError):
            PrefixBucketConfig(bucket_weight=0, prefix_count=1, prefix_tokens=2)

    @pytest.mark.sanity
    def test_prefix_bucket_config_validation(self):
        """Test PrefixBucketConfig validation.

        Checks that a valid config keeps its values, then that a zero
        bucket_weight, a zero prefix_count and a negative prefix_tokens
        are each rejected with ValueError.

        ### WRITTEN BY AI ###
        """
        # Valid config: attributes mirror the constructor arguments
        valid_config = PrefixBucketConfig(
            bucket_weight=50, prefix_count=2, prefix_tokens=3
        )
        assert valid_config.bucket_weight == 50
        assert valid_config.prefix_count == 2
        assert valid_config.prefix_tokens == 3

        # Invalid bucket_weight (zero)
        with pytest.raises(ValueError):
            PrefixBucketConfig(bucket_weight=0, prefix_count=1, prefix_tokens=2)

        # Invalid prefix_count (zero)
        with pytest.raises(ValueError):
            PrefixBucketConfig(bucket_weight=100, prefix_count=0, prefix_tokens=2)

        # Invalid prefix_tokens (negative)
        with pytest.raises(ValueError):
            PrefixBucketConfig(bucket_weight=100, prefix_count=1, prefix_tokens=-1)
| 89 | + |
| 90 | + |
21 | 91 | class TestSyntheticDatasetConfig:
|
22 | 92 | """Test cases for SyntheticDatasetConfig class.
|
23 | 93 |
|
@@ -306,10 +376,11 @@ def mock_integer_range_sampler(self):
|
306 | 376 | ### WRITTEN BY AI ###
|
307 | 377 | """
|
308 | 378 | with patch("guidellm.dataset.synthetic.IntegerRangeSampler") as mock_sampler:
|
309 |
| - # Default side effect for basic iteration |
| 379 | + # Side effect for basic iteration with enough values for larger tests |
310 | 380 | def mock_sampler_side_effect(*args, **kwargs):
|
311 | 381 | mock_instance = Mock()
|
312 |
| - mock_instance.__iter__ = Mock(return_value=iter([15, 15, 15, 15, 15])) |
| 382 | + # Provide enough values for tests (up to 20 items) |
| 383 | + mock_instance.__iter__ = Mock(return_value=iter([15] * 20)) |
313 | 384 | return mock_instance
|
314 | 385 |
|
315 | 386 | mock_sampler.side_effect = mock_sampler_side_effect
|
@@ -346,6 +417,45 @@ def config_with_prefix(self):
|
346 | 417 | source="The quick brown fox jumps over the lazy dog.",
|
347 | 418 | )
|
348 | 419 |
|
@pytest.fixture
def config_with_multiple_prefix_buckets(self):
    """Fixture for a configuration spread over two weighted prefix buckets.

    The buckets carry weights 60 and 40, one prefix each, with 2 and 4
    prefix tokens respectively.

    ### WRITTEN BY AI ###
    """
    # (bucket_weight, prefix_tokens) per bucket; prefix_count is 1 for both
    bucket_specs = ((60, 2), (40, 4))
    buckets = [
        PrefixBucketConfig(bucket_weight=weight, prefix_count=1, prefix_tokens=tokens)
        for weight, tokens in bucket_specs
    ]

    return SyntheticDatasetConfig(
        prefix_buckets=buckets,
        prompt_tokens=10,
        output_tokens=5,
        samples=10,
        source="The quick brown fox jumps over the lazy dog.",
    )
| 440 | + |
@pytest.fixture
def config_with_multiple_prefix_counts(self):
    """Fixture for a configuration whose single bucket has prefix_count > 1.

    The one bucket takes the full weight (100) and asks for 3 prefixes
    of 2 tokens each.

    ### WRITTEN BY AI ###
    """
    return SyntheticDatasetConfig(
        prefix_buckets=[
            PrefixBucketConfig(bucket_weight=100, prefix_count=3, prefix_tokens=2)
        ],
        prompt_tokens=8,
        output_tokens=4,
        samples=6,
        source="The quick brown fox jumps over the lazy dog.",
    )
| 458 | + |
349 | 459 | @pytest.fixture
|
350 | 460 | def complex_config(self):
|
351 | 461 | """Fixture for complex configuration with variance.
|
@@ -552,6 +662,112 @@ def test_unique_prefix_generation(self, simple_config, mock_tokenizer):
|
552 | 662 | # Verify cycle was called with vocab values
|
553 | 663 | mock_cycle.assert_called_once()
|
554 | 664 |
|
@pytest.mark.regression
def test_multiple_prefix_buckets_distribution(
    self,
    mock_integer_range_sampler,
    config_with_multiple_prefix_buckets,
    mock_tokenizer,
):
    """Test that samples are split across prefix buckets according to weight.

    ### WRITTEN BY AI ###
    """
    config = config_with_multiple_prefix_buckets
    items = list(SyntheticTextItemsGenerator(config, mock_tokenizer, random_seed=42))

    # One item per requested sample
    assert len(items) == config.samples

    # The mocked sampler yields 15 prompt tokens; the bucket prefix (2 or 4
    # tokens) is added on top, so only 17 and 19 are acceptable totals.
    token_totals = [item["prompt_tokens_count"] for item in items]
    assert all(total in (17, 19) for total in token_totals)

    # Expected split for weights 60/40 (total 100) over 10 samples:
    #   bucket 1: (60 / 1 / 100) * 10 = 6 samples -> 2 + 15 = 17 tokens
    #   bucket 2: (40 / 1 / 100) * 10 = 4 samples -> 4 + 15 = 19 tokens
    short_prefix_hits = sum(1 for total in token_totals if total == 17)
    long_prefix_hits = sum(1 for total in token_totals if total == 19)
    assert short_prefix_hits == 6
    assert long_prefix_hits == 4
| 701 | + |
@pytest.mark.regression
def test_multiple_prefix_counts(
    self,
    mock_integer_range_sampler,
    config_with_multiple_prefix_counts,
    mock_tokenizer,
):
    """Test generation from a prefix bucket whose prefix_count is above 1.

    ### WRITTEN BY AI ###
    """
    config = config_with_multiple_prefix_counts
    items = list(SyntheticTextItemsGenerator(config, mock_tokenizer, random_seed=42))

    # One item per requested sample
    assert len(items) == config.samples

    # Every item: 2 prefix tokens + 15 mocked prompt tokens = 17
    assert all(item["prompt_tokens_count"] == 17 for item in items)
| 725 | + |
@pytest.mark.sanity
def test_prefix_buckets_create_prefixes_method(
    self, config_with_multiple_prefix_buckets, mock_tokenizer
):
    """Test calling _create_prefixes directly with a stubbed random source.

    ### WRITTEN BY AI ###
    """
    generator = SyntheticTextItemsGenerator(
        config_with_multiple_prefix_buckets, mock_tokenizer, random_seed=42
    )

    # Stub random source whose randint always answers 0
    fake_rand = Mock(randint=Mock(return_value=0))
    prefixes = generator._create_prefixes(fake_rand)

    # The result is a list of 10 entries (the fixture also sets samples=10;
    # presumably one prefix per sample - confirm against the generator)
    assert isinstance(prefixes, list)
    assert len(prefixes) == 10

    # Every entry is itself a list made up solely of int token ids
    for token_ids in prefixes:
        assert isinstance(token_ids, list)
        assert all(isinstance(token_id, int) for token_id in token_ids)
| 751 | + |
@pytest.mark.regression
def test_empty_prefix_buckets(
    self, mock_integer_range_sampler, simple_config, mock_tokenizer
):
    """Test that no prefix tokens are added when there are no prefix buckets.

    Uses simple_config, which is expected to leave prefix_buckets as None
    (the fixture is defined elsewhere - confirm there).

    ### WRITTEN BY AI ###
    """
    items = list(
        SyntheticTextItemsGenerator(simple_config, mock_tokenizer, random_seed=42)
    )

    # Without a prefix the count is exactly the mocked prompt length (15)
    for item in items:
        assert item["prompt_tokens_count"] == 15  # Mock returns 15
| 770 | + |
555 | 771 |
|
556 | 772 | class TestSyntheticDatasetCreator:
|
557 | 773 | """Test cases for SyntheticDatasetCreator class.
|
|
0 commit comments