diff --git a/tests/AiDotNet.Tests/UnitTests/RetrievalAugmentedGeneration/ContextCompression/AutoCompressorTests.cs b/tests/AiDotNet.Tests/UnitTests/RetrievalAugmentedGeneration/ContextCompression/AutoCompressorTests.cs new file mode 100644 index 000000000..db857a715 --- /dev/null +++ b/tests/AiDotNet.Tests/UnitTests/RetrievalAugmentedGeneration/ContextCompression/AutoCompressorTests.cs @@ -0,0 +1,650 @@ +using AiDotNet.RetrievalAugmentedGeneration.ContextCompression; +using AiDotNet.RetrievalAugmentedGeneration.Models; +using System; +using System.Collections.Generic; +using System.Linq; +using Xunit; + +namespace AiDotNetTests.UnitTests.RetrievalAugmentedGeneration.ContextCompression +{ + public class AutoCompressorTests : ContextCompressorTestBase + { + #region Constructor Tests + + [Fact] + public void Constructor_WithDefaultParameters_CreatesInstance() + { + // Arrange & Act + var compressor = new AutoCompressor(); + + // Assert + Assert.NotNull(compressor); + } + + [Fact] + public void Constructor_WithValidParameters_CreatesInstance() + { + // Arrange & Act + var compressor = new AutoCompressor(maxOutputLength: 1000, compressionRatio: 0.7); + + // Assert + Assert.NotNull(compressor); + } + + [Fact] + public void Constructor_WithZeroMaxOutputLength_ThrowsArgumentOutOfRangeException() + { + // Arrange & Act & Assert + Assert.Throws(() => + new AutoCompressor(maxOutputLength: 0)); + } + + [Fact] + public void Constructor_WithNegativeMaxOutputLength_ThrowsArgumentOutOfRangeException() + { + // Arrange & Act & Assert + Assert.Throws(() => + new AutoCompressor(maxOutputLength: -100)); + } + + [Fact] + public void Constructor_WithZeroCompressionRatio_ThrowsArgumentOutOfRangeException() + { + // Arrange & Act & Assert + Assert.Throws(() => + new AutoCompressor(maxOutputLength: 500, compressionRatio: 0)); + } + + [Fact] + public void Constructor_WithNegativeCompressionRatio_ThrowsArgumentOutOfRangeException() + { + // Arrange & Act & Assert + Assert.Throws(() => + new AutoCompressor(maxOutputLength: 500, compressionRatio: -0.5)); + } + + [Fact] + public void Constructor_WithCompressionRatioGreaterThanOne_ThrowsArgumentOutOfRangeException() + { + // Arrange & Act & Assert + Assert.Throws(() => + new AutoCompressor(maxOutputLength: 500, compressionRatio: 1.5)); + } + + [Fact] + public void Constructor_WithCompressionRatioEqualToOne_CreatesInstance() + { + // Arrange & Act + var compressor = new AutoCompressor(maxOutputLength: 500, compressionRatio: 1.0); + + // Assert + Assert.NotNull(compressor); + } + + #endregion + + #region Basic Functionality Tests + + [Fact] + public void Compress_WithValidDocuments_ReturnsCompressedDocuments() + { + // Arrange + var compressor = new AutoCompressor(maxOutputLength: 500, compressionRatio: 0.5); + var documents = CreateSampleDocuments(); + var query = "machine learning neural networks"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.NotNull(result); + Assert.Equal(documents.Count, result.Count); + } + + [Fact] + public void Compress_WithNullDocuments_ThrowsArgumentNullException() + { + // Arrange + var compressor = new AutoCompressor(); + var query = "test query"; + + // Act & Assert + Assert.Throws(() => + compressor.Compress(null, query)); + } + + [Fact] + public void Compress_WithNullQuery_ThrowsArgumentException() + { + // Arrange + var compressor = new AutoCompressor(); + var documents = CreateSampleDocuments(); + + // Act & Assert + Assert.Throws(() => + compressor.Compress(documents, null)); + } + + [Fact] + public void Compress_WithEmptyQuery_ThrowsArgumentException() + { + // Arrange + var compressor = new AutoCompressor(); + var documents = CreateSampleDocuments(); + + // Act & Assert + Assert.Throws(() => + compressor.Compress(documents, string.Empty)); + } + + [Fact] + public void Compress_WithWhitespaceQuery_ThrowsArgumentException() + { + // Arrange + var compressor = new AutoCompressor(); + var documents = CreateSampleDocuments(); + + // Act & Assert + Assert.Throws(() => + compressor.Compress(documents, " ")); + } + + [Fact] + public void Compress_WithEmptyDocumentList_ReturnsEmptyList() + { + // Arrange + var compressor = new AutoCompressor(); + var documents = new List>(); + var query = "test query"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.NotNull(result); + Assert.Empty(result); + } + + #endregion + + #region Compression Quality Tests + + [Fact] + public void Compress_ReducesDocumentSize() + { + // Arrange + var compressor = new AutoCompressor(maxOutputLength: 300, compressionRatio: 0.5); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + AssertCompressed(documents, result, allowEqual: true); + } + + [Fact] + public void Compress_RespectsMaxOutputLength() + { + // Arrange + var maxLength = 200; + var compressor = new AutoCompressor(maxOutputLength: maxLength, compressionRatio: 0.8); + var longDoc = CreateLargeDocument("large"); + var documents = new List> { longDoc }; + var query = "document content"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.True(result[0].Content.Length <= maxLength, + $"Compressed length {result[0].Content.Length} should be <= {maxLength}"); + } + + [Fact] + public void Compress_WithLowCompressionRatio_RetainsFewSentences() + { + // Arrange + var compressor = new AutoCompressor(maxOutputLength: 1000, compressionRatio: 0.3); + var document = CreateDocumentWithLength("doc", 10); + var documents = new List> { document }; + var query = "test"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.True(result[0].Content.Length < document.Content.Length); + } + + [Fact] + public void Compress_WithHighCompressionRatio_RetainsMoreContent() + { + // Arrange + var lowCompressor = new AutoCompressor(maxOutputLength: 2000, compressionRatio: 0.3); + var highCompressor = new AutoCompressor(maxOutputLength: 2000, compressionRatio: 0.7); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var lowResult = lowCompressor.Compress(documents, query); + var highResult = highCompressor.Compress(documents, query); + + // Assert + var lowLength = lowResult.Sum(d => d.Content.Length); + var highLength = highResult.Sum(d => d.Content.Length); + Assert.True(highLength >= lowLength, + "Higher compression ratio should produce longer or equal output"); + } + + [Fact] + public void Compress_PrioritizesQueryRelevantContent() + { + // Arrange + var compressor = new AutoCompressor(maxOutputLength: 150, compressionRatio: 0.4); + var document = new Document("doc", + "Machine learning is powerful. The weather is nice today. Neural networks learn patterns. Birds are flying. Deep learning uses layers."); + var documents = new List> { document }; + var query = "machine learning neural networks"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + var content = result[0].Content.ToLowerInvariant(); + // Should prioritize sentences with query terms + Assert.True( + content.Contains("machine") || + content.Contains("learning") || + content.Contains("neural")); + } + + [Fact] + public void Compress_PreservesMetadata() + { + // Arrange + var compressor = new AutoCompressor(); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + AssertMetadataPreserved(documents, result); + } + + [Fact] + public void Compress_PreservesRelevanceScores() + { + // Arrange + var compressor = new AutoCompressor(); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + AssertRelevanceScoresPreserved(documents, result); + } + + [Fact] + public void Compress_PreservesDocumentIds() + { + // Arrange + var compressor = new AutoCompressor(); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + for (int i = 0; i < documents.Count; i++) + { + Assert.Equal(documents[i].Id, result[i].Id); + } + } + + #endregion + + #region Edge Cases Tests + + [Fact] + public void Compress_WithEmptyDocument_ReturnsEmptyContent() + { + // Arrange + var compressor = new AutoCompressor(); + var documents = new List> + { + new Document("empty", string.Empty) + }; + var query = "test"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.Equal(string.Empty, result[0].Content); + } + + [Fact] + public void Compress_WithWhitespaceOnlyDocument_ReturnsEmptyContent() + { + // Arrange + var compressor = new AutoCompressor(); + var documents = new List> + { + new Document("whitespace", " \t\n ") + }; + var query = "test"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.Equal(string.Empty, result[0].Content); + } + + [Fact] + public void Compress_WithSingleSentence_HandlesCorrectly() + { + // Arrange + var compressor = new AutoCompressor(maxOutputLength: 500, compressionRatio: 0.5); + var documents = new List> + { + new Document("single", "Machine learning is important.") + }; + var query = "machine learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.NotEmpty(result[0].Content); + } + + [Fact] + public void Compress_WithVeryLargeDocument_CompressesSuccessfully() + { + // Arrange + var compressor = new AutoCompressor(maxOutputLength: 500, compressionRatio: 0.3); + var largeDoc = CreateLargeDocument("large"); + var documents = new List> { largeDoc }; + var query = "document content long"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.True(result[0].Content.Length <= 500); + Assert.NotEmpty(result[0].Content); + } + + [Fact] + public void Compress_WithUnicodeContent_HandlesCorrectly() + { + // Arrange + var compressor = new AutoCompressor(); + var unicodeDoc = CreateUnicodeDocument("unicode"); + var documents = new List> { unicodeDoc }; + var query = "学习 learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.NotEmpty(result[0].Content); + } + + [Fact] + public void Compress_WithSpecialCharacters_HandlesCorrectly() + { + // Arrange + var compressor = new AutoCompressor(); + var specialDoc = CreateSpecialCharDocument("special"); + var documents = new List> { specialDoc }; + var query = "special chars testing"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.NotEmpty(result[0].Content); + } + + [Fact] + public void Compress_WithMultipleDocuments_ProcessesAllDocuments() + { + // Arrange + var compressor = new AutoCompressor(maxOutputLength: 300, compressionRatio: 0.5); + var documents = new List>(); + for (int i = 0; i < 10; i++) + { + documents.Add(CreateDocumentWithLength($"doc{i}", 10)); + } + var query = "test sentence content"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Equal(10, result.Count); + foreach (var doc in result) + { + Assert.NotNull(doc.Content); + } + } + + [Fact] + public void Compress_WithDocumentSmallerThanMaxLength_MayReturnOriginal() + { + // Arrange + var compressor = new AutoCompressor(maxOutputLength: 1000, compressionRatio: 0.9); + var shortDoc = new Document("short", "Short document with minimal content."); + var documents = new List> { shortDoc }; + var query = "test"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.NotEmpty(result[0].Content); + } + + #endregion + + #region Sentence Scoring Tests + + [Fact] + public void Compress_PrioritizesEarlySentences() + { + // Arrange + var compressor = new AutoCompressor(maxOutputLength: 100, compressionRatio: 0.3); + var document = new Document("doc", + "First sentence with important content. Second sentence also relevant. Third sentence here. Fourth sentence present. Fifth sentence exists."); + var documents = new List> { document }; + var query = "content relevant"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.NotEmpty(result[0].Content); + // Early sentences with query terms should be prioritized + } + + [Fact] + public void Compress_ScoresBasedOnQueryMatch() + { + // Arrange + var compressor = new AutoCompressor(maxOutputLength: 200, compressionRatio: 0.5); + var document = new Document("doc", + "Machine learning is a field of AI. The weather is sunny today. Neural networks process information. Birds are singing outside."); + var documents = new List> { document }; + var query = "machine learning neural networks"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + var content = result[0].Content.ToLowerInvariant(); + // Should include sentences matching query + int matchCount = 0; + if (content.Contains("machine") || content.Contains("learning")) matchCount++; + if (content.Contains("neural") || content.Contains("networks")) matchCount++; + Assert.True(matchCount > 0, "Compressed content should include query-relevant sentences"); + } + + #endregion + + #region Integration Tests + + [Fact] + public void Compress_WithDifferentCompressionRatios_ProducesExpectedLengths() + { + // Arrange + var documents = CreateSampleDocuments(); + var query = "machine learning"; + var ratios = new[] { 0.3, 0.5, 0.7 }; + var results = new List(); + + // Act + foreach (var ratio in ratios) + { + var compressor = new AutoCompressor(maxOutputLength: 2000, compressionRatio: ratio); + var compressed = compressor.Compress(documents, query); + results.Add(compressed.Sum(d => d.Content.Length)); + } + + // Assert + for (int i = 0; i < ratios.Length - 1; i++) + { + Assert.True(results[i] <= results[i + 1], + $"Higher compression ratio should produce longer output. Got {results[i]} and {results[i + 1]}"); + } + } + + [Fact] + public void Compress_WithDifferentMaxLengths_RespectsBounds() + { + // Arrange + var largeDoc = CreateLargeDocument("large"); + var documents = new List> { largeDoc }; + var query = "document content"; + var maxLengths = new[] { 100, 300, 500 }; + + // Act & Assert + foreach (var maxLength in maxLengths) + { + var compressor = new AutoCompressor(maxOutputLength: maxLength, compressionRatio: 0.5); + var result = compressor.Compress(documents, query); + Assert.Single(result); + Assert.True(result[0].Content.Length <= maxLength, + $"Result length {result[0].Content.Length} should be <= {maxLength}"); + } + } + + [Fact] + public void Compress_WithDifferentQueries_ProducesDifferentResults() + { + // Arrange + var compressor = new AutoCompressor(maxOutputLength: 300, compressionRatio: 0.5); + var documents = CreateSampleDocuments(); + var query1 = "machine learning neural"; + var query2 = "weather sunny outdoor"; + + // Act + var result1 = compressor.Compress(documents, query1); + var result2 = compressor.Compress(documents, query2); + + // Assert + var content1 = string.Join(" ", result1.Select(d => d.Content)); + var content2 = string.Join(" ", result2.Select(d => d.Content)); + Assert.NotEqual(content1, content2); + } + + [Fact] + public void Compress_MultipleInvocations_ProducesSameResults() + { + // Arrange + var compressor = new AutoCompressor(maxOutputLength: 500, compressionRatio: 0.5); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var result1 = compressor.Compress(documents, query); + var result2 = compressor.Compress(documents, query); + + // Assert + Assert.Equal(result1.Count, result2.Count); + for (int i = 0; i < result1.Count; i++) + { + Assert.Equal(result1[i].Content, result2[i].Content); + } + } + + [Fact] + public void Compress_WithFloatType_WorksCorrectly() + { + // Arrange + var compressor = new AutoCompressor(maxOutputLength: 500, compressionRatio: 0.5f); + var documents = new List> + { + new Document("doc1", "Machine learning is powerful. Neural networks are important.") + { + RelevanceScore = 0.9f, + HasRelevanceScore = true + } + }; + var query = "machine learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.NotNull(result); + Assert.Single(result); + } + + #endregion + + #region Performance Tests + + [Fact] + public void Compress_With100KBDocument_CompletesInReasonableTime() + { + // Arrange + var compressor = new AutoCompressor(maxOutputLength: 500, compressionRatio: 0.3); + // Create a document > 100KB + var largeContent = string.Join(" ", Enumerable.Repeat("This is a sentence with some content to test compression performance.", 3000)); + var largeDoc = new Document("huge", largeContent); + var documents = new List> { largeDoc }; + var query = "content compression test"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.True(result[0].Content.Length <= 500); + Assert.NotEmpty(result[0].Content); + } + + #endregion + } +} diff --git a/tests/AiDotNet.Tests/UnitTests/RetrievalAugmentedGeneration/ContextCompression/ContextCompressorTestBase.cs b/tests/AiDotNet.Tests/UnitTests/RetrievalAugmentedGeneration/ContextCompression/ContextCompressorTestBase.cs new file mode 100644 index 000000000..733cb6d44 --- /dev/null +++ b/tests/AiDotNet.Tests/UnitTests/RetrievalAugmentedGeneration/ContextCompression/ContextCompressorTestBase.cs @@ -0,0 +1,149 @@ +using AiDotNet.Helpers; +using AiDotNet.Interfaces; +using AiDotNet.RetrievalAugmentedGeneration.Models; +using System; +using System.Collections.Generic; +using System.Linq; +using Xunit; + +namespace AiDotNetTests.UnitTests.RetrievalAugmentedGeneration.ContextCompression +{ + /// + /// Base class for context compressor tests with shared test utilities. + /// + public abstract class ContextCompressorTestBase + { + protected static readonly INumericOperations NumOps = MathHelper.GetNumericOperations(); + + /// + /// Creates sample documents for testing. + /// + protected List> CreateSampleDocuments() + { + return new List> + { + new Document("doc1", "Machine learning is a subset of artificial intelligence. It enables computers to learn from data without explicit programming. Neural networks are a key component of deep learning.") + { + Metadata = new Dictionary { { "source", "ml_intro.txt" } }, + RelevanceScore = 0.9, + HasRelevanceScore = true + }, + new Document("doc2", "Python is a popular programming language for machine learning. Libraries like TensorFlow and PyTorch make it easy to build neural networks. Data scientists prefer Python for its simplicity.") + { + Metadata = new Dictionary { { "source", "python_ml.txt" } }, + RelevanceScore = 0.85, + HasRelevanceScore = true + }, + new Document("doc3", "The weather today is sunny and warm. Many people enjoy outdoor activities during such pleasant conditions. Parks are crowded on sunny days.") + { + Metadata = new Dictionary { { "source", "weather.txt" } }, + RelevanceScore = 0.1, + HasRelevanceScore = true + } + }; + } + + /// + /// Creates a document with specified length for testing compression. + /// + protected Document CreateDocumentWithLength(string id, int sentenceCount) + { + var sentences = new List(); + for (int i = 0; i < sentenceCount; i++) + { + sentences.Add($"This is sentence number {i + 1} containing some test content."); + } + return new Document(id, string.Join(" ", sentences)); + } + + /// + /// Creates a very large document for testing edge cases (100KB+). + /// + protected Document CreateLargeDocument(string id) + { + var content = string.Join(" ", Enumerable.Repeat("This is a very long document with lots of content that needs to be compressed effectively.", 2000)); + return new Document(id, content); + } + + /// + /// Creates a document with Unicode content. + /// + protected Document CreateUnicodeDocument(string id) + { + return new Document(id, "机器学习是人工智能的子集。El aprendizaje automático es poderoso. Машинное обучение важно. 🤖 Emojis are Unicode too!"); + } + + /// + /// Creates a document with special characters. + /// + protected Document CreateSpecialCharDocument(string id) + { + return new Document(id, "Special chars: @#$%^&*()! Testing with symbols <>=+- and punctuation... Multiple?? Questions!! End."); + } + + /// + /// Asserts that documents have been compressed. + /// + protected void AssertCompressed(List> original, List> compressed, bool allowEqual = false) + { + Assert.NotNull(compressed); + + var originalTotalLength = original.Sum(d => d.Content.Length); + var compressedTotalLength = compressed.Sum(d => d.Content.Length); + + if (allowEqual) + { + Assert.True(compressedTotalLength <= originalTotalLength, + $"Compressed length ({compressedTotalLength}) should be less than or equal to original ({originalTotalLength})"); + } + else + { + Assert.True(compressedTotalLength < originalTotalLength, + $"Compressed length ({compressedTotalLength}) should be less than original ({originalTotalLength})"); + } + } + + /// + /// Calculates the compression ratio. + /// + protected double CalculateCompressionRatio(List> original, List> compressed) + { + var originalLength = original.Sum(d => d.Content.Length); + var compressedLength = compressed.Sum(d => d.Content.Length); + return originalLength > 0 ? (double)compressedLength / originalLength : 0; + } + + /// + /// Asserts that metadata is preserved. + /// + protected void AssertMetadataPreserved(List> original, List> compressed) + { + for (int i = 0; i < Math.Min(original.Count, compressed.Count); i++) + { + if (original[i].Metadata != null && compressed[i].Metadata != null) + { + foreach (var kvp in original[i].Metadata) + { + Assert.True(compressed[i].Metadata.ContainsKey(kvp.Key), + $"Metadata key '{kvp.Key}' should be preserved"); + } + } + } + } + + /// + /// Asserts that relevance scores are preserved. + /// + protected void AssertRelevanceScoresPreserved(List> original, List> compressed) + { + for (int i = 0; i < Math.Min(original.Count, compressed.Count); i++) + { + if (original[i].HasRelevanceScore) + { + Assert.True(compressed[i].HasRelevanceScore, "HasRelevanceScore should be preserved"); + Assert.Equal(original[i].RelevanceScore, compressed[i].RelevanceScore); + } + } + } + } +} diff --git a/tests/AiDotNet.Tests/UnitTests/RetrievalAugmentedGeneration/ContextCompression/DocumentSummarizerTests.cs b/tests/AiDotNet.Tests/UnitTests/RetrievalAugmentedGeneration/ContextCompression/DocumentSummarizerTests.cs new file mode 100644 index 000000000..1f138df8f --- /dev/null +++ b/tests/AiDotNet.Tests/UnitTests/RetrievalAugmentedGeneration/ContextCompression/DocumentSummarizerTests.cs @@ -0,0 +1,543 @@ +using AiDotNet.Helpers; +using AiDotNet.RetrievalAugmentedGeneration.ContextCompression; +using AiDotNet.RetrievalAugmentedGeneration.Models; +using System; +using System.Collections.Generic; +using System.Linq; +using Xunit; + +namespace AiDotNetTests.UnitTests.RetrievalAugmentedGeneration.ContextCompression +{ + public class DocumentSummarizerTests : ContextCompressorTestBase + { + #region Constructor Tests + + [Fact] + public void Constructor_WithValidParameters_CreatesInstance() + { + // Arrange & Act + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 500); + + // Assert + Assert.NotNull(summarizer); + } + + [Fact] + public void Constructor_WithNullNumericOperations_ThrowsArgumentNullException() + { + // Arrange & Act & Assert + Assert.Throws(() => + new DocumentSummarizer(null, maxSummaryLength: 500)); + } + + [Fact] + public void Constructor_WithZeroMaxSummaryLength_ThrowsArgumentOutOfRangeException() + { + // Arrange & Act & Assert + Assert.Throws(() => + new DocumentSummarizer(NumOps, maxSummaryLength: 0)); + } + + [Fact] + public void Constructor_WithNegativeMaxSummaryLength_ThrowsArgumentOutOfRangeException() + { + // Arrange & Act & Assert + Assert.Throws(() => + new DocumentSummarizer(NumOps, maxSummaryLength: -100)); + } + + [Fact] + public void Constructor_WithCustomMaxSummaryLength_CreatesInstance() + { + // Arrange & Act + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 1000); + + // Assert + Assert.NotNull(summarizer); + } + + #endregion + + #region Basic Functionality Tests + + [Fact] + public void Compress_WithValidDocuments_ReturnsSummarizedDocuments() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 200); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var result = summarizer.Compress(documents, query); + + // Assert + Assert.NotNull(result); + Assert.Equal(documents.Count, result.Count); + } + + [Fact] + public void Compress_WithNullDocuments_ThrowsArgumentNullException() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 500); + var query = "test query"; + + // Act & Assert + Assert.Throws(() => + summarizer.Compress(null, query)); + } + + [Fact] + public void Compress_WithNullQuery_ThrowsArgumentException() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 500); + var documents = CreateSampleDocuments(); + + // Act & Assert + Assert.Throws(() => + summarizer.Compress(documents, null)); + } + + [Fact] + public void Compress_WithEmptyQuery_ThrowsArgumentException() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 500); + var documents = CreateSampleDocuments(); + + // Act & Assert + Assert.Throws(() => + summarizer.Compress(documents, string.Empty)); + } + + [Fact] + public void Compress_WithEmptyDocumentList_ReturnsEmptyList() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 500); + var documents = new List>(); + var query = "test query"; + + // Act + var result = summarizer.Compress(documents, query); + + // Assert + Assert.NotNull(result); + Assert.Empty(result); + } + + #endregion + + #region Summarization Quality Tests + + [Fact] + public void Compress_LongDocument_RespectsSummaryLength() + { + // Arrange + var maxLength = 200; + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: maxLength); + var longText = string.Join(" ", Enumerable.Repeat("This is a sentence about machine learning and artificial intelligence.", 50)); + var documents = new List> + { + new Document("long", longText) + }; + var query = "machine learning"; + + // Act + var result = summarizer.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.True(result[0].Content.Length <= maxLength, + $"Summary length {result[0].Content.Length} should be <= {maxLength}"); + } + + [Fact] + public void Compress_ShortDocument_ReturnsOriginal() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 500); + var shortText = "This is a short document."; + var documents = new List> + { + new Document("short", shortText) + }; + var query = "short"; + + // Act + var result = summarizer.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.Equal(shortText, result[0].Content); + } + + [Fact] + public void Compress_WithQueryTerms_PrioritizesRelevantSentences() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 100); + var text = "Machine learning is important. The sky is blue. Neural networks are powerful. Birds can fly. Deep learning is advanced."; + var documents = new List> + { + new Document("doc", text) + }; + var query = "machine learning neural networks"; + + // Act + var result = summarizer.Compress(documents, query); + + // Assert + Assert.Single(result); + var summary = result[0].Content.ToLowerInvariant(); + // Should contain at least one query term + Assert.True( + summary.Contains("machine") || + summary.Contains("learning") || + summary.Contains("neural"), + "Summary should prioritize sentences with query terms"); + } + + [Fact] + public void Compress_PreservesMetadata() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 500); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var result = summarizer.Compress(documents, query); + + // Assert + AssertMetadataPreserved(documents, result); + } + + [Fact] + public void Compress_PreservesRelevanceScores() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 500); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var result = summarizer.Compress(documents, query); + + // Assert + AssertRelevanceScoresPreserved(documents, result); + } + + [Fact] + public void Compress_PreservesDocumentIds() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 500); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var result = summarizer.Compress(documents, query); + + // Assert + for (int i = 0; i < documents.Count; i++) + { + Assert.Equal(documents[i].Id, result[i].Id); + } + } + + #endregion + + #region Edge Cases Tests + + [Fact] + public void Compress_WithEmptyDocument_ReturnsEmptyDocument() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 500); + var documents = new List> + { + new Document("empty", string.Empty) + }; + var query = "test"; + + // Act + var result = summarizer.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.Equal(string.Empty, result[0].Content); + } + + [Fact] + public void Compress_WithVeryLongSingleSentence_TruncatesCorrectly() + { + // Arrange + var maxLength = 100; + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: maxLength); + var longSentence = new string('a', 500); + var documents = new List> + { + new Document("long", longSentence) + }; + var query = "test"; + + // Act + var result = summarizer.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.True(result[0].Content.Length <= maxLength); + } + + [Fact] + public void Compress_WithVeryLargeDocument_SummarizesSuccessfully() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 500); + var largeDoc = CreateLargeDocument("large"); + var documents = new List> { largeDoc }; + var query = "document content"; + + // Act + var result = summarizer.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.True(result[0].Content.Length <= 500); + Assert.NotEmpty(result[0].Content); + } + + [Fact] + public void Compress_WithUnicodeContent_HandlesCorrectly() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 500); + var unicodeDoc = CreateUnicodeDocument("unicode"); + var documents = new List> { unicodeDoc }; + var query = "学习"; + + // Act + var result = summarizer.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.NotEmpty(result[0].Content); + } + + [Fact] + public void Compress_WithSpecialCharacters_HandlesCorrectly() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 500); + var specialDoc = CreateSpecialCharDocument("special"); + var documents = new List> { specialDoc }; + var query = "special"; + + // Act + var result = summarizer.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.NotEmpty(result[0].Content); + } + + [Fact] + public void Compress_WithMultipleDocuments_ProcessesAllDocuments() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 200); + var documents = new List>(); + for (int i = 0; i < 10; i++) + { + documents.Add(CreateDocumentWithLength($"doc{i}", 20)); + } + var query = "test content"; + + // Act + var result = summarizer.Compress(documents, query); + + // Assert + Assert.Equal(10, result.Count); + foreach (var doc in result) + { + Assert.NotNull(doc.Content); + } + } + + #endregion + + #region SummarizeText Method Tests + + [Fact] + public void SummarizeText_WithValidInput_SummarizesText() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 100); + var text = string.Join(" ", Enumerable.Repeat("This is a sentence about machine learning.", 20)); + + // Act + var result = summarizer.SummarizeText(text); + + // Assert + Assert.NotNull(result); + Assert.True(result.Length <= 100); + } + + [Fact] + public void SummarizeText_WithEmptyText_ReturnsEmpty() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 500); + + // Act + var result = summarizer.SummarizeText(string.Empty); + + // Assert + Assert.Equal(string.Empty, result); + } + + [Fact] + public void SummarizeText_WithNullText_ReturnsNull() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 500); + + // Act + var result = summarizer.SummarizeText(null); + + // Assert + Assert.Null(result); + } + + [Fact] + public void SummarizeText_WithQueryTerms_PrioritizesRelevantContent() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 150); + var text = "Machine learning is powerful. The weather is nice. Neural networks are important. Birds fly high. Deep learning is advanced."; + var queryTerms = new List { "machine", "learning", "neural" }; + + // Act + var result = summarizer.SummarizeText(text, queryTerms); + + // Assert + Assert.NotNull(result); + var lowerResult = result.ToLowerInvariant(); + Assert.True( + lowerResult.Contains("machine") || + lowerResult.Contains("learning") || + lowerResult.Contains("neural")); + } + + [Fact] + public void SummarizeText_TextShorterThanMaxLength_ReturnsOriginal() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 500); + var text = "Short text."; + + // Act + var result = summarizer.SummarizeText(text); + + // Assert + Assert.Equal(text, result); + } + + #endregion + + #region Summarize Method Tests + + [Fact] + public void Summarize_WithValidDocuments_ReturnsSummarizedDocuments() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 200); + var documents = CreateSampleDocuments(); + + // Act + var result = summarizer.Summarize(documents); + + // Assert + Assert.NotNull(result); + Assert.Equal(documents.Count, result.Count); + } + + [Fact] + public void Summarize_WithNullDocuments_ThrowsArgumentNullException() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 500); + + // Act & Assert + Assert.Throws(() => + summarizer.Summarize(null)); + } + + [Fact] + public void Summarize_PreservesMetadata() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 500); + var documents = CreateSampleDocuments(); + + // Act + var result = summarizer.Summarize(documents); + + // Assert + AssertMetadataPreserved(documents, result); + } + + #endregion + + #region Integration Tests + + [Fact] + public void Compress_WithDifferentMaxLengths_ProducesDifferentResults() + { + // Arrange + var longText = string.Join(" ", Enumerable.Repeat("This is a sentence about machine learning.", 50)); + var documents = new List> + { + new Document("doc", longText) + }; + var query = "machine learning"; + + var summarizer1 = new DocumentSummarizer(NumOps, maxSummaryLength: 100); + var summarizer2 = new DocumentSummarizer(NumOps, maxSummaryLength: 300); + + // Act + var result1 = summarizer1.Compress(documents, query); + var result2 = summarizer2.Compress(documents, query); + + // Assert + Assert.True(result1[0].Content.Length <= 100); + Assert.True(result2[0].Content.Length <= 300); + Assert.True(result2[0].Content.Length >= result1[0].Content.Length); + } + + [Fact] + public void Compress_MultipleInvocations_ProducesSameResults() + { + // Arrange + var summarizer = new DocumentSummarizer(NumOps, maxSummaryLength: 500); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var result1 = summarizer.Compress(documents, query); + var result2 = summarizer.Compress(documents, query); + + // Assert + Assert.Equal(result1.Count, result2.Count); + for (int i = 0; i < result1.Count; i++) + { + Assert.Equal(result1[i].Content, result2[i].Content); + } + } + + #endregion + } +} diff --git a/tests/AiDotNet.Tests/UnitTests/RetrievalAugmentedGeneration/ContextCompression/LLMContextCompressorTests.cs b/tests/AiDotNet.Tests/UnitTests/RetrievalAugmentedGeneration/ContextCompression/LLMContextCompressorTests.cs new file mode 100644 index 000000000..e83fd9b2e --- /dev/null +++ b/tests/AiDotNet.Tests/UnitTests/RetrievalAugmentedGeneration/ContextCompression/LLMContextCompressorTests.cs @@ -0,0 +1,522 @@ +using AiDotNet.RetrievalAugmentedGeneration.ContextCompression; +using AiDotNet.RetrievalAugmentedGeneration.Models; +using System; +using System.Collections.Generic; +using System.Linq; +using Xunit; + +namespace AiDotNetTests.UnitTests.RetrievalAugmentedGeneration.ContextCompression +{ + public class LLMContextCompressorTests : ContextCompressorTestBase + { + #region Constructor Tests + + [Fact] + public void Constructor_WithValidCompressionRatio_CreatesInstance() + { + // Arrange & Act + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + + // Assert + Assert.NotNull(compressor); + } + + [Fact] + public void Constructor_WithCompressionRatioZero_ThrowsArgumentOutOfRangeException() + { + // Arrange & Act & Assert + Assert.Throws(() => + new LLMContextCompressor(compressionRatio: 0)); + } + + [Fact] + public void Constructor_WithCompressionRatioNegative_ThrowsArgumentOutOfRangeException() + { + // Arrange & Act & Assert + Assert.Throws(() => + new LLMContextCompressor(compressionRatio: -0.5)); + } + + [Fact] + public void Constructor_WithCompressionRatioGreaterThanOne_ThrowsArgumentOutOfRangeException() + { + // Arrange & Act & Assert + Assert.Throws(() => + new LLMContextCompressor(compressionRatio: 1.5)); + } + + [Fact] + public void Constructor_WithCompressionRatioOne_CreatesInstance() + { + // Arrange & Act + var compressor = new LLMContextCompressor(compressionRatio: 1.0); + + // Assert + Assert.NotNull(compressor); + } + + #endregion + + #region Basic Functionality Tests + + [Fact] + public void Compress_WithValidDocuments_ReturnsCompressedDocuments() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var documents = CreateSampleDocuments(); + var query = "machine learning neural networks"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.NotNull(result); + Assert.Equal(documents.Count, result.Count); + AssertCompressed(documents, result); + } + + [Fact] + public void Compress_WithNullDocuments_ThrowsArgumentNullException() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var query = "test query"; + + // Act & Assert + Assert.Throws(() => + compressor.Compress(null, query)); + } + + [Fact] + public void Compress_WithNullQuery_ThrowsArgumentException() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var documents = CreateSampleDocuments(); + + // Act & Assert + Assert.Throws(() => + compressor.Compress(documents, null)); + } + + [Fact] + public void Compress_WithEmptyQuery_ThrowsArgumentException() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var documents = CreateSampleDocuments(); + + // Act & Assert + Assert.Throws(() => + compressor.Compress(documents, string.Empty)); + } + + [Fact] + public void Compress_WithWhitespaceQuery_ThrowsArgumentException() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var documents = CreateSampleDocuments(); + + // Act & Assert + Assert.Throws(() => + compressor.Compress(documents, " ")); + } + + [Fact] + public void Compress_WithEmptyDocumentList_ReturnsEmptyList() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var documents = new List>(); + var query = "test query"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.NotNull(result); + Assert.Empty(result); + } + + #endregion + + #region Compression Quality Tests + + [Fact] + public void Compress_PreservesRelevantInformation() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var documents = CreateSampleDocuments(); + var query = "machine learning neural networks"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + // Check that compressed content contains query terms + var compressedContent = string.Join(" ", result.Select(d => d.Content)).ToLowerInvariant(); + Assert.Contains("machine learning", compressedContent); + } + + [Fact] + public void Compress_WithLowCompressionRatio_RetainsFewSentences() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.3); + var document = new Document("doc1", + "First sentence about machine learning. Second sentence about neural networks. Third sentence about data science. Fourth sentence about Python programming."); + var documents = new List> { document }; + var query = "machine learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.True(result[0].Content.Length < document.Content.Length); + } + + [Fact] + public void Compress_WithHighCompressionRatio_RetainsMoreContent() + { + // Arrange + var lowCompressor = new LLMContextCompressor(compressionRatio: 0.3); + var highCompressor = new LLMContextCompressor(compressionRatio: 0.7); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var lowResult = lowCompressor.Compress(documents, query); + var highResult = highCompressor.Compress(documents, query); + + // Assert + var lowLength = lowResult.Sum(d => d.Content.Length); + var highLength = highResult.Sum(d => d.Content.Length); + Assert.True(highLength > lowLength); + } + + [Fact] + public void Compress_PreservesMetadata() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + AssertMetadataPreserved(documents, result); + } + + [Fact] + public void Compress_PreservesRelevanceScores() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + AssertRelevanceScoresPreserved(documents, result); + } + + [Fact] + public void Compress_PreservesDocumentIds() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + for (int i = 0; i < documents.Count; i++) + { + Assert.Equal(documents[i].Id, result[i].Id); + } + } + + #endregion + + #region Edge Cases Tests + + [Fact] + public void Compress_WithEmptyDocument_ReturnsEmptyDocument() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var documents = new List> + { + new Document("empty", string.Empty) + }; + var query = "test query"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.Equal(string.Empty, result[0].Content); + } + + [Fact] + public void Compress_WithSingleSentenceDocument_RetainsSentence() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var documents = new List> + { + new Document("single", "This is a single sentence about machine learning.") + }; + var query = "machine learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.NotEmpty(result[0].Content); + } + + [Fact] + public void Compress_WithDocumentSmallerThanTarget_ReturnsOriginal() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.9); + var documents = new List> + { + new Document("small", "Short text.") + }; + var query = "test"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.NotEmpty(result[0].Content); + } + + [Fact] + public void Compress_WithVeryLargeDocument_CompressesSuccessfully() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.3); + var largeDoc = CreateLargeDocument("large"); + var documents = new List> { largeDoc }; + var query = "document content"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.True(result[0].Content.Length < largeDoc.Content.Length); + Assert.True(result[0].Content.Length > 0); + } + + [Fact] + public void Compress_WithUnicodeContent_HandlesCorrectly() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var unicodeDoc = CreateUnicodeDocument("unicode"); + var documents = new List> { unicodeDoc }; + var query = "学习 learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.NotEmpty(result[0].Content); + } + + [Fact] + public void Compress_WithSpecialCharacters_HandlesCorrectly() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var specialDoc = CreateSpecialCharDocument("special"); + var documents = new List> { specialDoc }; + var query = "special chars"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Single(result); + Assert.NotEmpty(result[0].Content); + } + + [Fact] + public void Compress_WithMultipleDocuments_ProcessesAllDocuments() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var documents = new List>(); + for (int i = 0; i < 10; i++) + { + documents.Add(CreateDocumentWithLength($"doc{i}", 5)); + } + var query = "test content"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.Equal(10, result.Count); + foreach (var doc in result) + { + Assert.NotNull(doc.Content); + } + } + + #endregion + + #region CompressText Method Tests + + [Fact] + public void CompressText_WithValidInput_CompressesText() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var text = "Machine learning is powerful. Neural networks are important. Python is useful. Data science is growing."; + var query = "machine learning neural"; + + // Act + var result = compressor.CompressText(query, text); + + // Assert + Assert.NotNull(result); + Assert.True(result.Length < text.Length); + } + + [Fact] + public void CompressText_WithEmptyText_ReturnsEmpty() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var query = "test"; + + // Act + var result = compressor.CompressText(query, string.Empty); + + // Assert + Assert.Equal(string.Empty, result); + } + + [Fact] + public void CompressText_WithNullText_ReturnsNull() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var query = "test"; + + // Act + var result = compressor.CompressText(query, null); + + // Assert + Assert.Null(result); + } + + [Fact] + public void CompressText_SelectsMostRelevantSentences() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.4); + var text = "Machine learning is a subset of AI. The weather is nice today. Neural networks learn patterns. Birds are flying outside. Deep learning uses multiple layers."; + var query = "machine learning neural networks"; + + // Act + var result = compressor.CompressText(query, text); + + // Assert + Assert.NotNull(result); + // Should prefer sentences with query terms + var lowerResult = result.ToLowerInvariant(); + Assert.True(lowerResult.Contains("machine") || lowerResult.Contains("learning") || lowerResult.Contains("neural")); + } + + #endregion + + #region Integration Tests + + [Fact] + public void Compress_WithDifferentCompressionRatios_ProducesExpectedRatios() + { + // Arrange + var documents = CreateSampleDocuments(); + var query = "machine learning"; + var ratios = new[] { 0.3, 0.5, 0.7 }; + var results = new List(); + + // Act + foreach (var ratio in ratios) + { + var compressor = new LLMContextCompressor(compressionRatio: ratio); + var compressed = compressor.Compress(documents, query); + var actualRatio = CalculateCompressionRatio(documents, compressed); + results.Add(actualRatio); + } + + // Assert + for (int i = 0; i < ratios.Length - 1; i++) + { + Assert.True(results[i] <= results[i + 1], + $"Higher compression ratio should produce longer output. Got {results[i]} and {results[i + 1]}"); + } + } + + [Fact] + public void Compress_WithDifferentQueries_ProducesDifferentResults() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var documents = CreateSampleDocuments(); + var query1 = "machine learning"; + var query2 = "weather sunny"; + + // Act + var result1 = compressor.Compress(documents, query1); + var result2 = compressor.Compress(documents, query2); + + // Assert + var content1 = string.Join(" ", result1.Select(d => d.Content)); + var content2 = string.Join(" ", result2.Select(d => d.Content)); + Assert.NotEqual(content1, content2); + } + + [Fact] + public void Compress_MultipleInvocations_ProducesSameResults() + { + // Arrange + var compressor = new LLMContextCompressor(compressionRatio: 0.5); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var result1 = compressor.Compress(documents, query); + var result2 = compressor.Compress(documents, query); + + // Assert + Assert.Equal(result1.Count, result2.Count); + for (int i = 0; i < result1.Count; i++) + { + Assert.Equal(result1[i].Content, result2[i].Content); + } + } + + #endregion + } +} diff --git a/tests/AiDotNet.Tests/UnitTests/RetrievalAugmentedGeneration/ContextCompression/SelectiveContextCompressorTests.cs b/tests/AiDotNet.Tests/UnitTests/RetrievalAugmentedGeneration/ContextCompression/SelectiveContextCompressorTests.cs new file mode 100644 index 000000000..9b03f4d2c --- /dev/null +++ b/tests/AiDotNet.Tests/UnitTests/RetrievalAugmentedGeneration/ContextCompression/SelectiveContextCompressorTests.cs @@ -0,0 +1,518 @@ +using AiDotNet.RetrievalAugmentedGeneration.ContextCompression; +using AiDotNet.RetrievalAugmentedGeneration.Models; +using System; +using System.Collections.Generic; +using System.Linq; +using Xunit; + +namespace AiDotNetTests.UnitTests.RetrievalAugmentedGeneration.ContextCompression +{ + public class SelectiveContextCompressorTests : ContextCompressorTestBase + { + #region Constructor Tests + + [Fact] + public void Constructor_WithValidParameters_CreatesInstance() + { + // Arrange & Act + var compressor = new SelectiveContextCompressor(maxSentences: 5, relevanceThreshold: 0.3); + + // Assert + Assert.NotNull(compressor); + } + + [Fact] + public void Constructor_WithZeroMaxSentences_ThrowsArgumentOutOfRangeException() + { + // Arrange & Act & Assert + Assert.Throws(() => + new SelectiveContextCompressor(maxSentences: 0, relevanceThreshold: 0.3)); + } + + [Fact] + public void Constructor_WithNegativeMaxSentences_ThrowsArgumentOutOfRangeException() + { + // Arrange & Act & Assert + Assert.Throws(() => + new SelectiveContextCompressor(maxSentences: -5, relevanceThreshold: 0.3)); + } + + [Fact] + public void Constructor_WithDifferentThresholds_CreatesInstance() + { + // Arrange & Act + var compressor1 = new SelectiveContextCompressor(maxSentences: 5, relevanceThreshold: 0.1); + var compressor2 = new SelectiveContextCompressor(maxSentences: 5, relevanceThreshold: 0.9); + + // Assert + Assert.NotNull(compressor1); + Assert.NotNull(compressor2); + } + + #endregion + + #region Basic Functionality Tests + + [Fact] + public void Compress_WithValidDocuments_ReturnsCompressedDocuments() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 5, relevanceThreshold: 0.1); + var documents = CreateSampleDocuments(); + var query = "machine learning neural networks"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.NotNull(result); + Assert.True(result.Count <= documents.Count); + } + + [Fact] + public void Compress_WithNullDocuments_ThrowsArgumentNullException() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 5, relevanceThreshold: 0.3); + var query = "test query"; + + // Act & Assert + Assert.Throws(() => + compressor.Compress(null, query)); + } + + [Fact] + public void Compress_WithNullQuery_ThrowsArgumentException() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 5, relevanceThreshold: 0.3); + var documents = CreateSampleDocuments(); + + // Act & Assert + Assert.Throws(() => + compressor.Compress(documents, null)); + } + + [Fact] + public void Compress_WithEmptyQuery_ThrowsArgumentException() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 5, relevanceThreshold: 0.3); + var documents = CreateSampleDocuments(); + + // Act & Assert + Assert.Throws(() => + compressor.Compress(documents, string.Empty)); + } + + [Fact] + public void Compress_WithEmptyDocumentList_ReturnsEmptyList() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 5, relevanceThreshold: 0.3); + var documents = new List>(); + var query = "test query"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.NotNull(result); + Assert.Empty(result); + } + + #endregion + + #region Compression Quality Tests + + [Fact] + public void Compress_SelectsRelevantSentences() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 3, relevanceThreshold: 0.1); + var document = new Document("doc1", + "Machine learning is a subset of AI. The sky is blue today. Neural networks process data. Birds are singing outside. Deep learning uses layers."); + var documents = new List> { document }; + var query = "machine learning neural networks"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.NotEmpty(result); + var content = result[0].Content.ToLowerInvariant(); + // Should prefer sentences with query terms + Assert.True( + content.Contains("machine") || + content.Contains("learning") || + content.Contains("neural")); + } + + [Fact] + public void Compress_RespectsMaxSentences() + { + // Arrange + var maxSentences = 2; + var compressor = new SelectiveContextCompressor(maxSentences: maxSentences, relevanceThreshold: 0.0); + var document = CreateDocumentWithLength("doc1", 10); + var documents = new List> { document }; + var query = "sentence test content"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + if (result.Any()) + { + var sentenceCount = result[0].Content.Split('.', StringSplitOptions.RemoveEmptyEntries).Length; + Assert.True(sentenceCount <= maxSentences, + $"Result should have at most {maxSentences} sentences, got {sentenceCount}"); + } + } + + [Fact] + public void Compress_WithHighThreshold_FiltersMoreSentences() + { + // Arrange + var lowThresholdCompressor = new SelectiveContextCompressor(maxSentences: 10, relevanceThreshold: 0.0); + var highThresholdCompressor = new SelectiveContextCompressor(maxSentences: 10, relevanceThreshold: 0.5); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var lowResult = lowThresholdCompressor.Compress(documents, query); + var highResult = highThresholdCompressor.Compress(documents, query); + + // Assert + var lowLength = lowResult.Sum(d => d.Content.Length); + var highLength = highResult.Sum(d => d.Content.Length); + Assert.True(highLength <= lowLength, + "Higher threshold should produce shorter or equal output"); + } + + [Fact] + public void Compress_PreservesMetadata() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 5, relevanceThreshold: 0.1); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + foreach (var doc in result) + { + var original = documents.FirstOrDefault(d => d.Id == doc.Id); + if (original != null && original.Metadata != null) + { + foreach (var kvp in original.Metadata) + { + Assert.True(doc.Metadata.ContainsKey(kvp.Key), + $"Metadata key '{kvp.Key}' should be preserved"); + } + } + } + } + + [Fact] + public void Compress_PreservesRelevanceScores() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 5, relevanceThreshold: 0.1); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + foreach (var doc in result) + { + var original = documents.FirstOrDefault(d => d.Id == doc.Id); + if (original != null && original.HasRelevanceScore) + { + Assert.True(doc.HasRelevanceScore, "HasRelevanceScore should be preserved"); + Assert.Equal(original.RelevanceScore, doc.RelevanceScore); + } + } + } + + [Fact] + public void Compress_PreservesDocumentIds() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 5, relevanceThreshold: 0.1); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + foreach (var doc in result) + { + Assert.True(documents.Any(d => d.Id == doc.Id), + $"Document ID '{doc.Id}' should exist in original documents"); + } + } + + #endregion + + #region Edge Cases Tests + + [Fact] + public void Compress_WithEmptyDocument_FiltersOutDocument() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 5, relevanceThreshold: 0.1); + var documents = new List> + { + new Document("empty", string.Empty), + CreateSampleDocuments()[0] + }; + var query = "machine learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.NotNull(result); + // Empty document might be filtered out + Assert.True(result.Count <= documents.Count); + } + + [Fact] + public void Compress_WithNoRelevantSentences_MayReturnEmpty() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 5, relevanceThreshold: 0.9); + var document = new Document("doc", "The sky is blue. Grass is green. Water is wet."); + var documents = new List> { document }; + var query = "machine learning neural networks artificial intelligence"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.NotNull(result); + // May be empty or have minimal content due to high threshold + } + + [Fact] + public void Compress_WithSingleSentence_HandlesCorrectly() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 5, relevanceThreshold: 0.1); + var document = new Document("single", "Machine learning is important."); + var documents = new List> { document }; + var query = "machine learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.NotEmpty(result); + Assert.NotEmpty(result[0].Content); + } + + [Fact] + public void Compress_WithVeryLargeDocument_CompressesSuccessfully() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 10, relevanceThreshold: 0.1); + var largeDoc = CreateLargeDocument("large"); + var documents = new List> { largeDoc }; + var query = "document content long"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.NotEmpty(result); + if (result.Any() && !string.IsNullOrEmpty(result[0].Content)) + { + Assert.True(result[0].Content.Length < largeDoc.Content.Length); + } + } + + [Fact] + public void Compress_WithUnicodeContent_HandlesCorrectly() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 5, relevanceThreshold: 0.0); + var unicodeDoc = CreateUnicodeDocument("unicode"); + var documents = new List> { unicodeDoc }; + var query = "学习 learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.NotNull(result); + if (result.Any()) + { + Assert.NotEmpty(result[0].Content); + } + } + + [Fact] + public void Compress_WithSpecialCharacters_HandlesCorrectly() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 5, relevanceThreshold: 0.0); + var specialDoc = CreateSpecialCharDocument("special"); + var documents = new List> { specialDoc }; + var query = "special testing"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.NotNull(result); + if (result.Any()) + { + Assert.NotEmpty(result[0].Content); + } + } + + [Fact] + public void Compress_WithMultipleDocuments_ProcessesAll() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 5, relevanceThreshold: 0.1); + var documents = new List>(); + for (int i = 0; i < 10; i++) + { + documents.Add(CreateDocumentWithLength($"doc{i}", 10)); + } + var query = "test sentence content"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.NotNull(result); + // Some documents might be filtered out if no relevant sentences + } + + #endregion + + #region Relevance Filtering Tests + + [Fact] + public void Compress_WithZeroThreshold_IncludesAllSentences() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 100, relevanceThreshold: 0.0); + var document = CreateDocumentWithLength("doc", 5); + var documents = new List> { document }; + var query = "test"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.NotEmpty(result); + // Should include content since threshold is 0 + Assert.NotEmpty(result[0].Content); + } + + [Fact] + public void Compress_OrdersSentencesByRelevance() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 2, relevanceThreshold: 0.0); + var document = new Document("doc", + "The weather is nice. Machine learning is important. Birds are flying. Neural networks are powerful."); + var documents = new List> { document }; + var query = "machine learning neural networks"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.NotEmpty(result); + var content = result[0].Content.ToLowerInvariant(); + // Most relevant sentences should be included + Assert.True( + content.Contains("machine") || + content.Contains("learning") || + content.Contains("neural")); + } + + #endregion + + #region Integration Tests + + [Fact] + public void Compress_WithDifferentMaxSentences_ProducesDifferentLengths() + { + // Arrange + var document = CreateDocumentWithLength("doc", 20); + var documents = new List> { document }; + var query = "test sentence content"; + + var compressor1 = new SelectiveContextCompressor(maxSentences: 2, relevanceThreshold: 0.0); + var compressor2 = new SelectiveContextCompressor(maxSentences: 10, relevanceThreshold: 0.0); + + // Act + var result1 = compressor1.Compress(documents, query); + var result2 = compressor2.Compress(documents, query); + + // Assert + if (result1.Any() && result2.Any()) + { + var length1 = result1[0].Content.Length; + var length2 = result2[0].Content.Length; + Assert.True(length2 >= length1, + "Higher maxSentences should produce longer or equal output"); + } + } + + [Fact] + public void Compress_MultipleInvocations_ProducesSameResults() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 5, relevanceThreshold: 0.1); + var documents = CreateSampleDocuments(); + var query = "machine learning"; + + // Act + var result1 = compressor.Compress(documents, query); + var result2 = compressor.Compress(documents, query); + + // Assert + Assert.Equal(result1.Count, result2.Count); + for (int i = 0; i < result1.Count; i++) + { + Assert.Equal(result1[i].Content, result2[i].Content); + } + } + + [Fact] + public void Compress_WithFloatType_WorksCorrectly() + { + // Arrange + var compressor = new SelectiveContextCompressor(maxSentences: 5, relevanceThreshold: 0.1f); + var documents = new List> + { + new Document("doc1", "Machine learning is important. Neural networks are powerful.") + { + RelevanceScore = 0.9f, + HasRelevanceScore = true + } + }; + var query = "machine learning"; + + // Act + var result = compressor.Compress(documents, query); + + // Assert + Assert.NotNull(result); + } + + #endregion + } +}