|
| 1 | +using Infidex.Core; |
| 2 | +using Infidex.Indexing.Segments; |
| 3 | +using Microsoft.VisualStudio.TestTools.UnitTesting; |
| 4 | + |
| 5 | +namespace Infidex.Tests; |
| 6 | + |
[TestClass]
public class SegmentTests
{
    /// <summary>
    /// Builds a collision-free segment path under the system temp directory so
    /// tests can run in parallel (or after an aborted run that left files
    /// behind) without clobbering each other's fixed working-directory files.
    /// </summary>
    private static string TempSegmentPath(string name) =>
        Path.Combine(Path.GetTempPath(), $"{name}_{Guid.NewGuid():N}.seg");

    [TestMethod]
    public void WriteAndReadSegment_ShouldWork()
    {
        // Arrange: two terms spread across three documents.
        var terms = new TermCollection();

        // Term "apple": Doc 1 (wt 10), Doc 3 (wt 20)
        var t1 = terms.CountTermUsage("apple", 100);
        t1.FirstCycleAdd(1, 100, false, 10.0f);
        t1.FirstCycleAdd(3, 100, false, 20.0f);

        // Term "banana": Doc 2 (wt 5)
        var t2 = terms.CountTermUsage("banana", 100);
        t2.FirstCycleAdd(2, 100, false, 5.0f);

        string path = TempSegmentPath("test_segment");
        try
        {
            var writer = new SegmentWriter();
            writer.WriteSegment(terms, 5, 0, path); // 5 docs total

            using (var reader = new SegmentReader(path))
            {
                Assert.AreEqual(2, reader.FstIndex.TermCount);
                Assert.AreEqual(5, reader.DocCount);

                var applePostings = reader.GetPostings("apple");
                Assert.IsNotNull(applePostings);
                Assert.AreEqual(2, applePostings.Value.DocIds.Length);
                Assert.AreEqual(1, applePostings.Value.DocIds[0]);
                Assert.AreEqual(3, applePostings.Value.DocIds[1]);
                Assert.AreEqual((byte)10, applePostings.Value.Weights[0]);

                var bananaPostings = reader.GetPostings("banana");
                Assert.IsNotNull(bananaPostings);
                Assert.AreEqual(1, bananaPostings.Value.DocIds.Length);
                Assert.AreEqual(2, bananaPostings.Value.DocIds[0]);

                // A term that was never indexed must yield no postings.
                Assert.IsNull(reader.GetPostings("orange"));
            }
        }
        finally
        {
            // Clean up even when an assertion above fails; Delete on a
            // missing file is a no-op, so this is safe on every path.
            File.Delete(path);
        }
    }

    [TestMethod]
    public void MergeSegments_ShouldWork()
    {
        string seg1Path = TempSegmentPath("seg1");
        string seg2Path = TempSegmentPath("seg2");
        string mergedPath = TempSegmentPath("merged");

        try
        {
            // Segment 1 (Docs 0-4)
            var terms1 = new TermCollection();
            var t1 = terms1.CountTermUsage("common", 100);
            t1.FirstCycleAdd(1, 100, false, 10f);
            var t2 = terms1.CountTermUsage("unique1", 100);
            t2.FirstCycleAdd(2, 100, false, 20f);

            var writer = new SegmentWriter();
            writer.WriteSegment(terms1, 5, 0, seg1Path);

            // Segment 2 (Docs 0-4 -> expected to map to 5-9 after the merge)
            var terms2 = new TermCollection();
            var t3 = terms2.CountTermUsage("common", 100);
            t3.FirstCycleAdd(0, 100, false, 30f); // Becomes Doc 5
            var t4 = terms2.CountTermUsage("unique2", 100);
            t4.FirstCycleAdd(3, 100, false, 40f); // Becomes Doc 8

            writer.WriteSegment(terms2, 5, 0, seg2Path);

            // Merge. Dispose the source readers in finally so a throwing
            // MergeSegments cannot leak their file handles.
            var merger = new SegmentMerger();
            var readers = new List<SegmentReader>
            {
                new SegmentReader(seg1Path),
                new SegmentReader(seg2Path)
            };
            try
            {
                merger.MergeSegments(readers, mergedPath);
            }
            finally
            {
                foreach (var r in readers) r.Dispose();
            }

            // Verify merged segment: union of terms, with second-segment doc
            // ids offset by the first segment's doc count.
            using (var reader = new SegmentReader(mergedPath))
            {
                Assert.AreEqual(3, reader.FstIndex.TermCount); // common, unique1, unique2
                Assert.AreEqual(10, reader.DocCount);

                var common = reader.GetPostings("common");
                Assert.IsNotNull(common);
                Assert.AreEqual(2, common.Value.DocIds.Length);
                Assert.AreEqual(1, common.Value.DocIds[0]); // From Seg1
                Assert.AreEqual(5, common.Value.DocIds[1]); // From Seg2 (0 + 5)
                Assert.AreEqual((byte)10, common.Value.Weights[0]);
                Assert.AreEqual((byte)30, common.Value.Weights[1]);

                var unique1 = reader.GetPostings("unique1");
                Assert.IsNotNull(unique1);
                Assert.AreEqual(1, unique1.Value.DocIds.Length);
                Assert.AreEqual(2, unique1.Value.DocIds[0]);

                var unique2 = reader.GetPostings("unique2");
                Assert.IsNotNull(unique2);
                Assert.AreEqual(1, unique2.Value.DocIds.Length);
                Assert.AreEqual(8, unique2.Value.DocIds[0]); // From Seg2 (3 + 5)
            }
        }
        finally
        {
            // Remove all artifacts regardless of which assertion failed.
            File.Delete(seg1Path);
            File.Delete(seg2Path);
            File.Delete(mergedPath);
        }
    }
}
0 commit comments