Skip to content

Commit 84fbec8

Browse files
Made changes to PR
1 parent 14eceeb commit 84fbec8

30 files changed

+16459
-502
lines changed

tests/SIL.Machine.Tests/Corpora/CorporaTestHelpers.cs

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
using System.IO.Compression;
2+
using System.Text.RegularExpressions;
23
using NUnit.Framework.Constraints;
4+
using SIL.Scripture;
35

46
namespace SIL.Machine.Corpora;
57

@@ -18,6 +20,16 @@ internal static class CorporaTestHelpers
1820
public static readonly string UsfmSourceProjectPath = Path.Combine(TestDataPath, "usfm", "source");
1921
public static readonly string UsxTestProjectPath = Path.Combine(TestDataPath, "usx", "Tes");
2022
public static readonly string TextTestProjectPath = Path.Combine(TestDataPath, "txt");
23+
/// <summary>
/// Path built by combining <see cref="TestDataPath"/> with "deuterocanonicals" and "source".
/// </summary>
public static readonly string DeuterocanonicalsSourcePath = Path.Combine(TestDataPath, "deuterocanonicals", "source");

/// <summary>
/// Path built by combining <see cref="TestDataPath"/> with "deuterocanonicals" and "target".
/// </summary>
public static readonly string DeuterocanonicalsTargetPath = Path.Combine(TestDataPath, "deuterocanonicals", "target");
2133

2234
public static string CreateTestDblBundle()
2335
{
@@ -43,4 +55,135 @@ public static EqualConstraint IgnoreLineEndings(this EqualConstraint constraint)
4355
(actual, expected) => actual.ReplaceLineEndings() == expected.ReplaceLineEndings()
4456
);
4557
}
58+
59+
/// <summary>
/// Builds a <see cref="ParatextTextCorpus"/> over <see cref="DeuterocanonicalsSourcePath"/>,
/// passing <c>includeAllText: true</c> to the constructor.
/// </summary>
/// <returns>A newly constructed corpus; each call creates a separate instance.</returns>
public static ParatextTextCorpus GetDeuterocanonicalSourceCorpus() =>
    new ParatextTextCorpus(DeuterocanonicalsSourcePath, includeAllText: true);
67+
68+
/// <summary>
/// Builds a <see cref="ParatextTextCorpus"/> over <see cref="DeuterocanonicalsTargetPath"/>,
/// passing <c>includeAllText: true</c> to the constructor.
/// </summary>
/// <returns>A newly constructed corpus; each call creates a separate instance.</returns>
public static ParatextTextCorpus GetDeuterocanonicalTargetCorpus() =>
    new ParatextTextCorpus(DeuterocanonicalsTargetPath, includeAllText: true);
76+
77+
/// <summary>
/// Builds the deuterocanonical source and target corpora in one call.
/// </summary>
/// <returns>
/// A tuple whose first element comes from <see cref="GetDeuterocanonicalSourceCorpus"/> and whose
/// second element comes from <see cref="GetDeuterocanonicalTargetCorpus"/>.
/// </returns>
public static (ParatextTextCorpus sourceCorpus, ParatextTextCorpus targetCorpus) GetDeuterocanonicalCorpora() =>
    (GetDeuterocanonicalSourceCorpus(), GetDeuterocanonicalTargetCorpus());
87+
88+
/// <summary>
89+
/// Expands a hyphenated verse range (e.g., "S3Y 1:1-29") into individual verses.
90+
/// </summary>
91+
public static IEnumerable<ScriptureRef> ExpandVerseRange(string verseRange, ScrVers versification)
92+
{
93+
var parts = verseRange.Split(':');
94+
var bookAndChapter = parts[0].Trim();
95+
var verses = parts[1];
96+
97+
if (verses.Contains('-'))
98+
{
99+
var rangeParts = verses.Split('-').Select(int.Parse).ToArray();
100+
var startVerse = rangeParts[0];
101+
var endVerse = rangeParts[1];
102+
103+
for (int verse = startVerse; verse <= endVerse; verse++)
104+
{
105+
yield return ScriptureRef.Parse($"{bookAndChapter}:{verse}", versification);
106+
}
107+
}
108+
else
109+
{
110+
yield return ScriptureRef.Parse(verseRange, versification);
111+
}
112+
}
113+
114+
/// <summary>
/// Expands verse-range mappings into one dictionary entry per verse.
/// </summary>
/// <remarks>
/// A mapping whose key and value are both single verses is copied unchanged. For any other
/// mapping, the n-th verse of the source range is paired with the n-th verse of the target
/// range, and both sides are written as "Book Chapter:Verse".
/// </remarks>
/// <param name="mappings">Source reference to target reference pairs, each in the form accepted by <see cref="ParseRange"/>.</param>
/// <returns>A new dictionary holding the per-verse mappings.</returns>
/// <exception cref="InvalidOperationException">
/// A mapping's source and target ranges cover different numbers of verses.
/// </exception>
public static Dictionary<string, string> ExpandVerseMappings(Dictionary<string, string> mappings)
{
    Dictionary<string, string> result = new();

    foreach ((string sourceText, string targetText) in mappings)
    {
        var source = ParseRange(sourceText);
        var target = ParseRange(targetText);

        if (!source.IsSingleVerse || !target.IsSingleVerse)
        {
            int sourceLength = source.EndVerse - source.StartVerse + 1;
            int targetLength = target.EndVerse - target.StartVerse + 1;

            if (sourceLength != targetLength)
            {
                throw new InvalidOperationException(
                    "Source and target verse ranges must have the same number of verses."
                );
            }

            // Walk both ranges in lockstep; equal lengths guarantee they finish together.
            for (
                int sourceVerse = source.StartVerse, targetVerse = target.StartVerse;
                sourceVerse <= source.EndVerse;
                sourceVerse++, targetVerse++
            )
            {
                string key = $"{source.Book} {source.Chapter}:{sourceVerse}";
                string value = $"{target.Book} {target.Chapter}:{targetVerse}";
                result[key] = value;
            }
        }
        else
        {
            // Both sides are single verses: keep the original text as-is.
            result[sourceText] = targetText;
        }
    }

    return result;
}
151+
152+
/// <summary>
/// Parses a reference of the form "BOOK chapter:verse" or "BOOK chapter:start-end"
/// (e.g., "S3Y 1:1-29") into its components.
/// </summary>
/// <param name="range">The reference text. Extra spaces around or between the book and the chapter are ignored.</param>
/// <returns>
/// The book token, the chapter number, the first and last verse numbers (equal when no '-' is
/// present), and a flag that is true when the first and last verse are the same.
/// </returns>
/// <exception cref="FormatException">
/// The text has no "chapter:verse" part after the book, or a number in it is not a valid integer.
/// </exception>
public static (string Book, int Chapter, int StartVerse, int EndVerse, bool IsSingleVerse) ParseRange(string range)
{
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Drop empty tokens so leading, trailing, or doubled spaces do not shift the indices.
    string[] tokens = range.Split(' ', StringSplitOptions.RemoveEmptyEntries);
    if (tokens.Length < 2)
    {
        throw new FormatException($"Expected a reference like \"BOOK 1:1-2\" but got \"{range}\".");
    }

    string book = tokens[0];

    string[] chapterAndVerses = tokens[1].Split(':');
    if (chapterAndVerses.Length < 2)
    {
        throw new FormatException($"Expected \"chapter:verse\" after the book in \"{range}\".");
    }

    int chapter = int.Parse(chapterAndVerses[0], invariant);

    string[] verseBounds = chapterAndVerses[1].Split('-');
    int startVerse = int.Parse(verseBounds[0], invariant);
    // With no '-' the reference names one verse, so the range ends where it starts.
    int endVerse = verseBounds.Length > 1 ? int.Parse(verseBounds[1], invariant) : startVerse;

    return (book, chapter, startVerse, endVerse, startVerse == endVerse);
}
169+
170+
/// <summary>
/// Removes every occurrence of each unwanted substring from a corpus string.
/// </summary>
/// <param name="input">The text to clean.</param>
/// <param name="unwanted">
/// Substrings to remove, applied in array order. Null or empty entries are skipped.
/// </param>
/// <returns>
/// The cleaned text. Leading and trailing whitespace is trimmed after each removal, so the
/// result is trimmed whenever at least one non-empty entry was processed; with no such entry
/// the input is returned unchanged.
/// </returns>
public static string CleanString(string input, string[] unwanted)
{
    foreach (string item in unwanted)
    {
        // string.Replace throws for a null or empty search string; such an entry has nothing to remove.
        if (string.IsNullOrEmpty(item))
        {
            continue;
        }

        input = input.Replace(item, "").Trim();
    }

    return input;
}
181+
182+
/// <summary>
/// Replaces every run of one or more whitespace characters with a single space.
/// </summary>
/// <remarks>
/// The match is the regex <c>\s+</c>, so tabs and line breaks are replaced too, even when they
/// occur singly. Leading and trailing whitespace is collapsed to one space, not removed.
/// </remarks>
/// <param name="input">The text to normalize.</param>
/// <returns>The text with each whitespace run collapsed to one space.</returns>
public static string NormalizeSpaces(string input)
{
    var whitespaceRun = new Regex(@"\s+");
    return whitespaceRun.Replace(input, " ");
}
46189
}

0 commit comments

Comments
 (0)