11using System . IO . Compression ;
2+ using System . Text . RegularExpressions ;
23using NUnit . Framework . Constraints ;
4+ using SIL . Scripture ;
35
46namespace SIL . Machine . Corpora ;
57
@@ -18,6 +20,16 @@ internal static class CorporaTestHelpers
1820 public static readonly string UsfmSourceProjectPath = Path . Combine ( TestDataPath , "usfm" , "source" ) ;
1921 public static readonly string UsxTestProjectPath = Path . Combine ( TestDataPath , "usx" , "Tes" ) ;
2022 public static readonly string TextTestProjectPath = Path . Combine ( TestDataPath , "txt" ) ;
/// <summary>
/// Path of the "deuterocanonicals/source" directory under <see cref="TestDataPath"/>.
/// </summary>
public static readonly string DeuterocanonicalsSourcePath = Path.Combine(TestDataPath, "deuterocanonicals", "source");

/// <summary>
/// Path of the "deuterocanonicals/target" directory under <see cref="TestDataPath"/>.
/// </summary>
public static readonly string DeuterocanonicalsTargetPath = Path.Combine(TestDataPath, "deuterocanonicals", "target");
2133
2234 public static string CreateTestDblBundle ( )
2335 {
@@ -43,4 +55,135 @@ public static EqualConstraint IgnoreLineEndings(this EqualConstraint constraint)
4355 ( actual , expected ) => actual . ReplaceLineEndings ( ) == expected . ReplaceLineEndings ( )
4456 ) ;
4557 }
58+
/// <summary>
/// Creates the source-side corpus from <see cref="DeuterocanonicalsSourcePath"/>,
/// constructed with <c>includeAllText</c> set to <c>true</c>.
/// </summary>
/// <returns>A new <see cref="ParatextTextCorpus"/> for the source project directory.</returns>
public static ParatextTextCorpus GetDeuterocanonicalSourceCorpus() =>
    new ParatextTextCorpus(CorporaTestHelpers.DeuterocanonicalsSourcePath, includeAllText: true);
67+
/// <summary>
/// Creates the target-side corpus from <see cref="DeuterocanonicalsTargetPath"/>,
/// constructed with <c>includeAllText</c> set to <c>true</c>.
/// </summary>
/// <returns>A new <see cref="ParatextTextCorpus"/> for the target project directory.</returns>
public static ParatextTextCorpus GetDeuterocanonicalTargetCorpus() =>
    new ParatextTextCorpus(CorporaTestHelpers.DeuterocanonicalsTargetPath, includeAllText: true);
76+
/// <summary>
/// Builds the source and target corpora in one call.
/// </summary>
/// <returns>
/// A tuple whose first element is the result of <see cref="GetDeuterocanonicalSourceCorpus"/> and whose
/// second element is the result of <see cref="GetDeuterocanonicalTargetCorpus"/>.
/// </returns>
public static (ParatextTextCorpus sourceCorpus, ParatextTextCorpus targetCorpus) GetDeuterocanonicalCorpora() =>
    // Tuple elements are evaluated left to right, so the source corpus is still created first.
    (GetDeuterocanonicalSourceCorpus(), GetDeuterocanonicalTargetCorpus());
87+
/// <summary>
/// Expands a hyphenated verse range (e.g., "S3Y 1:1-29") into individual verses.
/// </summary>
/// <param name="verseRange">
/// A reference of the form "BOOK chapter:verse" or "BOOK chapter:start-end". Text that has no
/// "chapter:verse" separator, or no hyphen in its verse part, is parsed as a single reference.
/// </param>
/// <param name="versification">The versification handed to <c>ScriptureRef.Parse</c> for every reference produced.</param>
/// <returns>
/// For a hyphenated range, one reference per verse from start to end inclusive; otherwise the single
/// reference obtained by parsing <paramref name="verseRange"/> as given. The sequence is produced lazily,
/// so parsing errors surface when it is enumerated.
/// </returns>
public static IEnumerable<ScriptureRef> ExpandVerseRange(string verseRange, ScrVers versification)
{
    var parts = verseRange.Split(':');

    // Nothing to expand when there is no verse part at all (no ':') or the verse part has no hyphen.
    // Checking the length first avoids indexing past the end of parts for input such as "GEN 1".
    if (parts.Length < 2 || !parts[1].Contains('-'))
    {
        yield return ScriptureRef.Parse(verseRange, versification);
        yield break;
    }

    var bookAndChapter = parts[0].Trim();
    var bounds = parts[1].Split('-').Select(int.Parse).ToArray();

    for (int verse = bounds[0]; verse <= bounds[1]; verse++)
    {
        yield return ScriptureRef.Parse($"{bookAndChapter}:{verse}", versification);
    }
}
113+
/// <summary>
/// Expands range-to-range verse mappings (e.g., "GEN 1:1-3" to "GEN 2:4-6") into verse-to-verse mappings.
/// </summary>
/// <param name="mappings">Source reference to target reference pairs, each in the format accepted by <see cref="ParseRange"/>.</param>
/// <returns>A new dictionary holding one entry per individual verse pair.</returns>
/// <exception cref="InvalidOperationException">A source range and its target range cover different numbers of verses.</exception>
public static Dictionary<string, string> ExpandVerseMappings(Dictionary<string, string> mappings)
{
    Dictionary<string, string> result = new();

    foreach (var (sourceRange, targetRange) in mappings)
    {
        var source = ParseRange(sourceRange);
        var target = ParseRange(targetRange);

        // Only when both sides are single verses is the pair copied through with its original text.
        if (source.IsSingleVerse && target.IsSingleVerse)
        {
            result[sourceRange] = targetRange;
            continue;
        }

        int sourceCount = source.EndVerse - source.StartVerse + 1;
        int targetCount = target.EndVerse - target.StartVerse + 1;

        if (sourceCount != targetCount)
        {
            throw new InvalidOperationException(
                "Source and target verse ranges must have the same number of verses."
            );
        }

        // Pair the verses positionally: the n-th source verse maps to the n-th target verse.
        for (int offset = 0; offset < sourceCount; offset++)
        {
            result[$"{source.Book} {source.Chapter}:{source.StartVerse + offset}"] =
                $"{target.Book} {target.Chapter}:{target.StartVerse + offset}";
        }
    }

    return result;
}
151+
/// <summary>
/// Splits a reference such as "GEN 1:3-7" or "GEN 1:3" into book, chapter and verse bounds.
/// </summary>
/// <param name="range">
/// Text of the form "BOOK chapter:verse" or "BOOK chapter:start-end". Leading, trailing and repeated
/// spaces between the book and the chapter are ignored.
/// </param>
/// <returns>
/// The book token, the chapter number, the first and last verse (equal when no hyphen is present), and
/// a flag that is <c>true</c> when the first and last verse are the same.
/// </returns>
/// <exception cref="FormatException">
/// The book or the "chapter:verse" part is missing, or a chapter/verse number is not a valid integer.
/// </exception>
public static (string Book, int Chapter, int StartVerse, int EndVerse, bool IsSingleVerse) ParseRange(string range)
{
    // Dropping empty entries keeps stray spaces from shifting the book and chapter:verse tokens.
    var tokens = range.Split(' ', StringSplitOptions.RemoveEmptyEntries);
    if (tokens.Length < 2)
    {
        throw new FormatException($"Expected \"BOOK chapter:verse[-verse]\" but got \"{range}\".");
    }

    var chapterAndVerses = tokens[1].Split(':');
    if (chapterAndVerses.Length < 2)
    {
        throw new FormatException($"Missing ':' between chapter and verse in \"{range}\".");
    }

    int chapter = int.Parse(chapterAndVerses[0]);

    var verseBounds = chapterAndVerses[1].Split('-');
    int startVerse = int.Parse(verseBounds[0]);
    // A reference without a hyphen is a one-verse range.
    int endVerse = verseBounds.Length > 1 ? int.Parse(verseBounds[1]) : startVerse;

    return (tokens[0], chapter, startVerse, endVerse, startVerse == endVerse);
}
169+
/// <summary>
/// Removes unwanted substrings from a corpus string.
/// </summary>
/// <param name="input">The text to clean.</param>
/// <param name="unwanted">
/// Substrings to delete, applied in order. Null or empty entries are skipped.
/// </param>
/// <returns>
/// <paramref name="input"/> with every occurrence of each unwanted substring removed. The text is trimmed
/// of leading and trailing whitespace after each removal, so it is returned untrimmed when no removal is applied.
/// </returns>
public static string CleanString(string input, string[] unwanted)
{
    foreach (var item in unwanted)
    {
        // string.Replace rejects a null or zero-length search string, so such entries are ignored.
        if (string.IsNullOrEmpty(item))
        {
            continue;
        }

        input = input.Replace(item, "").Trim();
    }
    return input;
}
181+
/// <summary>
/// Collapses every run of one or more whitespace characters (spaces, tabs, line breaks) into a single space.
/// </summary>
/// <param name="input">The text to normalize.</param>
/// <returns>The text with each whitespace run replaced by one space; leading and trailing runs become a single space rather than being removed.</returns>
public static string NormalizeSpaces(string input) => Regex.Replace(input, @"\s+", " ");
46189}
0 commit comments