11using System . Collections . Generic ;
22using System . Linq ;
3+ using SIL . Extensions ;
34using SIL . Scripture ;
45
56namespace SIL . Machine . Corpora
@@ -9,7 +10,8 @@ public enum ScriptureTextType
910 None ,
1011 NonVerse ,
1112 Verse ,
12- Note
13+ Embedded ,
14+ NoteText
1315 }
1416
1517 public abstract class ScriptureRefUsfmParserHandlerBase : UsfmParserHandlerBase
@@ -19,6 +21,9 @@ public abstract class ScriptureRefUsfmParserHandlerBase : UsfmParserHandlerBase
1921 private readonly Stack < ScriptureTextType > _curTextType ;
2022 private bool _duplicateVerse = false ;
2123
24+ private bool _inEmbedded ;
25+ public bool InNoteText { get ; private set ; }
26+
2227 protected ScriptureRefUsfmParserHandlerBase ( )
2328 {
2429 _curElements = new Stack < ScriptureElement > ( ) ;
@@ -59,7 +64,7 @@ string pubNumber
5964 // ignore duplicate verses
6065 _duplicateVerse = true ;
6166 }
62- else if ( VerseRef . AreOverlappingVersesRanges ( number , _curVerseRef . Verse ) )
67+ else if ( VerseRef . AreOverlappingVersesRanges ( verse1 : number , verse2 : _curVerseRef . Verse ) )
6368 {
6469 // merge overlapping verse ranges in to one range
6570 VerseRef verseRef = _curVerseRef . Clone ( ) ;
@@ -153,20 +158,36 @@ public override void EndSidebar(UsfmParserState state, string marker, bool close
153158
/// <summary>
/// Handles the start of a note by marking that an embedded element is open and forwarding the note to
/// <see cref="StartEmbedded"/>.
/// </summary>
/// <param name="state">The current parser state.</param>
/// <param name="marker">The marker of the note being started.</param>
/// <param name="caller">The note caller; passed through unchanged.</param>
/// <param name="category">The note category; passed through unchanged.</param>
public override void StartNote(UsfmParserState state, string marker, string caller, string category)
{
    // The flag is raised before the virtual call so that it is already set while StartEmbedded runs.
    _inEmbedded = true;

    StartEmbedded(state, marker, caller, category);
}
164+
/// <summary>
/// Handles the end of a note: closes any open note text, reports the end of the embedded element, and
/// clears the embedded flag.
/// </summary>
/// <param name="state">The current parser state.</param>
/// <param name="marker">The marker of the note being ended.</param>
/// <param name="closed">Forwarded unchanged to <see cref="EndEmbedded"/>.</param>
public override void EndNote(UsfmParserState state, string marker, bool closed)
{
    // EndNoteText is a no-op unless note text is currently on top of the text-type stack.
    EndNoteText(state);

    // No attribute list is available at this point, so null is forwarded.
    EndEmbedded(state, marker, attributes: null, closed: closed);

    // The flag is lowered only after the virtual call has returned.
    _inEmbedded = false;
}
171+
/// <summary>
/// Called when an embedded element (a note or an embedded character marker) starts.
/// </summary>
/// <param name="state">The current parser state.</param>
/// <param name="marker">The marker that starts the embedded element.</param>
/// <param name="caller">Not used by this base implementation.</param>
/// <param name="category">Not used by this base implementation.</param>
public virtual void StartEmbedded(UsfmParserState state, string marker, string caller, string category)
{
    // Pick up the parser's verse reference if no verse reference has been recorded yet.
    if (_curVerseRef.IsDefault)
        UpdateVerseRef(state.VerseRef, marker);

    // Nothing further is tracked while a duplicate verse is being skipped.
    if (_duplicateVerse)
        return;

    // if we hit a note in a verse paragraph and we aren't in a verse, then start a non-verse segment
    CheckConvertVerseParaToNonVerse(state);
    NextElement(marker);
}
164184
165- public override void EndNote ( UsfmParserState state , string marker , bool closed )
166- {
167- if ( CurrentTextType == ScriptureTextType . Note && ! _duplicateVerse )
168- EndNoteText ( state ) ;
169- }
/// <summary>
/// Called when an embedded element ends. This base implementation does nothing; subclasses may override it.
/// </summary>
/// <param name="state">The current parser state.</param>
/// <param name="marker">The marker of the embedded element that is ending.</param>
/// <param name="attributes">The attributes of the element; null when invoked from <c>EndNote</c>.</param>
/// <param name="closed">The closed flag forwarded by the caller.</param>
public virtual void EndEmbedded(
    UsfmParserState state,
    string marker,
    IReadOnlyList<UsfmAttribute> attributes,
    bool closed
)
{
    // Intentionally empty.
}
170191
171192 public override void Text ( UsfmParserState state , string text )
172193 {
@@ -187,9 +208,37 @@ public override void StartChar(
187208 IReadOnlyList < UsfmAttribute > attributes
188209 )
189210 {
211+ if ( IsEmbeddedPart ( markerWithoutPlus ) )
212+ EndNoteText ( state ) ;
213+
190214 // if we hit a character marker in a verse paragraph and we aren't in a verse, then start a non-verse
191215 // segment
192216 CheckConvertVerseParaToNonVerse ( state ) ;
217+
218+ if ( IsEmbeddedCharacter ( markerWithoutPlus ) )
219+ {
220+ _inEmbedded = true ;
221+ StartEmbedded ( state , markerWithoutPlus , null , null ) ;
222+ }
223+
224+ if ( IsNoteText ( markerWithoutPlus ) )
225+ {
226+ StartNoteText ( state ) ;
227+ }
228+ }
229+
/// <summary>
/// Handles the end of a character marker. When the marker is an embedded character marker, the end of the
/// embedded element is reported and the embedded flag is cleared; any other marker is ignored.
/// </summary>
/// <param name="state">The current parser state.</param>
/// <param name="marker">The character marker that is ending.</param>
/// <param name="attributes">The attributes of the character marker.</param>
/// <param name="closed">The closed flag forwarded to <see cref="EndEmbedded"/>.</param>
public override void EndChar(
    UsfmParserState state,
    string marker,
    IReadOnlyList<UsfmAttribute> attributes,
    bool closed
)
{
    if (!IsEmbeddedCharacter(marker))
        return;

    EndEmbedded(state, marker, attributes, closed);
    _inEmbedded = false;
}
194243
/// <summary>
/// Hook for the start of verse text. This base implementation does nothing; subclasses may override it.
/// </summary>
/// <param name="state">The current parser state.</param>
/// <param name="scriptureRefs">The scripture references associated with the verse text.</param>
protected virtual void StartVerseText(UsfmParserState state, IReadOnlyList<ScriptureRef> scriptureRefs)
{
    // Intentionally empty.
}
@@ -200,8 +249,25 @@ protected virtual void StartNonVerseText(UsfmParserState state, ScriptureRef scr
200249
/// <summary>
/// Hook for the end of non-verse text. This base implementation does nothing; subclasses may override it.
/// </summary>
/// <param name="state">The current parser state.</param>
/// <param name="scriptureRef">The scripture reference of the non-verse text that is ending.</param>
protected virtual void EndNonVerseText(UsfmParserState state, ScriptureRef scriptureRef)
{
    // Intentionally empty.
}
202251
/// <summary>
/// Begins a note-text segment: sets <see cref="InNoteText"/>, pushes the note-text type onto the text-type
/// stack, and notifies the reference-taking overload.
/// </summary>
/// <param name="state">The current parser state.</param>
public virtual void StartNoteText(UsfmParserState state)
{
    InNoteText = true;
    _curTextType.Push(ScriptureTextType.NoteText);

    // The reference is created only after the state above has been updated, as in the original ordering.
    ScriptureRef noteTextRef = CreateNonVerseRef();
    StartNoteText(state, noteTextRef);
}
258+
/// <summary>
/// Hook for the start of note text. This base implementation does nothing; subclasses may override it.
/// </summary>
/// <param name="state">The current parser state.</param>
/// <param name="scriptureRef">The scripture reference of the note text that is starting.</param>
protected virtual void StartNoteText(UsfmParserState state, ScriptureRef scriptureRef)
{
    // Intentionally empty.
}
204260
/// <summary>
/// Ends the current note-text segment, if there is one. Does nothing unless note text is on top of the
/// text-type stack.
/// </summary>
/// <param name="state">The current parser state.</param>
public virtual void EndNoteText(UsfmParserState state)
{
    bool noteTextIsOpen = _curTextType.Count > 0 && _curTextType.Peek() == ScriptureTextType.NoteText;
    if (!noteTextIsOpen)
        return;

    // Notify first, then pop the text type and clear the flag.
    EndNoteText(state, CreateNonVerseRef());
    _curTextType.Pop();
    InNoteText = false;
}
270+
/// <summary>
/// Hook for the end of note text. This base implementation does nothing; subclasses may override it.
/// </summary>
/// <param name="state">The current parser state.</param>
/// <param name="scriptureRef">The scripture reference of the note text that is ending.</param>
protected virtual void EndNoteText(UsfmParserState state, ScriptureRef scriptureRef)
{
    // Intentionally empty.
}
206272
207273 private void StartVerseText ( UsfmParserState state )
@@ -227,22 +293,11 @@ private void StartNonVerseText(UsfmParserState state)
227293
/// <summary>
/// Ends the current non-verse text segment: drops a trailing embedded element, notifies the
/// reference-taking overload, and pops the current text type.
/// </summary>
/// <param name="state">The current parser state.</param>
private void EndNonVerseText(UsfmParserState state)
{
    // Remove an embedded element from the top of the element stack before the reference is built.
    EndEmbeddedElements();

    ScriptureRef nonVerseRef = CreateNonVerseRef();
    EndNonVerseText(state, nonVerseRef);

    _curTextType.Pop();
}
233300
234- private void StartNoteText ( UsfmParserState state )
235- {
236- _curTextType . Push ( ScriptureTextType . Note ) ;
237- StartNoteText ( state , CreateNonVerseRef ( ) ) ;
238- }
239-
240- private void EndNoteText ( UsfmParserState state )
241- {
242- EndNoteText ( state , CreateNonVerseRef ( ) ) ;
243- _curTextType . Pop ( ) ;
244- }
245-
246301 private void UpdateVerseRef ( VerseRef verseRef , string marker )
247302 {
248303 if ( ! VerseRef . AreOverlappingVersesRanges ( verseRef , _curVerseRef ) )
@@ -270,6 +325,12 @@ private void EndParentElement()
270325 _curElements . Pop ( ) ;
271326 }
272327
/// <summary>
/// Pops the top element off the element stack when that element is an embedded character marker.
/// At most one element is removed.
/// </summary>
private void EndEmbeddedElements()
{
    if (_curElements.Count == 0)
        return;

    if (IsEmbeddedCharacter(_curElements.Peek().Name))
        _curElements.Pop();
}
333+
273334 private IReadOnlyList < ScriptureRef > CreateVerseRefs ( )
274335 {
275336 return _curVerseRef . HasMultiple
@@ -300,5 +361,25 @@ private void CheckConvertVerseParaToNonVerse(UsfmParserState state)
300361 StartNonVerseText ( state ) ;
301362 }
302363 }
364+
/// <summary>
/// Reports whether the handler is inside an embedded element, or whether the given marker is itself an
/// embedded character marker.
/// </summary>
/// <param name="marker">The marker to check.</param>
/// <returns>
/// <c>true</c> if the embedded flag is set or <paramref name="marker"/> is an embedded character marker;
/// otherwise <c>false</c>.
/// </returns>
public bool InEmbedded(string marker)
{
    if (_inEmbedded)
        return true;

    return IsEmbeddedCharacter(marker);
}
369+
/// <summary>
/// Determines whether the marker is the note-text marker <c>ft</c>.
/// </summary>
/// <param name="marker">The marker to check; may be null.</param>
/// <returns><c>true</c> only when <paramref name="marker"/> is exactly <c>"ft"</c>.</returns>
private static bool IsNoteText(string marker) => string.Equals(marker, "ft");
374+
/// <summary>
/// Determines whether the marker belongs to an embedded element, judged by its first character.
/// </summary>
/// <param name="marker">The marker to check; may be null or empty.</param>
/// <returns>
/// <c>true</c> when <paramref name="marker"/> is non-empty and starts with <c>f</c>, <c>x</c>, or
/// <c>z</c>; otherwise <c>false</c>.
/// </returns>
public static bool IsEmbeddedPart(string marker)
{
    if (string.IsNullOrEmpty(marker))
        return false;

    switch (marker[0])
    {
        case 'f':
        case 'x':
        case 'z':
            return true;
        default:
            return false;
    }
}
379+
/// <summary>
/// Determines whether the marker is one of the embedded character markers:
/// <c>f</c>, <c>fe</c>, <c>fig</c>, <c>fm</c>, or <c>x</c>.
/// </summary>
/// <param name="marker">The marker to check.</param>
/// <returns><c>true</c> when <paramref name="marker"/> matches one of the listed markers.</returns>
private static bool IsEmbeddedCharacter(string marker) => marker.IsOneOf("f", "fe", "fig", "fm", "x");
303384 }
304385}
0 commit comments