@@ -9,6 +9,11 @@ module Reader
99 # input.at(offset). It might be more efficient to replace char-at-a-time reads
1010 # with Regexps that consume multiple characters at once.
1111
12+ # tr separators.segment
13+ # gs separators.element
14+ # us separators.component
15+ # rs separators.repetition
16+
1217 class Tokenizer
1318 include Inspect
1419
@@ -190,15 +195,15 @@ def _next_isa_segment_id(input)
190195 return eof ( "ISA" , input . position ) unless input . defined_at? ( s )
191196
192197 # The character after this "I" is not "S"
193- next unless input [ s , 1 ] == @s
198+ next unless input . start_with? ( @s , s ) # [s, 1] == @s
194199
195200 a = input . min_graphic_index ( s +1 )
196201
197202 #return eof("ISA", input.position_at(i)) unless input.defined_at?(a)
198203 return eof ( "ISA" , input . position ) unless input . defined_at? ( a )
199204
200205 # The character after this "S" is not "A"
201- next unless input [ a , 1 ] == @a
206+ next unless input . start_with? @a , a ) # [a, 1] == @a
202207
203208 # The next character determines the element separator. If it's an
204209 # alphanumeric or space, we assume this is not the start of an ISA
@@ -265,20 +270,30 @@ def _next_segment_id(input)
265270 buffer = input . pointer . drop_take ( offset , 0 )
266271 start_pos = input . position_at ( offset )
267272
268- while true
269- return eof ( "segment identifier" , input . position ) \
270- unless input . defined_at? ( offset )
273+ # while true
274+ # return eof("segment identifier", input.position) \
275+ # unless input.defined_at?(offset)
271276
272- char = input . at ( offset )
273- break if char == @separators . element
274- break if char == @separators . segment
277+ # char = input.at(offset)
278+ # break if char == @separators.element
279+ # break if char == @separators.segment
275280
276- # Zero-copy as long as we've not skipped over any characters yet
277- buffer << char if input . graphic? ( offset )
278- offset += 1
281+ # # Zero-copy as long as we've not skipped over any characters yet
282+ # buffer << char if input.graphic?(offset)
283+ # offset += 1
279284
280- break if buffer . length >= 3
281- end
285+ # break if buffer.length >= 3
286+ # end
287+
288+ # Whichever occurs first
289+ gs = input . index ( @separators . element , offset )
290+ tr = input . index ( @separators . segment , offset )
291+ xx = if gs and tr and gs < tr ; then gs end || tr || gs
292+ return eof ( "segment identifier" , input . position ) unless xx
293+
294+ length = xx - offset
295+ length = 3 if length > 3
296+ buffer = input [ offset , length ]
282297
283298 # This is the only String allocation we cannot get around. The `match?`
284299 # call either has a pattern with \A..\z, or the length of segment_id
@@ -292,7 +307,7 @@ def _next_segment_id(input)
292307 return expected ( "segment identifier, found %s" % segment_id . inspect ,
293308 start_pos ) unless segment_id . match? ( VALID_SEGMENT_ID )
294309
295- return done ( segment_id . to_sym , start_pos , input . drop! ( offset ) )
310+ return done ( segment_id . to_sym , start_pos , input . drop! ( xx ) )
296311 end
297312
298313 # @param input should be positioned on an element separator: "NM1[*].."
@@ -429,27 +444,57 @@ def _read_component_element(input, repeatable, segment_id, element_idx, componen
429444 builder = @switcher_ . switch ( repeatable , input . position )
430445
431446 while input . defined_at? ( offset )
432- char = input . at ( offset )
447+ # char = input.at(offset)
448+
449+ # if repeatable and char == @separators.repetition
450+ # builder.add(Tokens::SimpleElementTok.build(buffer, repeat_pos))
451+ # offset += 1
452+ # repeat_pos = input.position_at(offset)
453+
454+ # elsif char == @separators.segment \
455+ # or char == @separators.element \
456+ # or char == @separators.component \
457+ # or char == @separators.repetition
458+ # # Because we're not repeatable, a repetition separator could
459+ # # belong to the parent/composite element. If it's not repeatable
460+ # # either, an error can be returned.
461+ # builder.add(Tokens::ComponentElementTok.build(buffer, repeat_pos))
462+ # return done(builder.build, builder.position, input.drop!(offset))
463+
464+ # else
465+ # # This is zero-copy as long as we haven't skipped any characters
466+ # buffer << char if input.graphic?(offset)
467+ # offset += 1
468+ # end
469+
470+ # Whichever occurs first
471+ tr = input . index ( @separators . segment , offset ) if @separators . segment
472+ gs = input . index ( @separators . element , offset ) if @separators . element
473+ us = input . index ( @separators . component , offset ) if @separators . component
474+ xx = if gs and tr and gs < tr ; then gs end || tr || gs
475+ xx = if xx and us and xx < us ; then xx end || us || xx
476+
477+ rs = input . index ( @separators . repetition , offset ) \
478+ if @separators . repetition and repeatable
479+
480+ if rs and rs < xx
481+ length = rs - offset
482+ buffer = input [ offset , length ]
433483
434- if repeatable and char == @separators . repetition
435484 builder . add ( Tokens ::SimpleElementTok . build ( buffer , repeat_pos ) )
436- repeat_pos = input . position_at ( offset + 1 )
437- offset += 1
485+ offset = rs + 1
486+ repeat_pos = input . position_at ( offset )
487+ elsif xx
488+ length = xx - offset
489+ buffer = input [ offset , length ]
438490
439- elsif char == @separators . segment \
440- or char == @separators . element \
441- or char == @separators . component \
442- or char == @separators . repetition
443491 # Because we're not repeatable, a repetition separator could
444492 # belong to the parent/composite element. If it's not repeatable
445493 # either, an error can be returned.
446494 builder . add ( Tokens ::ComponentElementTok . build ( buffer , repeat_pos ) )
447- return done ( builder . build , builder . position , input . drop! ( offset ) )
448-
495+ return done ( builder . build , builder . position , input . drop! ( xx ) )
449496 else
450- # This is zero-copy as long as we haven't skipped any characters
451- buffer << char if input . graphic? ( offset )
452- offset += 1
497+ break
453498 end
454499 end
455500
@@ -482,29 +527,58 @@ def _read_simple_element(input, repeatable, segment_id, element_idx)
482527 input . position ) unless input . start_with? ( @separators . element )
483528
484529 offset = input . min_graphic_index ( 1 )
485- buffer = input . pointer . drop_take ( offset , 0 )
530+ # buffer = input.pointer.drop_take(offset, 0)
486531 start_pos = input . position
487532 repeat_pos = input . position
488533 builder = @switcher . switch ( repeatable , input . position )
489534
490535 while input . defined_at? ( offset )
491- char = input . at ( offset )
492-
493- if char == @separators . element \
494- or char == @separators . segment
495- builder . add ( Tokens ::SimpleElementTok . build ( buffer , repeat_pos ) )
496- return done ( builder . build , start_pos , input . drop! ( offset ) )
536+ # char = input.at(offset)
537+
538+ # if char == @separators.element \
539+ # or char == @separators.segment
540+ # builder.add(Tokens::SimpleElementTok.build(buffer, repeat_pos))
541+ # return done(builder.build, start_pos, input.drop!(offset))
542+
543+ # elsif repeatable and char == @separators.repetition
544+ # builder.add(Tokens::SimpleElementTok.build(buffer, repeat_pos))
545+ # offset += 1
546+ # buffer = input.pointer.drop_take(offset, 0)
547+ # repeat_pos = input.position_at(offset)
548+
549+ # else
550+ # # This is zero-copy as long as we haven't skipped any characters
551+ # buffer << char if input.graphic?(offset)
552+ # offset += 1
553+ # end
554+
555+ # Whichever occurs first
556+ tr = input . index ( @separators . segment , offset ) if @separators . segment
557+ gs = input . index ( @separators . element , offset ) if @separators . element
558+ xx = if gs and tr and gs < tr ; then gs end || tr || gs
559+ break unless xx
560+
561+ rs = input . index ( @separators . repetition , offset ) if @separators . repetition
562+
563+ if rs and rs < xx
564+ # @separators.repetition
565+ length = rs - offset
566+ buffer = input [ offset , length ]
497567
498- elsif repeatable and char == @separators . repetition
499568 builder . add ( Tokens ::SimpleElementTok . build ( buffer , repeat_pos ) )
500- offset += 1
569+ offset = rs + 1
501570 buffer = input . pointer . drop_take ( offset , 0 )
502571 repeat_pos = input . position_at ( offset )
572+ elsif xx
573+ # @separators.element
574+ # @separators.segment
575+ length = xx - offset
576+ buffer = input [ offset , length ]
503577
578+ builder . add ( Tokens ::SimpleElementTok . build ( buffer , repeat_pos ) )
579+ return done ( builder . build , start_pos , input . drop! ( xx ) )
504580 else
505- # This is zero-copy as long as we haven't skipped any characters
506- buffer << char if input . graphic? ( offset )
507- offset += 1
581+ break
508582 end
509583 end
510584
0 commit comments