@@ -528,9 +528,7 @@ def test_query_run_has_correct_offset(self):
528528 result = [qr .to_dict () for qr in q .query_runs ]
529529 expected = [
530530 {'end' : 0 , 'start' : 0 , 'tokens' : u'inc' },
531- {
532- 'end' : 123 ,
533- 'start' : 1 ,
531+ {'end' : 123 , 'start' : 1 ,
534532 'tokens' : (
535533 u'this library is free software you can redistribute it and or modify '
536534 u'it under the terms of the gnu library general public license as '
@@ -542,10 +540,43 @@ def test_query_run_has_correct_offset(self):
542540 u'license for more details you should have received a copy of the gnu '
543541 u'library general public license along with this library see the file '
544542 u'copying lib if not write to the free software foundation inc 51 '
545- u'franklin street fifth floor boston ma 02110 1301 usa'
546- )
547- }]
543+ u'franklin street fifth floor boston ma 02110 1301 usa' )
544+ }
545+ ]
546+ assert expected == result
547+
548+ def test_query_run_and_tokenizing_breaking_works__with_plus_as_expected (self ):
549+ rule_dir = self .get_test_loc ('query/run_breaking/rules' )
550+ rules = list (models .load_rules (rule_dir ))
551+ idx = index .LicenseIndex (rules )
552+ query_doc = self .get_test_loc ('query/run_breaking/query.txt' )
553+ q = Query (query_doc , idx = idx )
554+ result = [qr .to_dict () for qr in q .query_runs ]
555+ expected = [
556+ {'end' : 121 , 'start' : 0 ,
557+ 'tokens' :
558+ 'this library is free software you can redistribute it '
559+ 'and or modify it under the terms of the gnu library '
560+ 'general public license as published by the free software '
561+ 'foundation either version 2 of the license or at your '
562+ 'option any later version this library is distributed in '
563+ 'the hope that it will be useful but without any warranty '
564+ 'without even the implied warranty of merchantability or '
565+ 'fitness for a particular purpose see the gnu library '
566+ 'general public license for more details you should have '
567+ 'received a copy of the gnu library general public '
568+ 'license along with this library see the file copying lib '
569+ 'if not write to the free software foundation 51 franklin '
570+ 'street fifth floor boston ma 02110 1301 usa' }
571+ ]
572+
548573 assert expected == result
574+ q .tokens
575+ # check that the tokens of rule 0 (idx.tids_by_rid[0]) are exactly the same, in the same order, as the tokens of the last query run
576+ txtid = idx .tokens_by_tid
577+ qrt = [txtid [t ] for t in q .query_runs [- 1 ].tokens ]
578+ irt = [txtid [t ] for t in idx .tids_by_rid [0 ]]
579+ assert irt == qrt
549580
550581
551582class TestQueryWithFullIndex (FileBasedTesting ):
@@ -590,11 +621,16 @@ def test_query_run_tokens(self):
590621 assert 1 == len (result .query_runs )
591622 qr = result .query_runs [0 ]
592623 # NOTE: this is not a token present in any rules or licenses
624+ unknown_tokens = ('baridationally' ,)
625+ assert unknown_tokens not in idx .dictionary
626+ assert u' ' .join ([t for t in query_s .split () if t not in unknown_tokens ]) == u' ' .join (idx .tokens_by_tid [t ] for t in qr .tokens )
627+
628+ def test_query_run_tokens_matchable (self ):
629+ idx = cache .get_index ()
630+ # NOTE: this is not a token present in any rules or licenses
593631 unknown_token = u'baridationally'
594632 assert unknown_token not in idx .dictionary
595- assert u' ' .join ([t for t in query_s .split () if t not in (unknown_token , 'proc' )]) == u' ' .join (idx .tokens_by_tid [t ] for t in qr .tokens )
596633
597- def test_query_run_tokens_matchable (self ):
598634 query_s = u' ' .join (u'''
599635
600636 3 unable to create proc entry license gpl description driver author eric
@@ -607,27 +643,24 @@ def test_query_run_tokens_matchable(self):
607643 linux include asm include asm generic include acpi acpi c posix types 32 h
608644 types h types h h h h h
609645 ''' .split ())
610- idx = cache .get_index ()
611646 result = Query (query_string = query_s , idx = idx )
612-
613647 assert 1 == len (result .query_runs )
614648 qr = result .query_runs [0 ]
615649 expected_qr0 = u' ' .join (u'''
616- 3 unable to create entry license gpl description driver author eric depends 2
617- 6 24 19 generic smp mod module acpi register driver acpi disabled acpi
618- install notify acpi get status cache caches create entry generate event acpi
619- evaluate object acpi remove notify remove entry acpi driver acpi acpi gcc gnu
620- 4 2 3 ubuntu 4 2 3 gcc gnu 4 2 3 ubuntu 4 2 3 current stack pointer current
621- stack pointer this module end usr src modules acpi include linux include asm
622- include asm generic include acpi acpi c posix types 32 h types h types h h h
623- h h
650+ 3 unable to create proc entry license gpl description driver author eric
651+ depends 2 6 24 19 generic smp mod module acpi register driver
652+ proc acpi disabled acpi install notify acpi get status cache
653+ caches create proc entry generate proc event acpi evaluate
654+ object acpi remove notify remove proc entry acpi driver acpi
655+ acpi gcc gnu 4 2 3 ubuntu 4 2 3 gcc gnu 4 2 3 ubuntu 4 2 3 current stack
656+ pointer current stack pointer this module end usr src modules acpi include
657+ linux include asm include asm generic include acpi acpi c posix types 32 h
658+ types h types h h h h h
624659 ''' .split ())
625660 assert expected_qr0 == u' ' .join (idx .tokens_by_tid [t ] for t in qr .tokens )
626661
627- # NOTE: this is not a token present in any rules or licenses
628- unknown_token = u'baridationally'
629- assert unknown_token not in idx .dictionary
630662 assert expected_qr0 == u' ' .join (idx .tokens_by_tid [t ] for p , t in enumerate (qr .tokens ) if p in qr .matchables )
631663
664+ # only gpl is in high matchables
632665 expected = u'gpl'
633666 assert expected == u' ' .join (idx .tokens_by_tid [t ] for p , t in enumerate (qr .tokens ) if p in qr .high_matchables )
0 commit comments