@@ -59,7 +59,7 @@ def _assert_tokens_eq(encoding, expected: List[int], actual: List[int]): # type
59
59
60
60
61
61
def read_expected_tokens(file_path: Path) -> List[int]:
    """Read whitespace-separated integer token ids from a text file.

    The file is decoded explicitly as UTF-8 so the parsed result does not
    depend on the platform's default locale encoding.

    Args:
        file_path: Path of the file to read.

    Returns:
        The integers found in the file, in order of appearance. An empty
        or whitespace-only file yields an empty list.

    Raises:
        ValueError: If a whitespace-separated field is not an integer literal.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        # str.split() with no argument splits on any run of whitespace,
        # so spaces, tabs and newlines are all accepted as separators.
        return [int(x) for x in f.read().split()]
64
64
65
65
@@ -78,7 +78,9 @@ def test_simple_convo(encoding_name):
78
78
encoding = load_harmony_encoding (encoding_name )
79
79
80
80
expected_text = (
81
- (ROOT_DIR / "test-data" / "test_simple_convo.txt" ).read_text ().rstrip ()
81
+ (ROOT_DIR / "test-data" / "test_simple_convo.txt" )
82
+ .read_text (encoding = "utf-8" )
83
+ .rstrip ()
82
84
)
83
85
expected_tokens = encoding .encode (expected_text , allowed_special = "all" )
84
86
@@ -143,7 +145,7 @@ def test_simple_convo_with_effort(encoding_name):
143
145
]
144
146
145
147
for effort , tokens_path , use_instruction in test_cases :
146
- expected_text = tokens_path .read_text ().rstrip ()
148
+ expected_text = tokens_path .read_text (encoding = "utf-8" ).rstrip ()
147
149
expected_tokens = encoding .encode (expected_text , allowed_special = "all" )
148
150
sys = (
149
151
SystemContent .new ()
@@ -299,7 +301,7 @@ def test_reasoning_system_message(encoding_name):
299
301
300
302
expected_text = (
301
303
(ROOT_DIR / "test-data" / "test_reasoning_system_message.txt" )
302
- .read_text ()
304
+ .read_text (encoding = "utf-8" )
303
305
.rstrip ()
304
306
)
305
307
expected = encoding .encode (expected_text , allowed_special = "all" )
@@ -336,7 +338,7 @@ def test_reasoning_system_message_no_instruction(encoding_name):
336
338
337
339
expected_text = (
338
340
(ROOT_DIR / "test-data" / "test_reasoning_system_message_no_instruction.txt" )
339
- .read_text ()
341
+ .read_text (encoding = "utf-8" )
340
342
.rstrip ()
341
343
)
342
344
expected = encoding .encode (expected_text , allowed_special = "all" )
@@ -376,7 +378,7 @@ def test_reasoning_system_message_with_dates(encoding_name):
376
378
377
379
expected_text = (
378
380
(ROOT_DIR / "test-data" / "test_reasoning_system_message_with_dates.txt" )
379
- .read_text ()
381
+ .read_text (encoding = "utf-8" )
380
382
.rstrip ()
381
383
)
382
384
expected = encoding .encode (expected_text , allowed_special = "all" )
@@ -409,7 +411,7 @@ def test_render_functions_with_parameters():
409
411
410
412
expected_output = (
411
413
(ROOT_DIR / "test-data" / "test_render_functions_with_parameters.txt" )
412
- .read_text ()
414
+ .read_text (encoding = "utf-8" )
413
415
.rstrip ()
414
416
)
415
417
@@ -526,7 +528,9 @@ def test_render_functions_with_parameters():
526
528
def test_no_tools ():
527
529
encoding = load_harmony_encoding (HarmonyEncodingName .HARMONY_GPT_OSS )
528
530
expected_output = (
529
- (ROOT_DIR / "test-data" / "test_no_tools.txt" ).read_text ().rstrip ()
531
+ (ROOT_DIR / "test-data" / "test_no_tools.txt" )
532
+ .read_text (encoding = "utf-8" )
533
+ .rstrip ()
530
534
)
531
535
532
536
convo = Conversation .from_messages (
@@ -546,7 +550,9 @@ def test_no_tools():
546
550
def test_browser_tool_only ():
547
551
encoding = load_harmony_encoding (HarmonyEncodingName .HARMONY_GPT_OSS )
548
552
expected_output = (
549
- (ROOT_DIR / "test-data" / "test_browser_tool_only.txt" ).read_text ().rstrip ()
553
+ (ROOT_DIR / "test-data" / "test_browser_tool_only.txt" )
554
+ .read_text (encoding = "utf-8" )
555
+ .rstrip ()
550
556
)
551
557
552
558
convo = Conversation .from_messages (
@@ -569,7 +575,7 @@ def test_browser_and_function_tool():
569
575
encoding = load_harmony_encoding (HarmonyEncodingName .HARMONY_GPT_OSS )
570
576
expected_output = (
571
577
(ROOT_DIR / "test-data" / "test_browser_and_function_tool.txt" )
572
- .read_text ()
578
+ .read_text (encoding = "utf-8" )
573
579
.rstrip ()
574
580
)
575
581
@@ -611,7 +617,7 @@ def test_browser_and_python_tool():
611
617
encoding = load_harmony_encoding (HarmonyEncodingName .HARMONY_GPT_OSS )
612
618
expected_output = (
613
619
(ROOT_DIR / "test-data" / "test_browser_and_python_tool.txt" )
614
- .read_text ()
620
+ .read_text (encoding = "utf-8" )
615
621
.rstrip ()
616
622
)
617
623
@@ -637,7 +643,7 @@ def test_dropping_cot_by_default():
637
643
638
644
expected_output = (
639
645
(ROOT_DIR / "test-data" / "test_dropping_cot_by_default.txt" )
640
- .read_text ()
646
+ .read_text (encoding = "utf-8" )
641
647
.rstrip ()
642
648
)
643
649
@@ -667,7 +673,7 @@ def test_does_not_drop_if_ongoing_analysis():
667
673
668
674
expected_output = (
669
675
(ROOT_DIR / "test-data" / "test_does_not_drop_if_ongoing_analysis.txt" )
670
- .read_text ()
676
+ .read_text (encoding = "utf-8" )
671
677
.rstrip ()
672
678
)
673
679
@@ -702,7 +708,9 @@ def test_preserve_cot():
702
708
encoding = load_harmony_encoding (HarmonyEncodingName .HARMONY_GPT_OSS )
703
709
704
710
expected_output = (
705
- (ROOT_DIR / "test-data" / "test_preserve_cot.txt" ).read_text ().rstrip ()
711
+ (ROOT_DIR / "test-data" / "test_preserve_cot.txt" )
712
+ .read_text (encoding = "utf-8" )
713
+ .rstrip ()
706
714
)
707
715
708
716
convo = Conversation .from_messages (
@@ -738,7 +746,7 @@ def test_keep_analysis_between_final_messages():
738
746
739
747
expected_output = (
740
748
(ROOT_DIR / "test-data" / "test_keep_analysis_between_finals.txt" )
741
- .read_text ()
749
+ .read_text (encoding = "utf-8" )
742
750
.rstrip ()
743
751
)
744
752
@@ -880,7 +888,9 @@ def test_tool_response_parsing():
880
888
encoding = load_harmony_encoding (HarmonyEncodingName .HARMONY_GPT_OSS )
881
889
882
890
text_tokens = (
883
- (ROOT_DIR / "test-data" / "test_tool_response_parsing.txt" ).read_text ().rstrip ()
891
+ (ROOT_DIR / "test-data" / "test_tool_response_parsing.txt" )
892
+ .read_text (encoding = "utf-8" )
893
+ .rstrip ()
884
894
)
885
895
886
896
expected_message = (
@@ -904,7 +914,9 @@ def test_streamable_parser():
904
914
encoding = load_harmony_encoding (HarmonyEncodingName .HARMONY_GPT_OSS )
905
915
906
916
text_tokens = (
907
- (ROOT_DIR / "test-data" / "test_streamable_parser.txt" ).read_text ().rstrip ()
917
+ (ROOT_DIR / "test-data" / "test_streamable_parser.txt" )
918
+ .read_text (encoding = "utf-8" )
919
+ .rstrip ()
908
920
)
909
921
910
922
tokens = encoding .encode (text_tokens , allowed_special = "all" )
0 commit comments