+use std::path::Path;
+
 use crate::{
     chat::{
         Author, Conversation, DeveloperContent, Message, ReasoningEffort, Role, SystemContent,
@@ -10,12 +12,25 @@ use crate::{
 use pretty_assertions::{assert_eq, Comparison};
 use serde_json::json;
 
-fn parse_tokens(text: &str) -> Vec<Rank> {
-    text.split_whitespace()
+fn parse_tokens(text: impl AsRef<str>) -> Vec<Rank> {
+    text.as_ref()
+        .split_whitespace()
         .map(|s| s.parse().unwrap())
         .collect()
 }
 
+fn load_test_data(path: impl AsRef<Path>) -> String {
+    // on windows, we need to replace \r\n with \n
+    let cargo_manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
+    let src_dir = cargo_manifest_dir.join("src");
+    let path = src_dir.join(path);
+    std::fs::read_to_string(path)
+        .unwrap()
+        .replace("\r\n", "\n")
+        .trim_end()
+        .to_string()
+}
+
 const ENCODINGS: [HarmonyEncodingName; 1] = [HarmonyEncodingName::HarmonyGptOss];
 
 #[test]
@@ -25,7 +40,7 @@ fn test_simple_convo() {
         let expected_tokens = encoding
             .tokenizer
             .encode(
-                include_str!("../test-data/test_simple_convo.txt").trim_end(),
+                load_test_data("../test-data/test_simple_convo.txt").as_str(),
                 &encoding.tokenizer.special_tokens(),
             )
             .0;
@@ -50,45 +65,42 @@ fn test_simple_convo_with_effort() {
     let test_cases = [
         (
             ReasoningEffort::Low,
-            include_str!("../test-data/test_simple_convo_low_effort.txt"),
+            load_test_data("../test-data/test_simple_convo_low_effort.txt"),
             true,
         ),
         (
             ReasoningEffort::Medium,
-            include_str!("../test-data/test_simple_convo_medium_effort.txt"),
+            load_test_data("../test-data/test_simple_convo_medium_effort.txt"),
             true,
         ),
         (
             ReasoningEffort::High,
-            include_str!("../test-data/test_simple_convo_high_effort.txt"),
+            load_test_data("../test-data/test_simple_convo_high_effort.txt"),
             true,
         ),
         (
             ReasoningEffort::Low,
-            include_str!("../test-data/test_simple_convo_low_effort_no_instruction.txt"),
+            load_test_data("../test-data/test_simple_convo_low_effort_no_instruction.txt"),
             false,
         ),
         (
             ReasoningEffort::Medium,
-            include_str!("../test-data/test_simple_convo_medium_effort_no_instruction.txt"),
+            load_test_data("../test-data/test_simple_convo_medium_effort_no_instruction.txt"),
             false,
         ),
         (
             ReasoningEffort::High,
-            include_str!("../test-data/test_simple_convo_high_effort_no_instruction.txt"),
+            load_test_data("../test-data/test_simple_convo_high_effort_no_instruction.txt"),
             false,
         ),
     ];
 
     for encoding_name in ENCODINGS {
         let encoding = load_harmony_encoding(encoding_name).unwrap();
-        for (effort, expected_text, use_instruction) in test_cases {
+        for &(effort, ref expected_text, use_instruction) in &test_cases {
             let expected_tokens = encoding
                 .tokenizer
-                .encode(
-                    expected_text.trim_end(),
-                    &encoding.tokenizer.special_tokens(),
-                )
+                .encode(expected_text.as_str(), &encoding.tokenizer.special_tokens())
                 .0;
             let sys = SystemContent::new()
                 .with_model_identity("You are ChatGPT, a large language model trained by OpenAI.")
@@ -123,8 +135,8 @@ fn test_simple_convo_with_effort() {
 
 #[test]
 fn test_simple_reasoning_response() {
-    let expected_tokens = parse_tokens(include_str!(
-        "../test-data/test_simple_reasoning_response.txt"
+    let expected_tokens = parse_tokens(load_test_data(
+        "../test-data/test_simple_reasoning_response.txt",
     ));
     for encoding_name in ENCODINGS {
         let encoding = load_harmony_encoding(encoding_name).unwrap();
@@ -180,7 +192,7 @@ fn test_reasoning_system_message() {
         let expected = encoding
             .tokenizer
             .encode(
-                include_str!("../test-data/test_reasoning_system_message.txt").trim_end(),
+                load_test_data("../test-data/test_reasoning_system_message.txt").as_str(),
                 &encoding.tokenizer.special_tokens(),
             )
             .0;
@@ -211,8 +223,8 @@ fn test_reasoning_system_message_no_instruction() {
         let expected = encoding
             .tokenizer
             .encode(
-                include_str!("../test-data/test_reasoning_system_message_no_instruction.txt")
-                    .trim_end(),
+                load_test_data("../test-data/test_reasoning_system_message_no_instruction.txt")
+                    .as_str(),
                 &encoding.tokenizer.special_tokens(),
             )
             .0;
@@ -245,8 +257,8 @@ fn test_reasoning_system_message_with_dates() {
         let expected = encoding
             .tokenizer
             .encode(
-                include_str!("../test-data/test_reasoning_system_message_with_dates.txt")
-                    .trim_end(),
+                load_test_data("../test-data/test_reasoning_system_message_with_dates.txt")
+                    .as_str(),
                 &encoding.tokenizer.special_tokens(),
             )
             .0;
@@ -275,8 +287,7 @@ fn test_reasoning_system_message_with_dates() {
 #[test]
 fn test_render_functions_with_parameters() {
     let encoding = load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss).unwrap();
-    let expected_output =
-        include_str!("../test-data/test_render_functions_with_parameters.txt").trim_end();
+    let expected_output = load_test_data("../test-data/test_render_functions_with_parameters.txt");
 
     let sys = SystemContent::new()
         .with_reasoning_effort(ReasoningEffort::High)
@@ -382,7 +393,7 @@ fn test_render_functions_with_parameters() {
 #[test]
 fn test_browser_and_python_tool() {
     let encoding = load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss).unwrap();
-    let expected_output = include_str!("../test-data/test_browser_and_python_tool.txt").trim_end();
+    let expected_output = load_test_data("../test-data/test_browser_and_python_tool.txt");
 
     let convo = Conversation::from_messages([Message::from_role_and_content(
         Role::System,
@@ -403,7 +414,7 @@ fn test_browser_and_python_tool() {
 #[test]
 fn test_dropping_cot_by_default() {
     let encoding = load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss).unwrap();
-    let expected_output = include_str!("../test-data/test_dropping_cot_by_default.txt").trim_end();
+    let expected_output = load_test_data("../test-data/test_dropping_cot_by_default.txt");
 
     let convo = Conversation::from_messages([
         Message::from_role_and_content(Role::User, "What is 2 + 2?"),
@@ -433,8 +444,7 @@ fn test_dropping_cot_by_default() {
 #[test]
 fn test_does_not_drop_if_ongoing_analysis() {
     let encoding = load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss).unwrap();
-    let expected_output =
-        include_str!("../test-data/test_does_not_drop_if_ongoing_analysis.txt").trim_end();
+    let expected_output = load_test_data("../test-data/test_does_not_drop_if_ongoing_analysis.txt");
 
     let convo = Conversation::from_messages([
         Message::from_role_and_content(Role::User, "What is the weather in SF?"),
@@ -470,7 +480,7 @@ fn test_does_not_drop_if_ongoing_analysis() {
 #[test]
 fn test_preserve_cot() {
     let encoding = load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss).unwrap();
-    let expected_output = include_str!("../test-data/test_preserve_cot.txt").trim_end();
+    let expected_output = load_test_data("../test-data/test_preserve_cot.txt");
 
     let convo = Conversation::from_messages([
         Message::from_role_and_content(Role::User, "What is 2 + 2?"),
@@ -534,10 +544,10 @@ fn test_decode_utf8_invalid_token() {
 #[test]
 fn test_tool_response_parsing() {
     let encoding = load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss).unwrap();
-    let text_tokens = include_str!("../test-data/test_tool_response_parsing.txt").trim_end();
+    let text_tokens = load_test_data("../test-data/test_tool_response_parsing.txt");
     let tokens = encoding
         .tokenizer
-        .encode(text_tokens, &encoding.tokenizer.special_tokens())
+        .encode(&text_tokens, &encoding.tokenizer.special_tokens())
         .0;
 
     let expected_message = Message::from_author_and_content(
@@ -616,10 +626,10 @@ fn test_invalid_utf8_decoding() {
 #[test]
 fn test_streamable_parser() {
     let encoding = load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss).unwrap();
-    let text = include_str!("../test-data/test_streamable_parser.txt").trim_end();
+    let text = load_test_data("../test-data/test_streamable_parser.txt");
     let tokens = encoding
         .tokenizer
-        .encode(text, &encoding.tokenizer.special_tokens())
+        .encode(&text, &encoding.tokenizer.special_tokens())
         .0;
     let mut parser =
         crate::encoding::StreamableParser::new(encoding.clone(), Some(Role::Assistant)).unwrap();