22
33import pytest
44
5- from guidance import gen , models
5+ from guidance import gen , models , select
66
77
88def test_basic ():
@@ -73,6 +73,56 @@ def test_stop_quote(selected_model):
7373 assert not lm ["title" ].endswith ('"' )
7474
7575
def test_metrics_smoke(selected_model: models.Model):
    """Smoke-test the engine token counters across two short generations.

    Resets the engine metrics, then alternates literal text with one-token
    ``gen`` calls and checks that the input and output token counters move
    as expected.  Output counts are compared against small ranges rather
    than exact values because token healing makes the exact count uncertain.
    """
    lm = selected_model
    lm.engine.reset_metrics()

    lm += "abcd"
    print(f"{ lm .engine .metrics = } ")
    lm += gen("first", max_tokens=1)
    print(f"{ lm .engine .metrics = } ")
    # Can't be sure of exact count due to token healing
    assert lm.engine.metrics.engine_output_tokens in (1, 2)
    assert lm.engine.metrics.engine_input_tokens >= 1
    last_input_tokens = lm.engine.metrics.engine_input_tokens

    lm += "fg"
    lm += gen("second", max_tokens=1)
    # Again, trouble with healing.  Both bounds must hold at once: joining
    # them with "or" (">= 2 or <= 4") is true for every integer, so the
    # assertion could never fail.
    assert 2 <= lm.engine.metrics.engine_output_tokens <= 4
    assert lm.engine.metrics.engine_input_tokens > last_input_tokens
100+
101+
def test_metrics_select(selected_model: models.Model):
    """Check the engine token counters after a ``select`` over long options.

    Resets the engine metrics, appends a short prefix followed by a
    three-way ``select``, and asserts that both counters advanced and that
    more input tokens than output tokens were recorded.
    """
    choices = [
        "ride a bicycle down the road",
        "row in a boat along the river",
        "go for a swim in the ocean",
    ]

    lm = selected_model
    lm.engine.reset_metrics()

    lm += "I will "
    lm += select(choices)
    print(f"lm={ str (lm )} ")
    print(f"{ lm .engine .metrics = } ")

    metrics = lm.engine.metrics
    assert metrics.engine_input_tokens > 1
    assert metrics.engine_output_tokens > 0
    # Guidance should be able to force the generation after only a couple of
    # tokens, so even though the options are long, relatively few output
    # tokens should be needed
    assert metrics.engine_input_tokens > metrics.engine_output_tokens
124+
125+
76126def test_unicode (selected_model ):
77127 # black makes this test ugly -- easier to read with fmt: off
78128 # fmt: off
@@ -85,11 +135,18 @@ def test_unicode(selected_model):
85135 # fmt: on
86136
87137
def test_unicode2(selected_model: models.Model):
    """Generate after a prompt containing a non-ASCII apostrophe.

    Resets the engine metrics, appends the prompt plus a ten-token ``gen``,
    and checks the recorded input and output token counts.
    """
    lm = selected_model
    lm.engine.reset_metrics()
    prompt = "Janet’s ducks lay 16 eggs per day"
    lm += prompt + gen(max_tokens=10)

    metrics = lm.engine.metrics
    assert metrics.engine_input_tokens > 1
    # Due to token healing, we can't be sure of the
    # precise output count
    assert metrics.engine_output_tokens in (10, 11)
93150
94151
95152def test_gsm8k ():
0 commit comments