@@ -31,7 +31,7 @@ def test_decoding_compression(token_buffer_size):
3131 """Test that DecodingPress compresses the cache during decoding."""
3232
3333 # Initialize pipeline with a small model
34- pipe = pipeline ("kv-press-text-generation" , model = "MaxJeblick/llama2-0b-unit-test " , device_map = "auto" )
34+ pipe = pipeline ("kv-press-text-generation" , model = "Qwen/Qwen3-0.6B " , device_map = "auto" )
3535
3636 # Create a DecodingPress with KnormPress
3737 press = DecodingPress (
@@ -65,7 +65,7 @@ def test_prefill_decoding_press_calls_both_phases():
6565 """Test that PrefillDecodingPress calls both prefilling and decoding presses."""
6666
6767 # Initialize pipeline
68- pipe = pipeline ("kv-press-text-generation" , model = "MaxJeblick/llama2-0b-unit-test " , device_map = "auto" )
68+ pipe = pipeline ("kv-press-text-generation" , model = "Qwen/Qwen3-0.6B " , device_map = "auto" )
6969
7070 # Create PrefillDecodingPress with both presses
7171 combined_press = PrefillDecodingPress (
@@ -99,7 +99,7 @@ def test_decoding_press_without_prefill():
9999 """Test that DecodingPress works correctly when used standalone (no prefill compression)."""
100100
101101 # Initialize pipeline
102- pipe = pipeline ("kv-press-text-generation" , model = "MaxJeblick/llama2-0b-unit-test " , device_map = "auto" )
102+ pipe = pipeline ("kv-press-text-generation" , model = "Qwen/Qwen3-0.6B " , device_map = "auto" )
103103
104104 # Create DecodingPress only
105105 decoding_press = DecodingPress (base_press = KnormPress (compression_ratio = 0.4 ), compression_interval = 5 , target_size = 64 )
@@ -129,7 +129,7 @@ def test_prefill_decoding_press_decoding_only():
129129 """Test PrefillDecodingPress with only decoding press (no prefill compression)."""
130130
131131 # Initialize pipeline
132- pipe = pipeline ("kv-press-text-generation" , model = "MaxJeblick/llama2-0b-unit-test " , device_map = "auto" )
132+ pipe = pipeline ("kv-press-text-generation" , model = "Qwen/Qwen3-0.6B " , device_map = "auto" )
133133
134134 # Create PrefillDecodingPress with only decoding press
135135 combined_press = PrefillDecodingPress (
@@ -167,7 +167,7 @@ def test_decoding_press_equivalence():
167167 torch .manual_seed (42 )
168168
169169 # Initialize pipeline
170- pipe = pipeline ("kv-press-text-generation" , model = "MaxJeblick/llama2-0b-unit-test " , device_map = "auto" )
170+ pipe = pipeline ("kv-press-text-generation" , model = "Qwen/Qwen3-0.6B " , device_map = "auto" )
171171
172172 # Create standalone decoding press
173173 decoding_press = DecodingPress (base_press = KnormPress (compression_ratio = 0.5 ), compression_interval = 3 , target_size = 52 )
@@ -222,7 +222,7 @@ def test_all_presses_work_with_decoding_press(press_config):
222222 """Test that all default presses work as base presses for DecodingPress."""
223223
224224 # Initialize pipeline
225- pipe = pipeline ("kv-press-text-generation" , model = "MaxJeblick/llama2-0b-unit-test " , device_map = "auto" )
225+ pipe = pipeline ("kv-press-text-generation" , model = "Qwen/Qwen3-0.6B " , device_map = "auto" )
226226
227227 # Get press class and use the first (easier) configuration
228228 press_cls = press_config ["cls" ]
@@ -274,7 +274,7 @@ def test_all_presses_work_with_decoding_press(press_config):
274274def test_compression_actually_reduces_memory ():
275275 """Test that compression actually reduces memory usage compared to no compression."""
276276
277- pipe = pipeline ("kv-press-text-generation" , model = "MaxJeblick/llama2-0b-unit-test " , device_map = "auto" )
277+ pipe = pipeline ("kv-press-text-generation" , model = "Qwen/Qwen3-0.6B " , device_map = "auto" )
278278
279279 context = "The quick brown fox jumps over the lazy dog. " * 15 # Long context
280280 question = "What animal jumps over the dog?"
0 commit comments