# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.

-import pytest
-from pytest import approx
+import os

from unittest.mock import patch

+import pytest
+
import torch
+from llama_recipes.data.sampler import LengthBasedBatchSampler
+
+from llama_recipes.finetuning import main
+from pytest import approx
from torch.optim import AdamW
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.sampler import BatchSampler

-from llama_recipes.finetuning import main
-from llama_recipes.data.sampler import LengthBasedBatchSampler
-

def get_fake_dataset():
-    return [{
-        "input_ids":[1],
-        "attention_mask":[1],
-        "labels":[1],
-        }]
-
-@patch('llama_recipes.finetuning.torch.cuda.is_available')
-@patch('llama_recipes.finetuning.train')
-@patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
-@patch('llama_recipes.finetuning.AutoTokenizer.from_pretrained')
-@patch('llama_recipes.finetuning.get_preprocessed_dataset')
-@patch('llama_recipes.finetuning.optim.AdamW')
-@patch('llama_recipes.finetuning.StepLR')
+    return [
+        {
+            "input_ids": [1],
+            "attention_mask": [1],
+            "labels": [1],
+        }
+    ]
+
+
+@patch("llama_recipes.finetuning.torch.cuda.is_available")
+@patch("llama_recipes.finetuning.train")
+@patch("llama_recipes.finetuning.LlamaForCausalLM.from_pretrained")
+@patch("llama_recipes.finetuning.AutoTokenizer.from_pretrained")
+@patch("llama_recipes.finetuning.get_preprocessed_dataset")
+@patch("llama_recipes.finetuning.optim.AdamW")
+@patch("llama_recipes.finetuning.StepLR")
@pytest.mark.parametrize("cuda_is_available", [True, False])
-def test_finetuning_no_validation(step_lr, optimizer, get_dataset, tokenizer, get_model, train, cuda, cuda_is_available):
+def test_finetuning_no_validation(
+    step_lr,
+    optimizer,
+    get_dataset,
+    tokenizer,
+    get_model,
+    train,
+    cuda,
+    cuda_is_available,
+):
    kwargs = {"run_validation": False}

    get_dataset.return_value = get_fake_dataset()
    cuda.return_value = cuda_is_available

+    get_model.return_value.get_input_embeddings.return_value.weight.shape = [0]
+
    main(**kwargs)

    assert train.call_count == 1
@@ -53,20 +69,31 @@ def test_finetuning_no_validation(step_lr, optimizer, get_dataset, tokenizer, ge
        assert get_model.return_value.to.call_count == 0


-@patch('llama_recipes.finetuning.torch.cuda.is_available')
-@patch('llama_recipes.finetuning.train')
-@patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
-@patch('llama_recipes.finetuning.AutoTokenizer.from_pretrained')
-@patch('llama_recipes.finetuning.get_preprocessed_dataset')
-@patch('llama_recipes.finetuning.optim.AdamW')
-@patch('llama_recipes.finetuning.StepLR')
+@patch("llama_recipes.finetuning.torch.cuda.is_available")
+@patch("llama_recipes.finetuning.train")
+@patch("llama_recipes.finetuning.LlamaForCausalLM.from_pretrained")
+@patch("llama_recipes.finetuning.AutoTokenizer.from_pretrained")
+@patch("llama_recipes.finetuning.get_preprocessed_dataset")
+@patch("llama_recipes.finetuning.optim.AdamW")
+@patch("llama_recipes.finetuning.StepLR")
@pytest.mark.parametrize("cuda_is_available", [True, False])
-def test_finetuning_with_validation(step_lr, optimizer, get_dataset, tokenizer, get_model, train, cuda, cuda_is_available):
+def test_finetuning_with_validation(
+    step_lr,
+    optimizer,
+    get_dataset,
+    tokenizer,
+    get_model,
+    train,
+    cuda,
+    cuda_is_available,
+):
    kwargs = {"run_validation": True}

    get_dataset.return_value = get_fake_dataset()
    cuda.return_value = cuda_is_available

+    get_model.return_value.get_input_embeddings.return_value.weight.shape = [0]
+
    main(**kwargs)

    assert train.call_count == 1
@@ -83,22 +110,36 @@ def test_finetuning_with_validation(step_lr, optimizer, get_dataset, tokenizer,
    else:
        assert get_model.return_value.to.call_count == 0

-@patch('llama_recipes.finetuning.torch.cuda.is_available')
-@patch('llama_recipes.finetuning.train')
-@patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
-@patch('llama_recipes.finetuning.AutoTokenizer.from_pretrained')
-@patch('llama_recipes.finetuning.get_preprocessed_dataset')
-@patch('llama_recipes.finetuning.generate_peft_config')
-@patch('llama_recipes.finetuning.get_peft_model')
-@patch('llama_recipes.finetuning.optim.AdamW')
-@patch('llama_recipes.finetuning.StepLR')
+
+@patch("llama_recipes.finetuning.torch.cuda.is_available")
+@patch("llama_recipes.finetuning.train")
+@patch("llama_recipes.finetuning.LlamaForCausalLM.from_pretrained")
+@patch("llama_recipes.finetuning.AutoTokenizer.from_pretrained")
+@patch("llama_recipes.finetuning.get_preprocessed_dataset")
+@patch("llama_recipes.finetuning.generate_peft_config")
+@patch("llama_recipes.finetuning.get_peft_model")
+@patch("llama_recipes.finetuning.optim.AdamW")
+@patch("llama_recipes.finetuning.StepLR")
@pytest.mark.parametrize("cuda_is_available", [True, False])
-def test_finetuning_peft(step_lr, optimizer, get_peft_model, gen_peft_config, get_dataset, tokenizer, get_model, train, cuda, cuda_is_available):
+def test_finetuning_peft_lora(
+    step_lr,
+    optimizer,
+    get_peft_model,
+    gen_peft_config,
+    get_dataset,
+    tokenizer,
+    get_model,
+    train,
+    cuda,
+    cuda_is_available,
+):
    kwargs = {"use_peft": True}

    get_dataset.return_value = get_fake_dataset()
    cuda.return_value = cuda_is_available

+    get_model.return_value.get_input_embeddings.return_value.weight.shape = [0]
+
    main(**kwargs)

    if cuda_is_available:
@@ -110,21 +151,64 @@ def test_finetuning_peft(step_lr, optimizer, get_peft_model, gen_peft_config, ge
    assert get_peft_model.return_value.print_trainable_parameters.call_count == 1


-@patch('llama_recipes.finetuning.train')
-@patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
-@patch('llama_recipes.finetuning.AutoTokenizer.from_pretrained')
-@patch('llama_recipes.finetuning.get_preprocessed_dataset')
-@patch('llama_recipes.finetuning.get_peft_model')
-@patch('llama_recipes.finetuning.StepLR')
-def test_finetuning_weight_decay(step_lr, get_peft_model, get_dataset, tokenizer, get_model, train, mocker):
-    kwargs = {"weight_decay": 0.01}
+@patch("llama_recipes.finetuning.get_peft_model")
+@patch("llama_recipes.finetuning.setup")
+@patch("llama_recipes.finetuning.train")
+@patch("llama_recipes.finetuning.LlamaForCausalLM.from_pretrained")
+@patch("llama_recipes.finetuning.AutoTokenizer.from_pretrained")
+@patch("llama_recipes.finetuning.get_preprocessed_dataset")
+def test_finetuning_peft_llama_adapter(
+    get_dataset, tokenizer, get_model, train, setup, get_peft_model
+):
+    kwargs = {
+        "use_peft": True,
+        "peft_method": "llama_adapter",
+        "enable_fsdp": True,
+    }

    get_dataset.return_value = get_fake_dataset()

-    model = mocker.MagicMock(name="Model")
-    model.parameters.return_value = [torch.ones(1,1)]
+    get_model.return_value.get_input_embeddings.return_value.weight.shape = [0]
+
+    os.environ["RANK"] = "0"
+    os.environ["LOCAL_RANK"] = "0"
+    os.environ["WORLD_SIZE"] = "1"
+    os.environ["MASTER_ADDR"] = "localhost"
+    os.environ["MASTER_PORT"] = "12345"
+
+    with pytest.raises(
+        RuntimeError,
+        match="Llama_adapter is currently not supported in combination with FSDP",
+    ):
+        main(**kwargs)
+
+    GET_ME_OUT = "Get me out of here"
+    get_peft_model.side_effect = RuntimeError(GET_ME_OUT)
+
+    kwargs["enable_fsdp"] = False
+
+    with pytest.raises(
+        RuntimeError,
+        match=GET_ME_OUT,
+    ):
+        main(**kwargs)
+

-    get_model.return_value = model
+@patch("llama_recipes.finetuning.train")
+@patch("llama_recipes.finetuning.LlamaForCausalLM.from_pretrained")
+@patch("llama_recipes.finetuning.AutoTokenizer.from_pretrained")
+@patch("llama_recipes.finetuning.get_preprocessed_dataset")
+@patch("llama_recipes.finetuning.get_peft_model")
+@patch("llama_recipes.finetuning.StepLR")
+def test_finetuning_weight_decay(
+    step_lr, get_peft_model, get_dataset, tokenizer, get_model, train
+):
+    kwargs = {"weight_decay": 0.01}
+
+    get_dataset.return_value = get_fake_dataset()
+
+    get_model.return_value.parameters.return_value = [torch.ones(1, 1)]
+    get_model.return_value.get_input_embeddings.return_value.weight.shape = [0]

    main(**kwargs)

@@ -139,17 +223,21 @@ def test_finetuning_weight_decay(step_lr, get_peft_model, get_dataset, tokenizer
    assert optimizer.state_dict()["param_groups"][0]["weight_decay"] == approx(0.01)


-@patch('llama_recipes.finetuning.train')
-@patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
-@patch('llama_recipes.finetuning.AutoTokenizer.from_pretrained')
-@patch('llama_recipes.finetuning.get_preprocessed_dataset')
-@patch('llama_recipes.finetuning.optim.AdamW')
-@patch('llama_recipes.finetuning.StepLR')
-def test_batching_strategy(step_lr, optimizer, get_dataset, tokenizer, get_model, train):
+@patch("llama_recipes.finetuning.train")
+@patch("llama_recipes.finetuning.LlamaForCausalLM.from_pretrained")
+@patch("llama_recipes.finetuning.AutoTokenizer.from_pretrained")
+@patch("llama_recipes.finetuning.get_preprocessed_dataset")
+@patch("llama_recipes.finetuning.optim.AdamW")
+@patch("llama_recipes.finetuning.StepLR")
+def test_batching_strategy(
+    step_lr, optimizer, get_dataset, tokenizer, get_model, train
+):
    kwargs = {"batching_strategy": "packing"}

    get_dataset.return_value = get_fake_dataset()

+    get_model.return_value.get_input_embeddings.return_value.weight.shape = [0]
+
    main(**kwargs)

    assert train.call_count == 1
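Note: these tests drive llama_recipes.finetuning.main end to end while replacing every heavy dependency (model loading, tokenizer, dataset preprocessing, the training loop) with unittest.mock.patch, and they give the mocked model just enough real structure to survive main(), such as a concrete weight.shape on the input embeddings. The sketch below illustrates the same pattern on a small stand-in class; FakeTrainer and its methods are illustrative assumptions, not part of llama-recipes.

# Self-contained sketch of the patching pattern used by these tests.
# FakeTrainer only mirrors the shape of llama_recipes.finetuning.main;
# it is a hypothetical stand-in, not the real module.
from unittest.mock import patch


class FakeTrainer:
    def load_model(self):
        raise RuntimeError("would download real weights")

    def train(self, model):
        raise RuntimeError("would need a GPU")

    def main(self, **kwargs):
        model = self.load_model()
        # Like the code under test, inspect the embedding weight shape; this is
        # why the tests above pin weight.shape to a concrete list ([0]) instead
        # of leaving it as a bare MagicMock attribute.
        assert model.get_input_embeddings().weight.shape[0] >= 0
        self.train(model)


def test_main_with_mocks():
    trainer = FakeTrainer()
    with patch.object(trainer, "load_model") as load_model, patch.object(
        trainer, "train"
    ) as train:
        # Give the mock just enough structure for main() to run.
        load_model.return_value.get_input_embeddings.return_value.weight.shape = [0]

        trainer.main(run_validation=False)

        # Same style of assertion as above: the training loop was entered once.
        assert train.call_count == 1


if __name__ == "__main__":
    test_main_with_mocks()
    print("ok")

Patching at the boundary of the code under test keeps call-count assertions such as train.call_count == 1 meaningful without requiring GPUs or real model weights.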