From 93bee1a78e75b4614dbb7c9fcb4bbab9125e57ab Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Mon, 16 Sep 2024 10:10:42 +0200 Subject: [PATCH 01/63] Make generation tests generic --- tests/composition/test_parallel.py | 8 ++++---- tests/methods/test_compacter.py | 8 ++++---- tests/methods/test_prefix_tuning.py | 8 ++++---- tests/test_adapter.py | 9 +++++++++ tests/test_adapter_embeddings.py | 2 +- tests/test_adapter_heads.py | 6 ++---- 6 files changed, 24 insertions(+), 17 deletions(-) diff --git a/tests/composition/test_parallel.py b/tests/composition/test_parallel.py index 31dce09969..c385ea24d1 100644 --- a/tests/composition/test_parallel.py +++ b/tests/composition/test_parallel.py @@ -131,10 +131,10 @@ def test_parallel_generate(self): seq_output_length = 32 # Finally, also check if generation works properly - if self.is_speech_model: - input_ids = self.get_input_samples((1, 80, 3000), config=model1.config)["input_features"] - else: - input_ids = self.get_input_samples((1, 4), config=model1.config)["input_ids"] + input_ids = self.extract_input_ids( + self.get_input_samples(self.generate_input_samples_shape, config=model1.config) + ) + input_ids = input_ids.to(torch_device) generated = model1.generate(input_ids, max_length=seq_output_length) self.assertLessEqual(generated.shape, (2, seq_output_length)) diff --git a/tests/methods/test_compacter.py b/tests/methods/test_compacter.py index 292fab1efb..2c91b75366 100644 --- a/tests/methods/test_compacter.py +++ b/tests/methods/test_compacter.py @@ -71,10 +71,10 @@ def test_compacter_generate(self): seq_output_length = 32 # Finally, also check if generation works properly - if self.is_speech_model: - input_ids = self.get_input_samples((1, 80, 3000), config=model1.config)["input_features"] - else: - input_ids = self.get_input_samples((1, 4), config=model1.config)["input_ids"] + input_ids = self.extract_input_ids( + self.get_input_samples(self.generate_input_samples_shape, config=model1.config) + ) + input_ids = input_ids.to(torch_device) generated = model1.generate(input_ids, max_length=seq_output_length) self.assertLessEqual(generated.shape, (1, seq_output_length)) diff --git a/tests/methods/test_prefix_tuning.py b/tests/methods/test_prefix_tuning.py index dd443c0d0b..9c3b0822a2 100644 --- a/tests/methods/test_prefix_tuning.py +++ b/tests/methods/test_prefix_tuning.py @@ -94,10 +94,10 @@ def test_prefix_tuning_generate(self): seq_output_length = 32 # Finally, also check if generation works properly - if self.is_speech_model: - input_ids = self.get_input_samples((1, 80, 3000), config=model1.config)["input_features"] - else: - input_ids = self.get_input_samples((1, 4), config=model1.config)["input_ids"] + input_ids = self.extract_input_ids( + self.get_input_samples(self.generate_input_samples_shape, config=model1.config) + ) + input_ids = input_ids.to(torch_device) generated = model1.generate(input_ids, max_length=seq_output_length) self.assertLessEqual(generated.shape, (1, seq_output_length)) diff --git a/tests/test_adapter.py b/tests/test_adapter.py index bafa7e65a9..1802aa5c05 100644 --- a/tests/test_adapter.py +++ b/tests/test_adapter.py @@ -35,8 +35,10 @@ def ids_tensor(shape, vocab_size, rng=None, name=None): class AdapterTestBase: # If not overriden by subclass, AutoModel should be used. 
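    # Note (descriptive, based on the changes in this patch): the generation tests above now
    # build their inputs generically by calling
    # self.get_input_samples(self.generate_input_samples_shape, config=model.config) and passing
    # the result through self.extract_input_ids(); the speech test base overrides
    # extract_input_ids() to return "input_features" instead of "input_ids".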
model_class = AutoAdapterModel + tokenizer_name = "tests/fixtures/SiBERT" # Default shape of inputs to use default_input_samples_shape = (3, 64) + generate_input_samples_shape = (1, 4) leave_out_layers = [0, 1] do_run_train_tests = True # default arguments for test_adapter_heads @@ -98,6 +100,9 @@ def assert_adapter_unavailable(self, model, adapter_name): self.assertFalse(adapter_name in model.adapters_config) self.assertEqual(len(model.get_adapter(adapter_name)), 0) + def extract_input_ids(self, inputs): + return inputs["input_ids"] + class VisionAdapterTestBase(AdapterTestBase): default_input_samples_shape = (3, 3, 224, 224) @@ -146,10 +151,14 @@ class SpeechAdapterTestBase(AdapterTestBase): """Base class for speech adapter tests.""" default_input_samples_shape = (3, 80, 3000) # (batch_size, n_mels, enc_seq_len) + generate_input_samples_shape = (1, 80, 3000) is_speech_model = True # Flag for tests to determine if the model is a speech model due to input format difference time_window = 3000 # Time window for audio samples seq_length = 80 + def extract_input_ids(self, inputs): + return inputs["input_features"] + def add_head(self, model, name, head_type="seq2seq_lm", **kwargs): """Adds a head to the model.""" if head_type == "audio_classification": diff --git a/tests/test_adapter_embeddings.py b/tests/test_adapter_embeddings.py index 160828c776..64a07d381f 100644 --- a/tests/test_adapter_embeddings.py +++ b/tests/test_adapter_embeddings.py @@ -182,6 +182,6 @@ def _instantiate_tokenizer(self, model): tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name) input_data = self.get_input_samples(config=self.config()) else: - tokenizer = AutoTokenizer.from_pretrained("tests/fixtures/SiBERT") + tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name) input_data = self.get_input_samples((1, 128), vocab_size=tokenizer.vocab_size, config=model.config) return tokenizer, input_data diff --git a/tests/test_adapter_heads.py b/tests/test_adapter_heads.py index 541debf35b..c0c3812cc2 100644 --- a/tests/test_adapter_heads.py +++ b/tests/test_adapter_heads.py @@ -175,10 +175,8 @@ def test_seq2seq_lm_head(self): # Finally, also check if generation works properly input_shape = self._get_input_shape() - if self.is_speech_model: - input_ids = self.get_input_samples(input_shape, config=model1.config)["input_features"] - else: - input_ids = self.get_input_samples(input_shape, config=model1.config)["input_ids"] + input_ids = self.extract_input_ids(self.get_input_samples(input_shape, config=model1.config)) + input_ids = input_ids.to(torch_device) # Use a different length for the seq2seq output seq_output_length = self.seq_length + 30 From 7e65e825150c6bec364e9ce4aa74a21a817e8489 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Mon, 28 Oct 2024 10:13:59 +0100 Subject: [PATCH 02/63] Draft Refactoring AdapterTestBase --- tests/test_adapter.py | 217 ++++++++++++++++++++---------------------- 1 file changed, 101 insertions(+), 116 deletions(-) diff --git a/tests/test_adapter.py b/tests/test_adapter.py index 1802aa5c05..fe72ae6ecf 100644 --- a/tests/test_adapter.py +++ b/tests/test_adapter.py @@ -16,39 +16,33 @@ def make_config(config_class, **kwargs): return staticmethod(lambda: config_class(**kwargs)) -def ids_tensor(shape, vocab_size, rng=None, name=None): - # Creates a random int32 tensor of the shape within the vocab size - if rng is None: - rng = global_rng +class AbstractAdapterTestBase: + """Base class for adapter tests. 
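    For example, a text-model test base typically only overrides the config and tokenizer
    (illustrative values, mirroring the existing Albert test base):

        class AlbertAdapterTestBase(TextAdapterTestBase):
            config_class = AlbertConfig
            config = make_config(AlbertConfig, hidden_size=64, num_hidden_layers=5, num_attention_heads=4)
            tokenizer_name = "albert-base-v2"
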
Defines basic functions and attributes with default values which are used in the tests. + Model test classes should inherit from this class or subclass and override the attributes and functions as needed. + """ - total_dims = 1 - for dim in shape: - total_dims *= dim + model_class = AutoAdapterModel + tokenizer_name = "tests/fixtures/SiBERT" # path to default tokenizer config available in the test repo + config = None # specified in the actual model test classes + input_shape = () # (batch_size, seq_length) + input_shape_generate = () # (batch_size, seq_length) + leave_out_layers = [] + do_run_train_tests = True - values = [] - for _ in range(total_dims): - values.append(rng.randint(0, vocab_size - 1)) + def get_input_samples(self, shape=None, vocab_size=5000, config=None, **kwargs): + """Creates a dummy batch of samples in the format required for the model.""" + raise NotImplementedError("get_input_samples() must be implemented in the subclass.") - return torch.tensor(data=values, dtype=torch.long, device=torch_device).view(shape).contiguous() + def add_head(self, model, name, **kwargs): + """Adds a dummy head to the model.""" + raise NotImplementedError("add_head() must be implemented in the subclass.") + def get_dataset(self, **kwargs): + """Loads a dummy dataset for the model.""" + raise NotImplementedError("get_dataset() must be implemented in the subclass.") -class AdapterTestBase: - # If not overriden by subclass, AutoModel should be used. - model_class = AutoAdapterModel - tokenizer_name = "tests/fixtures/SiBERT" - # Default shape of inputs to use - default_input_samples_shape = (3, 64) - generate_input_samples_shape = (1, 4) - leave_out_layers = [0, 1] - do_run_train_tests = True - # default arguments for test_adapter_heads - batch_size = 1 - seq_length = 128 - is_speech_model = ( - False # Flag for tests to determine if the model is a speech model due to input format difference - ) - - def get_model(self): + def build_model(self): + """Builds a model instance for testing based on the provied model configuration.""" if self.model_class == AutoAdapterModel: model = AutoAdapterModel.from_config(self.config()) else: @@ -57,32 +51,67 @@ def get_model(self): model.to(torch_device) return model - def get_input_samples(self, shape=None, vocab_size=5000, config=None, **kwargs): - shape = shape or self.default_input_samples_shape + def build_random_tensor(self, shape, dtype=torch.float, **kwargs): + """Creates a random tensor of the given shape.""" total_dims = 1 for dim in shape: total_dims *= dim - values = [] - for _ in range(total_dims): - values.append(random.randint(0, vocab_size - 1)) - input_ids = torch.tensor(data=values, dtype=torch.long, device=torch_device).view(shape).contiguous() - # this is needed e.g. 
for BART + if dtype == torch.long and "vocab_size" in kwargs: + values = [random.randint(0, kwargs["vocab_size"] - 1) for _ in range(total_dims)] + elif dtype == torch.float: + values = [random.random() for _ in range(total_dims)] + else: + raise ValueError(f"Unsupported dtype {dtype}") + return torch.tensor(data=values, dtype=dtype, device=torch_device).view(shape).contiguous() + + def assert_adapter_available(self, model, adapter_name): + """Check wether the adapter name is present in the model's adapter config and has been created.""" + self.assertTrue(adapter_name in model.adapters_config) + self.assertGreater(len(model.get_adapter(adapter_name)), 0) + + def assert_adapter_unavailable(self, model, adapter_name): + """Check wether the adapter name is not present in the model's adapter config and has not been created.""" + self.assertFalse(adapter_name in model.adapters_config) + self.assertEqual(len(model.get_adapter(adapter_name)), 0) + + def extract_input_ids(self, inputs): + # TODO: Check if this is needed in all tests and if it differs between text, vision and speech models + return inputs["input_ids"] + + +class TextAdapterTestBase(AbstractAdapterTestBase): + """Base class for adapter tests for text models. Text models test classes should inherit from this class and override the attributes and functions as needed.""" + + input_shape = (3, 64) + input_shape_generate = (1, 4) + leave_out_layers = [0, 1] + batch_size, seq_length = ( + 1, + 128, + ) # TODO: Check in which tests this is needed and if we can simplify by using input_shape + + def get_input_samples(self, shape=None, vocab_size=5000, config=None, **kwargs): + shape = shape or self.input_shape + input_ids = self.build_random_tensor(shape, dtype=torch.long) + + # Ensures that only tha last token in each sample is the eos token (needed e.g. for BART) if config and config.eos_token_id is not None and config.eos_token_id < vocab_size: input_ids[input_ids == config.eos_token_id] = random.randint(0, config.eos_token_id - 1) input_ids[:, -1] = config.eos_token_id in_data = {"input_ids": input_ids} + # Add decoder input ids for models with a decoder if config and config.is_encoder_decoder: in_data["decoder_input_ids"] = input_ids.clone() return in_data def add_head(self, model, name, **kwargs): + # TODO: Check if this should be more modular model.add_classification_head(name, **kwargs) return model.heads[name].config["num_labels"] - def dataset(self, tokenizer=None): - # setup tokenizer + def get_dataset(self, tokenizer=None): if tokenizer is None: tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name, use_fast=False) if tokenizer.pad_token is None: @@ -92,75 +121,57 @@ def dataset(self, tokenizer=None): ) return GlueDataset(data_args, tokenizer=tokenizer, mode="train") - def assert_adapter_available(self, model, adapter_name): - self.assertTrue(adapter_name in model.adapters_config) - self.assertGreater(len(model.get_adapter(adapter_name)), 0) - - def assert_adapter_unavailable(self, model, adapter_name): - self.assertFalse(adapter_name in model.adapters_config) - self.assertEqual(len(model.get_adapter(adapter_name)), 0) - - def extract_input_ids(self, inputs): - return inputs["input_ids"] +class VisionAdapterTestBase(AbstractAdapterTestBase): + """Base class for adapter tests for vision models. 
Vision models test classes should inherit from this class and override the attributes and functions as needed.""" -class VisionAdapterTestBase(AdapterTestBase): - default_input_samples_shape = (3, 3, 224, 224) + input_shape = (3, 3, 224, 224) def get_input_samples(self, shape=None, config=None, dtype=torch.float, **kwargs): - shape = shape or self.default_input_samples_shape - total_dims = 1 - for dim in shape: - total_dims *= dim - values = [] - for _ in range(total_dims): - values.append(random.random()) - pixel_values = torch.tensor(data=values, dtype=dtype, device=torch_device).view(shape).contiguous() - in_data = {"pixel_values": pixel_values} - - return in_data + shape = shape or self.input_shape + pixel_values = self.build_random_tensor(shape, dtype=dtype) + return {"pixel_values": pixel_values} def add_head(self, model, name, **kwargs): - if "num_labels" not in kwargs: - kwargs["num_labels"] = 10 + kwargs["num_labels"] = 10 if "num_labels" not in kwargs else kwargs["num_labels"] model.add_image_classification_head(name, **kwargs) return model.heads[name].config["num_labels"] - def dataset(self, feature_extractor=None): + def get_dataset(self, feature_extractor=None): + dataset = datasets.load_dataset( + "./tests/fixtures/samples/cifar10", + data_dir="./tests/fixtures/samples/cifar10", + split="train", + trust_remote_code=True, + ) if feature_extractor is None: feature_extractor = AutoFeatureExtractor.from_pretrained(self.feature_extractor_name) - + def transform(example_batch): inputs = feature_extractor([x for x in example_batch["img"]], return_tensors="pt") - inputs["labels"] = example_batch["label"] return inputs - dataset = datasets.load_dataset( - "./tests/fixtures/samples/cifar10", - data_dir="./tests/fixtures/samples/cifar10", - split="train", - trust_remote_code=True, - ) dataset = dataset.with_transform(transform) - return dataset -class SpeechAdapterTestBase(AdapterTestBase): - """Base class for speech adapter tests.""" +class AudioAdapterTestBase(AbstractAdapterTestBase): + """Base class for adapter tests for audio models. 
Audio models test classes should inherit from this class and override the attributes and functions as needed.""" - default_input_samples_shape = (3, 80, 3000) # (batch_size, n_mels, enc_seq_len) - generate_input_samples_shape = (1, 80, 3000) - is_speech_model = True # Flag for tests to determine if the model is a speech model due to input format difference + input_shape = (3, 80, 3000) # (batch_size, n_mels, enc_seq_len) + generate_input_shape = (1, 80, 3000) time_window = 3000 # Time window for audio samples seq_length = 80 - def extract_input_ids(self, inputs): - return inputs["input_features"] + _TASK_DATASET_MAPPING = { + # TODO: build global mapping for all tasks and datasets + "seq2seq_lm": "./tests/fixtures/audio_datasets/common_voice_encoded", + "audio_classification": "./tests/fixtures/audio_datasets/speech_commands_encoded", + } def add_head(self, model, name, head_type="seq2seq_lm", **kwargs): - """Adds a head to the model.""" + # TODO: simpify Audio tests by using the same head type for all tests if head_type == "audio_classification": model.add_audio_classification_head(name, **kwargs) return model.heads[name].config["num_labels"] @@ -172,48 +183,22 @@ def add_head(self, model, name, head_type="seq2seq_lm", **kwargs): raise ValueError(f"Head type {head_type} not supported.") def get_input_samples(self, shape=None, config=None, **kwargs): - """Creates a dummy batch of samples in the format required for speech models.""" shape = shape or self.default_input_samples_shape + in_data = {"input_features": self.build_random_tensor(shape, dtype=torch.float)} - # Input features - total_dims = 1 - for dim in shape: - total_dims *= dim - values = [] - for _ in range(total_dims): - values.append(random.random()) - input_features = torch.tensor(data=values, dtype=torch.float, device=torch_device).view(shape).contiguous() - in_data = {"input_features": input_features} - - # Decoder input ids + # Add decoder input ids for models with a decoder if config and config.is_encoder_decoder: - in_data["decoder_input_ids"] = ids_tensor((shape[:-1]), config.vocab_size) + in_data["decoder_input_ids"] = self.build_random_tensor( + (shape[:-1]), dtype=torch.long, vocab_size=config.vocab_size + ) return in_data - _TASK_DATASET_MAPPING = { - "seq2seq_lm": "./tests/fixtures/audio_datasets/common_voice_encoded", - "audio_classification": "./tests/fixtures/audio_datasets/speech_commands_encoded", - } - - def dataset(self, feature_extractor=None, processor=None, tokenizer=None, task_type: str = "seq2seq_lm", **kwargs): - """Returns a dataset to test speech model training. 
Standard dataset is for seq2seq_lm.""" - if task_type == "seq2seq_lm": - return self._prep_seq2seq_lm_dataset(task_type, **kwargs) - elif task_type == "audio_classification": - return self._prep_audio_classification_dataset(task_type, **kwargs) - - def _prep_seq2seq_lm_dataset(self, task_type, **kwargs): - """Prepares a dataset for conditional generation.""" - # The dataset is already processed and saved to disk, to save time during testing - # Preparation script can be found in tests/fixtures/audio_datasets/prepare_audio_datasets.py + def get_dataset(self, task_type: str = "seq2seq_lm", **kwargs): + # Dataset is already processed and saved to disk, to save time during testing + # Preparation script can be found in tests/fixtures/audio_datasets/respective_prepare_script.py dataset_path = self._TASK_DATASET_MAPPING[task_type] dataset = datasets.load_from_disk(dataset_path) return dataset["train"] - def _prep_audio_classification_dataset(self, task_type, **kwargs): - """Prepares a dataset for audio classification.""" - # The dataset is already processed and saved to disk, to save time during testing - # Preparation script can be found in tests/fixtures/audio_datasets/prepare_audio_datasets.py - dataset_path = self._TASK_DATASET_MAPPING[task_type] - dataset = datasets.load_from_disk(dataset_path) - return dataset["train"] + def extract_input_ids(self, inputs): + return inputs["input_features"] From 65c3fb700075edb0b152dacd4beef37f11272676 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Wed, 30 Oct 2024 11:22:53 +0100 Subject: [PATCH 03/63] Replace import class names --- tests/test_adapter.py | 4 ++-- tests/test_albert.py | 4 ++-- tests/test_bart.py | 4 ++-- tests/test_bert.py | 4 ++-- tests/test_bert_generation.py | 4 ++-- tests/test_clip.py | 8 ++++---- tests/test_deberta.py | 4 ++-- tests/test_debertaV2.py | 4 ++-- tests/test_distilbert.py | 4 ++-- tests/test_electra.py | 4 ++-- tests/test_encoder_decoder.py | 4 ++-- tests/test_gpt2.py | 4 ++-- tests/test_gptj.py | 4 ++-- tests/test_llama.py | 4 ++-- tests/test_mbart.py | 4 ++-- tests/test_mistral.py | 4 ++-- tests/test_mt5.py | 4 ++-- tests/test_plbart.py | 4 ++-- tests/test_roberta.py | 4 ++-- tests/test_t5.py | 4 ++-- tests/test_whisper.py | 4 ++-- tests/test_xlm_roberta.py | 4 ++-- tests/test_xmod.py | 4 ++-- 23 files changed, 48 insertions(+), 48 deletions(-) diff --git a/tests/test_adapter.py b/tests/test_adapter.py index fe72ae6ecf..6cf17c981e 100644 --- a/tests/test_adapter.py +++ b/tests/test_adapter.py @@ -41,7 +41,7 @@ def get_dataset(self, **kwargs): """Loads a dummy dataset for the model.""" raise NotImplementedError("get_dataset() must be implemented in the subclass.") - def build_model(self): + def get_model(self): """Builds a model instance for testing based on the provied model configuration.""" if self.model_class == AutoAdapterModel: model = AutoAdapterModel.from_config(self.config()) @@ -146,7 +146,7 @@ def get_dataset(self, feature_extractor=None): ) if feature_extractor is None: feature_extractor = AutoFeatureExtractor.from_pretrained(self.feature_extractor_name) - + def transform(example_batch): inputs = feature_extractor([x for x in example_batch["img"]], return_tensors="pt") inputs["labels"] = example_batch["label"] diff --git a/tests/test_albert.py b/tests/test_albert.py index 64dd62bc37..f40620887b 100644 --- a/tests/test_albert.py +++ b/tests/test_albert.py @@ -6,7 +6,7 @@ from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin from .methods import AllMethodsTestMixin -from 
.test_adapter import AdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, make_config from .test_adapter_backward_compability import CompabilityTestMixin from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_embeddings import EmbeddingTestMixin @@ -14,7 +14,7 @@ from .test_adapter_heads import PredictionHeadModelTestMixin -class AlbertAdapterTestBase(AdapterTestBase): +class AlbertAdapterTestBase(TextAdapterTestBase): config_class = AlbertConfig config = make_config( AlbertConfig, diff --git a/tests/test_bart.py b/tests/test_bart.py index 8c11dc7033..5191e56ffe 100644 --- a/tests/test_bart.py +++ b/tests/test_bart.py @@ -14,7 +14,7 @@ ReftTestMixin, UniPELTTestMixin, ) -from .test_adapter import AdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, make_config from .test_adapter_backward_compability import CompabilityTestMixin from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_embeddings import EmbeddingTestMixin @@ -22,7 +22,7 @@ from .test_adapter_heads import PredictionHeadModelTestMixin -class BartAdapterTestBase(AdapterTestBase): +class BartAdapterTestBase(TextAdapterTestBase): config_class = BartConfig config = make_config( BartConfig, diff --git a/tests/test_bert.py b/tests/test_bert.py index 7bde9b557a..9ef04e1178 100644 --- a/tests/test_bert.py +++ b/tests/test_bert.py @@ -6,7 +6,7 @@ from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin from .methods import AllMethodsTestMixin -from .test_adapter import AdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, make_config from .test_adapter_backward_compability import CompabilityTestMixin from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_embeddings import EmbeddingTestMixin @@ -14,7 +14,7 @@ from .test_adapter_heads import PredictionHeadModelTestMixin -class BertAdapterTestBase(AdapterTestBase): +class BertAdapterTestBase(TextAdapterTestBase): config_class = BertConfig config = make_config( BertConfig, diff --git a/tests/test_bert_generation.py b/tests/test_bert_generation.py index 48fe3e7b40..61f149cc6d 100644 --- a/tests/test_bert_generation.py +++ b/tests/test_bert_generation.py @@ -7,7 +7,7 @@ from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin from .methods import AllMethodsTestMixin -from .test_adapter import AdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, make_config from .test_adapter_backward_compability import CompabilityTestMixin from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_embeddings import EmbeddingTestMixin @@ -15,7 +15,7 @@ from .test_adapter_heads import PredictionHeadModelTestMixin -class BertGenerationAdapterTestBase(AdapterTestBase): +class BertGenerationAdapterTestBase(TextAdapterTestBase): config_class = BertGenerationConfig config = make_config( BertGenerationConfig, diff --git a/tests/test_clip.py b/tests/test_clip.py index 704c7a1648..056e0f9fd9 100644 --- a/tests/test_clip.py +++ b/tests/test_clip.py @@ -23,7 +23,7 @@ ReftTestMixin, UniPELTTestMixin, ) -from .test_adapter import AdapterTestBase, VisionAdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, VisionAdapterTestBase, make_config from .test_adapter_backward_compability import CompabilityTestMixin from .test_adapter_fusion_common import AdapterFusionModelTestMixin @@ -90,7 +90,7 @@ class 
CLIPVisionWithProjectionAdapterTest( pass -class CLIPTextAdapterTestBase(AdapterTestBase): +class CLIPTextAdapterTestBase(TextAdapterTestBase): model_class = CLIPTextModel config_class = CLIPTextConfig config = make_config( @@ -120,7 +120,7 @@ class CLIPTextAdapterTest( pass -class CLIPTextWithProjectionAdapterTestBase(AdapterTestBase): +class CLIPTextWithProjectionAdapterTestBase(TextAdapterTestBase): model_class = CLIPTextModelWithProjection config_class = CLIPTextConfig config = make_config( @@ -150,7 +150,7 @@ class CLIPTextWithProjectionAdapterTest( pass -class CLIPAdapterTestBase(AdapterTestBase): +class CLIPAdapterTestBase(TextAdapterTestBase): config_class = CLIPConfig config = staticmethod( lambda: CLIPConfig.from_text_vision_configs( diff --git a/tests/test_deberta.py b/tests/test_deberta.py index abd1a7a884..8d5fad2c88 100644 --- a/tests/test_deberta.py +++ b/tests/test_deberta.py @@ -6,7 +6,7 @@ from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin from .methods import AllMethodsTestMixin -from .test_adapter import AdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, make_config from .test_adapter_backward_compability import CompabilityTestMixin from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_embeddings import EmbeddingTestMixin @@ -14,7 +14,7 @@ from .test_adapter_heads import PredictionHeadModelTestMixin -class DebertaAdapterTestBase(AdapterTestBase): +class DebertaAdapterTestBase(TextAdapterTestBase): config_class = DebertaConfig config = make_config( DebertaConfig, diff --git a/tests/test_debertaV2.py b/tests/test_debertaV2.py index 6494e1f865..94ff32c7f3 100644 --- a/tests/test_debertaV2.py +++ b/tests/test_debertaV2.py @@ -6,7 +6,7 @@ from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin from .methods import AllMethodsTestMixin -from .test_adapter import AdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, make_config from .test_adapter_backward_compability import CompabilityTestMixin from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_embeddings import EmbeddingTestMixin @@ -14,7 +14,7 @@ from .test_adapter_heads import PredictionHeadModelTestMixin -class DebertaV2AdapterTestBase(AdapterTestBase): +class DebertaV2AdapterTestBase(TextAdapterTestBase): config_class = DebertaV2Config config = make_config( DebertaV2Config, diff --git a/tests/test_distilbert.py b/tests/test_distilbert.py index c90c39875c..92d010389f 100644 --- a/tests/test_distilbert.py +++ b/tests/test_distilbert.py @@ -6,7 +6,7 @@ from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin from .methods import AllMethodsTestMixin -from .test_adapter import AdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, make_config from .test_adapter_backward_compability import CompabilityTestMixin from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_embeddings import EmbeddingTestMixin @@ -14,7 +14,7 @@ from .test_adapter_heads import PredictionHeadModelTestMixin -class DistilBertAdapterTestBase(AdapterTestBase): +class DistilBertAdapterTestBase(TextAdapterTestBase): config_class = DistilBertConfig config = make_config( DistilBertConfig, diff --git a/tests/test_electra.py b/tests/test_electra.py index d3272a23d5..7dc6123796 100644 --- a/tests/test_electra.py +++ b/tests/test_electra.py @@ -6,7 +6,7 @@ from 
.composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin from .methods import AllMethodsTestMixin -from .test_adapter import AdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, make_config from .test_adapter_backward_compability import CompabilityTestMixin from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_embeddings import EmbeddingTestMixin @@ -14,7 +14,7 @@ from .test_adapter_heads import PredictionHeadModelTestMixin -class ElectraAdapterTestBase(AdapterTestBase): +class ElectraAdapterTestBase(TextAdapterTestBase): config_class = ElectraConfig config = make_config( ElectraConfig, diff --git a/tests/test_encoder_decoder.py b/tests/test_encoder_decoder.py index 708a6bfbb2..6d02155f86 100644 --- a/tests/test_encoder_decoder.py +++ b/tests/test_encoder_decoder.py @@ -14,11 +14,11 @@ PrefixTuningTestMixin, UniPELTTestMixin, ) -from .test_adapter import AdapterTestBase +from .test_adapter import TextAdapterTestBase from .test_adapter_fusion_common import AdapterFusionModelTestMixin -class EncoderDecoderAdapterTestBase(AdapterTestBase): +class EncoderDecoderAdapterTestBase(TextAdapterTestBase): model_class = EncoderDecoderModel config_class = EncoderDecoderConfig config = staticmethod( diff --git a/tests/test_gpt2.py b/tests/test_gpt2.py index 76d6d9221f..5745953c22 100644 --- a/tests/test_gpt2.py +++ b/tests/test_gpt2.py @@ -14,7 +14,7 @@ ReftTestMixin, UniPELTTestMixin, ) -from .test_adapter import AdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, make_config from .test_adapter_backward_compability import CompabilityTestMixin from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_embeddings import EmbeddingTestMixin @@ -22,7 +22,7 @@ from .test_adapter_heads import PredictionHeadModelTestMixin -class GPT2AdapterTestBase(AdapterTestBase): +class GPT2AdapterTestBase(TextAdapterTestBase): config_class = GPT2Config config = make_config( GPT2Config, diff --git a/tests/test_gptj.py b/tests/test_gptj.py index 934abf2904..87ea6ae766 100644 --- a/tests/test_gptj.py +++ b/tests/test_gptj.py @@ -14,7 +14,7 @@ ReftTestMixin, UniPELTTestMixin, ) -from .test_adapter import AdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, make_config from .test_adapter_backward_compability import CompabilityTestMixin from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_embeddings import EmbeddingTestMixin @@ -22,7 +22,7 @@ from .test_adapter_heads import PredictionHeadModelTestMixin -class GPTJAdapterTestBase(AdapterTestBase): +class GPTJAdapterTestBase(TextAdapterTestBase): config_class = GPTJConfig config = make_config( GPTJConfig, diff --git a/tests/test_llama.py b/tests/test_llama.py index d3c78e23f3..8392fa32c2 100644 --- a/tests/test_llama.py +++ b/tests/test_llama.py @@ -13,7 +13,7 @@ ReftTestMixin, UniPELTTestMixin, ) -from .test_adapter import AdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, make_config from .test_adapter_backward_compability import CompabilityTestMixin from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_embeddings import EmbeddingTestMixin @@ -21,7 +21,7 @@ from .test_adapter_heads import PredictionHeadModelTestMixin -class LlamaAdapterTestBase(AdapterTestBase): +class LlamaAdapterTestBase(TextAdapterTestBase): config_class = LlamaConfig config = make_config( LlamaConfig, diff --git a/tests/test_mbart.py b/tests/test_mbart.py 
index 56fa406daf..691e46e537 100644 --- a/tests/test_mbart.py +++ b/tests/test_mbart.py @@ -13,13 +13,13 @@ ReftTestMixin, UniPELTTestMixin, ) -from .test_adapter import AdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, make_config from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_fusion_common import AdapterFusionModelTestMixin from .test_adapter_heads import PredictionHeadModelTestMixin -class MBartAdapterTestBase(AdapterTestBase): +class MBartAdapterTestBase(TextAdapterTestBase): config_class = MBartConfig config = make_config( MBartConfig, diff --git a/tests/test_mistral.py b/tests/test_mistral.py index b10065a702..960d3f0443 100644 --- a/tests/test_mistral.py +++ b/tests/test_mistral.py @@ -13,7 +13,7 @@ ReftTestMixin, UniPELTTestMixin, ) -from .test_adapter import AdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, make_config from .test_adapter_backward_compability import CompabilityTestMixin from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_embeddings import EmbeddingTestMixin @@ -21,7 +21,7 @@ from .test_adapter_heads import PredictionHeadModelTestMixin -class MistralAdapterTestBase(AdapterTestBase): +class MistralAdapterTestBase(TextAdapterTestBase): config_class = MistralConfig config = make_config( MistralConfig, diff --git a/tests/test_mt5.py b/tests/test_mt5.py index a7d7c3a0fe..d1f76325d7 100644 --- a/tests/test_mt5.py +++ b/tests/test_mt5.py @@ -13,7 +13,7 @@ ReftTestMixin, UniPELTTestMixin, ) -from .test_adapter import AdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, make_config from .test_adapter_backward_compability import CompabilityTestMixin from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_embeddings import EmbeddingTestMixin @@ -22,7 +22,7 @@ @require_torch -class MT5AdapterTestBase(AdapterTestBase): +class MT5AdapterTestBase(TextAdapterTestBase): config_class = MT5Config config = make_config( MT5Config, diff --git a/tests/test_plbart.py b/tests/test_plbart.py index aa84457919..b268f6b2d2 100644 --- a/tests/test_plbart.py +++ b/tests/test_plbart.py @@ -13,7 +13,7 @@ PrefixTuningTestMixin, UniPELTTestMixin, ) -from .test_adapter import AdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, make_config from .test_adapter_backward_compability import CompabilityTestMixin from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_embeddings import EmbeddingTestMixin @@ -21,7 +21,7 @@ from .test_adapter_heads import PredictionHeadModelTestMixin -class PLBartAdapterTestBase(AdapterTestBase): +class PLBartAdapterTestBase(TextAdapterTestBase): config_class = PLBartConfig config = make_config( PLBartConfig, diff --git a/tests/test_roberta.py b/tests/test_roberta.py index 142a69e7a8..2e6df220e6 100644 --- a/tests/test_roberta.py +++ b/tests/test_roberta.py @@ -6,14 +6,14 @@ from .composition.test_parallel import ParallelAdapterInferenceTestMixin from .methods import AllMethodsTestMixin -from .test_adapter import AdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, make_config from .test_adapter_backward_compability import CompabilityTestMixin from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_fusion_common import AdapterFusionModelTestMixin from .test_adapter_heads import PredictionHeadModelTestMixin -class RobertaAdapterTestBase(AdapterTestBase): +class 
RobertaAdapterTestBase(TextAdapterTestBase): config_class = RobertaConfig config = make_config( RobertaConfig, diff --git a/tests/test_t5.py b/tests/test_t5.py index 1c2480c6bb..82aa9f1047 100644 --- a/tests/test_t5.py +++ b/tests/test_t5.py @@ -13,7 +13,7 @@ ReftTestMixin, UniPELTTestMixin, ) -from .test_adapter import AdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, make_config from .test_adapter_backward_compability import CompabilityTestMixin from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_embeddings import EmbeddingTestMixin @@ -22,7 +22,7 @@ @require_torch -class T5AdapterTestBase(AdapterTestBase): +class T5AdapterTestBase(TextAdapterTestBase): config_class = T5Config config = make_config( T5Config, diff --git a/tests/test_whisper.py b/tests/test_whisper.py index c3cd3d2206..5b40bd00f5 100644 --- a/tests/test_whisper.py +++ b/tests/test_whisper.py @@ -14,7 +14,7 @@ ReftTestMixin, UniPELTTestMixin, ) -from .test_adapter import SpeechAdapterTestBase, make_config +from .test_adapter import AudioAdapterTestBase, make_config from .test_adapter_backward_compability import CompabilityTestMixin from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_embeddings import EmbeddingTestMixin @@ -22,7 +22,7 @@ from .test_adapter_heads import PredictionHeadModelTestMixin -class WhisperAdapterTestBase(SpeechAdapterTestBase): +class WhisperAdapterTestBase(AudioAdapterTestBase): config_class = WhisperConfig config = make_config( WhisperConfig, diff --git a/tests/test_xlm_roberta.py b/tests/test_xlm_roberta.py index 9125b3fbeb..320251f920 100644 --- a/tests/test_xlm_roberta.py +++ b/tests/test_xlm_roberta.py @@ -4,12 +4,12 @@ from transformers.testing_utils import require_torch from .methods import AllMethodsTestMixin -from .test_adapter import AdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, make_config from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_fusion_common import AdapterFusionModelTestMixin -class XLMRobertaAdapterTestBase(AdapterTestBase): +class XLMRobertaAdapterTestBase(TextAdapterTestBase): config_class = XLMRobertaConfig config = make_config( XLMRobertaConfig, diff --git a/tests/test_xmod.py b/tests/test_xmod.py index 9ca2aaa70a..2306aa9c44 100644 --- a/tests/test_xmod.py +++ b/tests/test_xmod.py @@ -5,14 +5,14 @@ from .composition.test_parallel import ParallelAdapterInferenceTestMixin from .methods import AllMethodsTestMixin -from .test_adapter import AdapterTestBase, make_config +from .test_adapter import TextAdapterTestBase, make_config from .test_adapter_backward_compability import CompabilityTestMixin from .test_adapter_conversion import ModelClassConversionTestMixin from .test_adapter_fusion_common import AdapterFusionModelTestMixin from .test_adapter_heads import PredictionHeadModelTestMixin -class XmodAdapterTestBase(AdapterTestBase): +class XmodAdapterTestBase(TextAdapterTestBase): config_class = XmodConfig config = make_config( XmodConfig, From ee6166cb254f0725771609a41f2f511abde2db1d Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Fri, 1 Nov 2024 14:01:53 +0100 Subject: [PATCH 04/63] Base refactoring: - reorder directory structure to separate testing models and adapter methods (needs further refactoring for separating tests that are extecuted for each model vs tests that are just run once) - remove all model tests except albert for now (will be readded once final design is agreed on) - refactor albert adapter test class 
to group test mixins in categories which are then displayed accordingly by test viewer - adjust imports to reflect new directory structure --- tests/{models => fixtures}/__init__.py | 0 tests/methods/__init__.py | 42 ---- tests/models/test_bart.py | 12 - tests/models/test_beit.py | 12 - tests/models/test_bert.py | 12 - tests/models/test_bert_generation.py | 12 - tests/models/test_clip.py | 39 --- tests/models/test_deberta.py | 12 - tests/models/test_debertaV2.py | 12 - tests/models/test_distilbert.py | 12 - tests/models/test_electra.py | 12 - tests/models/test_encoder_decoder.py | 2 - tests/models/test_gpt2.py | 12 - tests/models/test_gptj.py | 12 - tests/models/test_llama.py | 12 - tests/models/test_mbart.py | 12 - tests/models/test_mistral.py | 12 - tests/models/test_mt5.py | 12 - tests/models/test_plbart.py | 12 - tests/models/test_roberta.py | 12 - tests/models/test_t5.py | 12 - tests/models/test_vit.py | 12 - tests/models/test_whisper.py | 12 - tests/models/test_xlm_roberta.py | 2 - tests/models/test_xmod.py | 12 - tests/test_albert.py | 55 ----- tests/test_bart.py | 68 ------ tests/test_beit.py | 45 ---- tests/test_bert.py | 51 ---- tests/test_bert_generation.py | 94 -------- tests/test_clip.py | 227 ------------------ tests/test_deberta.py | 55 ----- tests/test_debertaV2.py | 54 ----- tests/test_distilbert.py | 51 ---- tests/test_electra.py | 52 ---- tests/test_encoder_decoder.py | 90 ------- tests/test_gpt2.py | 67 ------ tests/test_gptj.py | 68 ------ tests/test_impl/__init__.py | 0 tests/{methods => test_impl}/base.py | 0 tests/test_impl/composition/__init__.py | 0 .../composition/test_adapter_composition.py | 3 +- .../composition/test_parallel.py | 1 + tests/test_impl/core/__init__.py | 0 .../test_adapter_backward_compability.py | 4 +- .../core}/test_adapter_config.py | 2 + .../core}/test_adapter_conversion.py | 1 + .../core}/test_adapter_fusion_common.py | 1 + .../core}/test_adapter_fusion_config.py | 2 + .../{ => test_impl/core}/test_adapter_hub.py | 4 +- .../core}/test_adapter_safetensors.py | 1 + .../core}/test_adapter_save_id2label.py | 2 + tests/test_impl/embeddings/__init__.py | 0 .../embeddings}/test_adapter_embeddings.py | 0 tests/test_impl/heads/__init__.py | 0 .../heads}/test_adapter_custom_head.py | 3 +- .../heads}/test_adapter_heads.py | 3 +- tests/test_impl/peft/__init__.py | 0 .../peft}/test_adapter_common.py | 4 +- .../peft}/test_compacter.py | 3 +- .../peft}/test_config_union.py | 2 +- tests/{methods => test_impl/peft}/test_ia3.py | 3 +- .../{methods => test_impl/peft}/test_lora.py | 3 +- .../peft}/test_prefix_tuning.py | 3 +- .../peft}/test_prompt_tuning.py | 3 +- .../{methods => test_impl/peft}/test_reft.py | 3 +- .../peft}/test_unipelt.py | 3 +- tests/test_impl/trainer/__init__.py | 0 .../trainer}/test_adapter_trainer.py | 0 .../trainer}/test_adapter_trainer_ext.py | 2 +- tests/test_llama.py | 66 ----- tests/test_mbart.py | 62 ----- tests/test_methods/__init__.py | 0 .../{test_adapter.py => test_methods/base.py} | 8 + tests/test_methods/imports.py | 21 ++ tests/test_methods/test_albert.py | 142 +++++++++++ tests/test_mistral.py | 66 ----- tests/test_models/__init__.py | 0 tests/{models => test_models}/base.py | 0 tests/{models => test_models}/test_albert.py | 0 tests/test_mt5.py | 68 ------ tests/test_plbart.py | 67 ------ tests/test_roberta.py | 49 ---- tests/test_t5.py | 68 ------ tests/test_vit.py | 48 ---- tests/test_whisper.py | 72 ------ tests/test_xlm_roberta.py | 41 ---- tests/test_xmod.py | 49 ---- 88 files changed, 201 insertions(+), 1984 
deletions(-) rename tests/{models => fixtures}/__init__.py (100%) delete mode 100644 tests/methods/__init__.py delete mode 100644 tests/models/test_bart.py delete mode 100644 tests/models/test_beit.py delete mode 100644 tests/models/test_bert.py delete mode 100644 tests/models/test_bert_generation.py delete mode 100644 tests/models/test_clip.py delete mode 100644 tests/models/test_deberta.py delete mode 100644 tests/models/test_debertaV2.py delete mode 100644 tests/models/test_distilbert.py delete mode 100644 tests/models/test_electra.py delete mode 100644 tests/models/test_encoder_decoder.py delete mode 100644 tests/models/test_gpt2.py delete mode 100644 tests/models/test_gptj.py delete mode 100644 tests/models/test_llama.py delete mode 100644 tests/models/test_mbart.py delete mode 100644 tests/models/test_mistral.py delete mode 100644 tests/models/test_mt5.py delete mode 100644 tests/models/test_plbart.py delete mode 100644 tests/models/test_roberta.py delete mode 100644 tests/models/test_t5.py delete mode 100644 tests/models/test_vit.py delete mode 100644 tests/models/test_whisper.py delete mode 100644 tests/models/test_xlm_roberta.py delete mode 100644 tests/models/test_xmod.py delete mode 100644 tests/test_albert.py delete mode 100644 tests/test_bart.py delete mode 100644 tests/test_beit.py delete mode 100644 tests/test_bert.py delete mode 100644 tests/test_bert_generation.py delete mode 100644 tests/test_clip.py delete mode 100644 tests/test_deberta.py delete mode 100644 tests/test_debertaV2.py delete mode 100644 tests/test_distilbert.py delete mode 100644 tests/test_electra.py delete mode 100644 tests/test_encoder_decoder.py delete mode 100644 tests/test_gpt2.py delete mode 100644 tests/test_gptj.py create mode 100644 tests/test_impl/__init__.py rename tests/{methods => test_impl}/base.py (100%) create mode 100644 tests/test_impl/composition/__init__.py rename tests/{ => test_impl}/composition/test_adapter_composition.py (99%) rename tests/{ => test_impl}/composition/test_parallel.py (99%) create mode 100644 tests/test_impl/core/__init__.py rename tests/{ => test_impl/core}/test_adapter_backward_compability.py (96%) rename tests/{ => test_impl/core}/test_adapter_config.py (99%) rename tests/{ => test_impl/core}/test_adapter_conversion.py (99%) rename tests/{ => test_impl/core}/test_adapter_fusion_common.py (99%) rename tests/{ => test_impl/core}/test_adapter_fusion_config.py (98%) rename tests/{ => test_impl/core}/test_adapter_hub.py (99%) rename tests/{ => test_impl/core}/test_adapter_safetensors.py (99%) rename tests/{ => test_impl/core}/test_adapter_save_id2label.py (99%) create mode 100644 tests/test_impl/embeddings/__init__.py rename tests/{ => test_impl/embeddings}/test_adapter_embeddings.py (100%) create mode 100644 tests/test_impl/heads/__init__.py rename tests/{ => test_impl/heads}/test_adapter_custom_head.py (98%) rename tests/{ => test_impl/heads}/test_adapter_heads.py (99%) create mode 100644 tests/test_impl/peft/__init__.py rename tests/{methods => test_impl/peft}/test_adapter_common.py (99%) rename tests/{methods => test_impl/peft}/test_compacter.py (98%) rename tests/{methods => test_impl/peft}/test_config_union.py (96%) rename tests/{methods => test_impl/peft}/test_ia3.py (96%) rename tests/{methods => test_impl/peft}/test_lora.py (99%) rename tests/{methods => test_impl/peft}/test_prefix_tuning.py (98%) rename tests/{methods => test_impl/peft}/test_prompt_tuning.py (96%) rename tests/{methods => test_impl/peft}/test_reft.py (98%) rename tests/{methods => 
test_impl/peft}/test_unipelt.py (97%) create mode 100644 tests/test_impl/trainer/__init__.py rename tests/{ => test_impl/trainer}/test_adapter_trainer.py (100%) rename tests/{extended => test_impl/trainer}/test_adapter_trainer_ext.py (99%) delete mode 100644 tests/test_llama.py delete mode 100644 tests/test_mbart.py create mode 100644 tests/test_methods/__init__.py rename tests/{test_adapter.py => test_methods/base.py} (96%) create mode 100644 tests/test_methods/imports.py create mode 100644 tests/test_methods/test_albert.py delete mode 100644 tests/test_mistral.py create mode 100644 tests/test_models/__init__.py rename tests/{models => test_models}/base.py (100%) rename tests/{models => test_models}/test_albert.py (100%) delete mode 100644 tests/test_mt5.py delete mode 100644 tests/test_plbart.py delete mode 100644 tests/test_roberta.py delete mode 100644 tests/test_t5.py delete mode 100644 tests/test_vit.py delete mode 100644 tests/test_whisper.py delete mode 100644 tests/test_xlm_roberta.py delete mode 100644 tests/test_xmod.py diff --git a/tests/models/__init__.py b/tests/fixtures/__init__.py similarity index 100% rename from tests/models/__init__.py rename to tests/fixtures/__init__.py diff --git a/tests/methods/__init__.py b/tests/methods/__init__.py deleted file mode 100644 index ea65b2997b..0000000000 --- a/tests/methods/__init__.py +++ /dev/null @@ -1,42 +0,0 @@ -# flake8: noqa -# There's no way to ignore "F401 '...' imported but unused" warnings in this -# module, but to preserve other warnings. So, don't check this module at all. - -# Copyright 2020 The Adapter-Hub Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from .base import create_twin_models -from .test_adapter_common import BottleneckAdapterTestMixin -from .test_compacter import CompacterTestMixin -from .test_ia3 import IA3TestMixin -from .test_lora import LoRATestMixin -from .test_prefix_tuning import PrefixTuningTestMixin -from .test_prompt_tuning import PromptTuningTestMixin -from .test_reft import ReftTestMixin -from .test_unipelt import UniPELTTestMixin - - -class AllMethodsTestMixin( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - PromptTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, -): - """Shorthand mixin for models which support all adapter methods.""" - - pass diff --git a/tests/models/test_bart.py b/tests/models/test_bart.py deleted file mode 100644 index 70d97c97a0..0000000000 --- a/tests/models/test_bart.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import BartAdapterModel -from hf_transformers.tests.models.bart.test_modeling_bart import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class BartAdapterModelTest(AdapterModelTesterMixin, BartModelTest): - all_model_classes = (BartAdapterModel,) - fx_compatible = False diff --git a/tests/models/test_beit.py b/tests/models/test_beit.py deleted file mode 100644 index 1d6fc92727..0000000000 --- a/tests/models/test_beit.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import BeitAdapterModel -from hf_transformers.tests.models.beit.test_modeling_beit import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class BeitAdapterModelTest(AdapterModelTesterMixin, BeitModelTest): - all_model_classes = (BeitAdapterModel,) - fx_compatible = False diff --git a/tests/models/test_bert.py b/tests/models/test_bert.py deleted file mode 100644 index 1ca69b0b8f..0000000000 --- a/tests/models/test_bert.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import BertAdapterModel -from hf_transformers.tests.models.bert.test_modeling_bert import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class BertAdapterModelTest(AdapterModelTesterMixin, BertModelTest): - all_model_classes = (BertAdapterModel,) - fx_compatible = False diff --git a/tests/models/test_bert_generation.py b/tests/models/test_bert_generation.py deleted file mode 100644 index 15f867e00d..0000000000 --- a/tests/models/test_bert_generation.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import BertGenerationAdapterModel -from hf_transformers.tests.models.bert_generation.test_modeling_bert_generation import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class BertGenerationAdapterModelTest(AdapterModelTesterMixin, BertGenerationEncoderTest): - all_model_classes = (BertGenerationAdapterModel,) - fx_compatible = False diff --git a/tests/models/test_clip.py b/tests/models/test_clip.py deleted file mode 100644 index 921e0668f5..0000000000 --- a/tests/models/test_clip.py +++ /dev/null @@ -1,39 +0,0 @@ -# flake8: noqa: F403,F405 -import numpy as np - -from adapters import CLIPAdapterModel -from hf_transformers.tests.models.clip.test_modeling_clip import * # Imported to execute model tests -from hf_transformers.tests.test_modeling_common import _config_zero_init -from 
transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class CLIPAdapterModelTest(AdapterModelTesterMixin, CLIPModelTest): - all_model_classes = (CLIPAdapterModel,) - fx_compatible = False - - # override as the `logit_scale` parameter has a different name in the adapter model - def test_initialization(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - configs_no_init = _config_zero_init(config) - for model_class in self.all_model_classes: - model = model_class(config=configs_no_init) - for name, param in model.named_parameters(): - if param.requires_grad: - # check if `logit_scale` is initilized as per the original implementation - if name == "clip.logit_scale": - self.assertAlmostEqual( - param.data.item(), - np.log(1 / 0.07), - delta=1e-3, - msg=f"Parameter {name} of model {model_class} seems not properly initialized", - ) - else: - self.assertIn( - ((param.data.mean() * 1e9).round() / 1e9).item(), - [0.0, 1.0], - msg=f"Parameter {name} of model {model_class} seems not properly initialized", - ) diff --git a/tests/models/test_deberta.py b/tests/models/test_deberta.py deleted file mode 100644 index 27f94bf1b4..0000000000 --- a/tests/models/test_deberta.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import DebertaAdapterModel -from hf_transformers.tests.models.deberta.test_modeling_deberta import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class DebertaAdapterModelTest(AdapterModelTesterMixin, DebertaModelTest): - all_model_classes = (DebertaAdapterModel,) - fx_compatible = False diff --git a/tests/models/test_debertaV2.py b/tests/models/test_debertaV2.py deleted file mode 100644 index 9e97466cc5..0000000000 --- a/tests/models/test_debertaV2.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import DebertaV2AdapterModel -from hf_transformers.tests.models.deberta_v2.test_modeling_deberta_v2 import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class DebertaV2AdapterModelTest(AdapterModelTesterMixin, DebertaV2ModelTest): - all_model_classes = (DebertaV2AdapterModel,) - fx_compatible = False diff --git a/tests/models/test_distilbert.py b/tests/models/test_distilbert.py deleted file mode 100644 index 56cad41de0..0000000000 --- a/tests/models/test_distilbert.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import DistilBertAdapterModel -from hf_transformers.tests.models.distilbert.test_modeling_distilbert import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class DistilBertAdapterModelTest(AdapterModelTesterMixin, DistilBertModelTest): - all_model_classes = (DistilBertAdapterModel,) - fx_compatible = False diff --git a/tests/models/test_electra.py b/tests/models/test_electra.py deleted file mode 100644 index 642eeb0c04..0000000000 --- a/tests/models/test_electra.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import ElectraAdapterModel -from hf_transformers.tests.models.electra.test_modeling_electra import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class ElectraAdapterModelTest(AdapterModelTesterMixin, ElectraModelTester): - all_model_classes = (ElectraAdapterModel,) - fx_compatible = 
False diff --git a/tests/models/test_encoder_decoder.py b/tests/models/test_encoder_decoder.py deleted file mode 100644 index 8f6f4b5f87..0000000000 --- a/tests/models/test_encoder_decoder.py +++ /dev/null @@ -1,2 +0,0 @@ -# flake8: noqa -from hf_transformers.tests.models.encoder_decoder.test_modeling_encoder_decoder import * # Imported to execute model tests diff --git a/tests/models/test_gpt2.py b/tests/models/test_gpt2.py deleted file mode 100644 index f904be53b8..0000000000 --- a/tests/models/test_gpt2.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import GPT2AdapterModel -from hf_transformers.tests.models.gpt2.test_modeling_gpt2 import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class GPT2AdapterModelTest(AdapterModelTesterMixin, GPT2ModelTest): - all_model_classes = (GPT2AdapterModel,) - fx_compatible = False diff --git a/tests/models/test_gptj.py b/tests/models/test_gptj.py deleted file mode 100644 index 5cd7610649..0000000000 --- a/tests/models/test_gptj.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import GPTJAdapterModel -from hf_transformers.tests.models.gptj.test_modeling_gptj import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class GPTJAdapterModelTest(AdapterModelTesterMixin, GPTJModelTest): - all_model_classes = (GPTJAdapterModel,) - fx_compatible = False diff --git a/tests/models/test_llama.py b/tests/models/test_llama.py deleted file mode 100644 index 4246f048e7..0000000000 --- a/tests/models/test_llama.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import LlamaAdapterModel -from hf_transformers.tests.models.llama.test_modeling_llama import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class LlamaAdapterModelTest(AdapterModelTesterMixin, LlamaModelTest): - all_model_classes = (LlamaAdapterModel,) - fx_compatible = False diff --git a/tests/models/test_mbart.py b/tests/models/test_mbart.py deleted file mode 100644 index f874082af0..0000000000 --- a/tests/models/test_mbart.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import MBartAdapterModel -from hf_transformers.tests.models.mbart.test_modeling_mbart import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class MBartAdapterModelTest(AdapterModelTesterMixin, MBartModelTest): - all_model_classes = (MBartAdapterModel,) - fx_compatible = False diff --git a/tests/models/test_mistral.py b/tests/models/test_mistral.py deleted file mode 100644 index be66648c37..0000000000 --- a/tests/models/test_mistral.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import MistralAdapterModel -from hf_transformers.tests.models.mistral.test_modeling_mistral import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class MistralAdapterModelTest(AdapterModelTesterMixin, MistralModelTest): - all_model_classes = (MistralAdapterModel,) - fx_compatible = False diff --git a/tests/models/test_mt5.py b/tests/models/test_mt5.py deleted file mode 100644 index 8d9f551e8b..0000000000 --- a/tests/models/test_mt5.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import MT5AdapterModel -from 
hf_transformers.tests.models.mt5.test_modeling_mt5 import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class MT5AdapterModelTest(AdapterModelTesterMixin, MT5IntegrationTest): - all_model_classes = (MT5AdapterModel,) - fx_compatible = False diff --git a/tests/models/test_plbart.py b/tests/models/test_plbart.py deleted file mode 100644 index 7fbbfc38df..0000000000 --- a/tests/models/test_plbart.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import PLBartAdapterModel -from hf_transformers.tests.models.plbart.test_modeling_plbart import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class PLBartAdapterModelTest(AdapterModelTesterMixin, PLBartModelTest): - all_model_classes = (PLBartAdapterModel,) - fx_compatible = False diff --git a/tests/models/test_roberta.py b/tests/models/test_roberta.py deleted file mode 100644 index e89886220f..0000000000 --- a/tests/models/test_roberta.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import RobertaAdapterModel -from hf_transformers.tests.models.roberta.test_modeling_roberta import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class RobertaAdapterModelTest(AdapterModelTesterMixin, RobertaModelTest): - all_model_classes = (RobertaAdapterModel,) - fx_compatible = False diff --git a/tests/models/test_t5.py b/tests/models/test_t5.py deleted file mode 100644 index 12d31a03e7..0000000000 --- a/tests/models/test_t5.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import T5AdapterModel -from hf_transformers.tests.models.t5.test_modeling_t5 import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class T5AdapterModelTest(AdapterModelTesterMixin, T5ModelTest): - all_model_classes = (T5AdapterModel,) - fx_compatible = False diff --git a/tests/models/test_vit.py b/tests/models/test_vit.py deleted file mode 100644 index a5fc5a05bc..0000000000 --- a/tests/models/test_vit.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import ViTAdapterModel -from hf_transformers.tests.models.vit.test_modeling_vit import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class ViTAdapterModelTest(AdapterModelTesterMixin, ViTModelTest): - all_model_classes = (ViTAdapterModel,) - fx_compatible = False diff --git a/tests/models/test_whisper.py b/tests/models/test_whisper.py deleted file mode 100644 index bfeea5a508..0000000000 --- a/tests/models/test_whisper.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import WhisperAdapterModel -from hf_transformers.tests.models.whisper.test_modeling_whisper import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class WhisperAdapterModelTest(AdapterModelTesterMixin, WhisperModelTest): - all_model_classes = (WhisperAdapterModel,) - fx_compatible = False diff --git a/tests/models/test_xlm_roberta.py b/tests/models/test_xlm_roberta.py deleted file mode 100644 index 8232515028..0000000000 --- a/tests/models/test_xlm_roberta.py +++ /dev/null @@ -1,2 +0,0 @@ -# flake8: noqa -from hf_transformers.tests.models.xlm_roberta.test_modeling_xlm_roberta import * # Imported to execute model 
tests diff --git a/tests/models/test_xmod.py b/tests/models/test_xmod.py deleted file mode 100644 index 2a0faa06b3..0000000000 --- a/tests/models/test_xmod.py +++ /dev/null @@ -1,12 +0,0 @@ -# flake8: noqa: F403,F405 -from adapters import XmodAdapterModel -from hf_transformers.tests.models.xmod.test_modeling_xmod import * -from transformers.testing_utils import require_torch - -from .base import AdapterModelTesterMixin - - -@require_torch -class XmodAdapterModelTest(AdapterModelTesterMixin, XmodModelTest): - all_model_classes = (XmodAdapterModel,) - fx_compatible = False diff --git a/tests/test_albert.py b/tests/test_albert.py deleted file mode 100644 index f40620887b..0000000000 --- a/tests/test_albert.py +++ /dev/null @@ -1,55 +0,0 @@ -import unittest -from math import ceil - -from transformers import AlbertConfig -from transformers.testing_utils import require_torch - -from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin -from .methods import AllMethodsTestMixin -from .test_adapter import TextAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_embeddings import EmbeddingTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -class AlbertAdapterTestBase(TextAdapterTestBase): - config_class = AlbertConfig - config = make_config( - AlbertConfig, - embedding_size=16, - hidden_size=64, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - num_hidden_groups=2, - ) - tokenizer_name = "albert-base-v2" - leave_out_layers = [0] - - -@require_torch -class AlbertAdapterTest( - AllMethodsTestMixin, - EmbeddingTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - PredictionHeadModelTestMixin, - ParallelAdapterInferenceTestMixin, - ParallelTrainingMixin, - AlbertAdapterTestBase, - unittest.TestCase, -): - def test_context_simple(self): - expected_number_of_adapter_calls = ceil(self.config().num_hidden_layers / self.config().num_hidden_groups) - super().test_context_simple(expected_number_of_adapter_calls=expected_number_of_adapter_calls) - - -@require_torch -class AlbertClassConversionTest( - ModelClassConversionTestMixin, - AlbertAdapterTestBase, - unittest.TestCase, -): - pass diff --git a/tests/test_bart.py b/tests/test_bart.py deleted file mode 100644 index 5191e56ffe..0000000000 --- a/tests/test_bart.py +++ /dev/null @@ -1,68 +0,0 @@ -import unittest - -from tests.methods.test_config_union import ConfigUnionAdapterTest -from transformers import BartConfig -from transformers.testing_utils import require_torch - -from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin -from .methods import ( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, -) -from .test_adapter import TextAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_embeddings import EmbeddingTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -class BartAdapterTestBase(TextAdapterTestBase): - config_class = BartConfig - config = make_config( - BartConfig, - d_model=16, - 
encoder_layers=2, - decoder_layers=2, - encoder_attention_heads=4, - decoder_attention_heads=4, - encoder_ffn_dim=4, - decoder_ffn_dim=4, - ) - tokenizer_name = "facebook/bart-base" - - -@require_torch -class BartAdapterTest( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - EmbeddingTestMixin, - PredictionHeadModelTestMixin, - ParallelAdapterInferenceTestMixin, - ParallelTrainingMixin, - ConfigUnionAdapterTest, - BartAdapterTestBase, - unittest.TestCase, -): - pass - - -@require_torch -class BartClassConversionTest( - ModelClassConversionTestMixin, - BartAdapterTestBase, - unittest.TestCase, -): - pass diff --git a/tests/test_beit.py b/tests/test_beit.py deleted file mode 100644 index a943b2e7fd..0000000000 --- a/tests/test_beit.py +++ /dev/null @@ -1,45 +0,0 @@ -import unittest - -from transformers import BeitConfig -from transformers.testing_utils import require_torch - -from .methods import AllMethodsTestMixin -from .test_adapter import VisionAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -class BeitAdapterTestBase(VisionAdapterTestBase): - config_class = BeitConfig - config = make_config( - BeitConfig, - image_size=224, - hidden_size=32, - num_hidden_layers=4, - num_attention_heads=4, - intermediate_size=37, - ) - feature_extractor_name = "microsoft/beit-base-patch16-224-pt22k" - - -@require_torch -class BeitAdapterTest( - AllMethodsTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - PredictionHeadModelTestMixin, - BeitAdapterTestBase, - unittest.TestCase, -): - pass - - -@require_torch -class BeitClassConversionTest( - ModelClassConversionTestMixin, - BeitAdapterTestBase, - unittest.TestCase, -): - pass diff --git a/tests/test_bert.py b/tests/test_bert.py deleted file mode 100644 index 9ef04e1178..0000000000 --- a/tests/test_bert.py +++ /dev/null @@ -1,51 +0,0 @@ -import unittest - -from tests.methods.test_config_union import ConfigUnionAdapterTest -from transformers import BertConfig -from transformers.testing_utils import require_torch - -from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin -from .methods import AllMethodsTestMixin -from .test_adapter import TextAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_embeddings import EmbeddingTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -class BertAdapterTestBase(TextAdapterTestBase): - config_class = BertConfig - config = make_config( - BertConfig, - hidden_size=32, - num_hidden_layers=4, - num_attention_heads=4, - intermediate_size=37, - ) - tokenizer_name = "bert-base-uncased" - - -@require_torch -class BertAdapterTest( - AllMethodsTestMixin, - EmbeddingTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - PredictionHeadModelTestMixin, - ParallelAdapterInferenceTestMixin, - ParallelTrainingMixin, - ConfigUnionAdapterTest, - BertAdapterTestBase, - unittest.TestCase, -): - pass - - -@require_torch -class BertClassConversionTest( - 
ModelClassConversionTestMixin, - BertAdapterTestBase, - unittest.TestCase, -): - pass diff --git a/tests/test_bert_generation.py b/tests/test_bert_generation.py deleted file mode 100644 index 61f149cc6d..0000000000 --- a/tests/test_bert_generation.py +++ /dev/null @@ -1,94 +0,0 @@ -import unittest - -from datasets import load_dataset - -from transformers import AutoTokenizer, BertGenerationConfig -from transformers.testing_utils import require_torch - -from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin -from .methods import AllMethodsTestMixin -from .test_adapter import TextAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_embeddings import EmbeddingTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -class BertGenerationAdapterTestBase(TextAdapterTestBase): - config_class = BertGenerationConfig - config = make_config( - BertGenerationConfig, - hidden_size=32, - num_hidden_layers=4, - num_attention_heads=4, - intermediate_size=37, - ) - tokenizer_name = "bert-base-uncased" - - def add_head(self, model, name, **kwargs): - model.add_masked_lm_head(name) - return self.default_input_samples_shape[-1] - - def dataset(self, tokenizer=None): - # setup tokenizer - if tokenizer is None: - tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name, use_fast=False) - if tokenizer.pad_token is None: - tokenizer.pad_token = tokenizer.eos_token - - def preprocess_function(examples): - inputs = examples["document"] - targets = examples["summary"] - inputs = ["Summarize: " + inp for inp in inputs] - model_inputs = tokenizer(inputs, padding="max_length", truncation=True, max_length=128) - - # Setup the tokenizer for targets - with tokenizer.as_target_tokenizer(): - labels = tokenizer(targets, padding="max_length", truncation=True, max_length=128) - - # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore - # padding in the loss. 
- labels["input_ids"] = [ - [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"] - ] - - model_inputs["labels"] = labels["input_ids"] - return model_inputs - - data_args = { - "task_name": "xsum", - "path": "./tests/fixtures/samples/xsum/sample.json", - } - dataset = load_dataset("json", data_files=data_args["path"]) - train_dataset = dataset["train"] - train_dataset = train_dataset.map( - preprocess_function, - batched=True, - desc="Running tokenizer on train dataset", - ) - return train_dataset - - -@require_torch -class BertGenerationAdapterTest( - AllMethodsTestMixin, - EmbeddingTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - PredictionHeadModelTestMixin, - ParallelAdapterInferenceTestMixin, - ParallelTrainingMixin, - BertGenerationAdapterTestBase, - unittest.TestCase, -): - pass - - -@require_torch -class BertGenerationClassConversionTest( - ModelClassConversionTestMixin, - BertGenerationAdapterTestBase, - unittest.TestCase, -): - pass diff --git a/tests/test_clip.py b/tests/test_clip.py deleted file mode 100644 index 056e0f9fd9..0000000000 --- a/tests/test_clip.py +++ /dev/null @@ -1,227 +0,0 @@ -import random -import unittest - -import torch - -from transformers import ( - CLIPConfig, - CLIPTextConfig, - CLIPTextModel, - CLIPTextModelWithProjection, - CLIPVisionConfig, - CLIPVisionModel, - CLIPVisionModelWithProjection, -) -from transformers.testing_utils import require_torch, torch_device - -from .methods import ( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, -) -from .test_adapter import TextAdapterTestBase, VisionAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin - - -class CLIPVisionAdapterTestBase(VisionAdapterTestBase): - model_class = CLIPVisionModel - config_class = CLIPVisionConfig - config = make_config( - CLIPVisionConfig, - image_size=30, - hidden_size=32, - num_hidden_layers=4, - num_attention_heads=4, - intermediate_size=37, - ) - feature_extractor_name = "openai/clip-vit-base-patch32" - - -@require_torch -class CLIPVisionAdapterTest( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - CLIPVisionAdapterTestBase, - unittest.TestCase, -): - pass - - -class CLIPVisionWithProjectionAdapterTestBase(VisionAdapterTestBase): - model_class = CLIPVisionModelWithProjection - config_class = CLIPVisionConfig - config = make_config( - CLIPVisionConfig, - image_size=30, - hidden_size=32, - num_hidden_layers=4, - num_attention_heads=4, - intermediate_size=37, - ) - feature_extractor_name = "openai/clip-vit-base-patch32" - - -@require_torch -class CLIPVisionWithProjectionAdapterTest( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - CLIPVisionWithProjectionAdapterTestBase, - unittest.TestCase, -): - pass - - -class CLIPTextAdapterTestBase(TextAdapterTestBase): - model_class = CLIPTextModel - config_class = CLIPTextConfig - config = make_config( - CLIPTextConfig, - hidden_size=32, - num_hidden_layers=4, - num_attention_heads=4, - intermediate_size=37, - ) - tokenizer_name = "openai/clip-vit-base-patch32" - - 
-@require_torch -class CLIPTextAdapterTest( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - CLIPTextAdapterTestBase, - unittest.TestCase, -): - pass - - -class CLIPTextWithProjectionAdapterTestBase(TextAdapterTestBase): - model_class = CLIPTextModelWithProjection - config_class = CLIPTextConfig - config = make_config( - CLIPTextConfig, - hidden_size=32, - num_hidden_layers=4, - num_attention_heads=4, - intermediate_size=37, - ) - tokenizer_name = "openai/clip-vit-base-patch32" - - -@require_torch -class CLIPTextWithProjectionAdapterTest( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - CLIPTextWithProjectionAdapterTestBase, - unittest.TestCase, -): - pass - - -class CLIPAdapterTestBase(TextAdapterTestBase): - config_class = CLIPConfig - config = staticmethod( - lambda: CLIPConfig.from_text_vision_configs( - CLIPTextConfig( - hidden_size=32, - num_hidden_layers=4, - num_attention_heads=4, - intermediate_size=37, - ), - CLIPVisionConfig( - image_size=30, - hidden_size=32, - num_hidden_layers=4, - num_attention_heads=4, - intermediate_size=37, - ), - ) - ) - tokenizer_name = "openai/clip-vit-base-patch32" - # Default shape of inputs to use - default_text_input_samples_shape = (3, 64) - default_vision_input_samples_shape = (3, 3, 224, 224) - do_run_train_tests = False - - def get_input_samples(self, vocab_size=5000, config=None, dtype=torch.float, **kwargs): - # text inputs - shape = self.default_text_input_samples_shape - total_dims = 1 - for dim in shape: - total_dims *= dim - values = [] - for _ in range(total_dims): - values.append(random.randint(0, vocab_size - 1)) - input_ids = torch.tensor(data=values, dtype=torch.long, device=torch_device).view(shape).contiguous() - # this is needed e.g. 
for BART - if config and config.eos_token_id is not None and config.eos_token_id < vocab_size: - input_ids[input_ids == config.eos_token_id] = random.randint(0, config.eos_token_id - 1) - input_ids[:, -1] = config.eos_token_id - in_data = {"input_ids": input_ids} - - # vision inputs - shape = self.default_vision_input_samples_shape - total_dims = 1 - for dim in shape: - total_dims *= dim - values = [] - for _ in range(total_dims): - values.append(random.random()) - pixel_values = torch.tensor(data=values, dtype=dtype, device=torch_device).view(shape).contiguous() - in_data["pixel_values"] = pixel_values - - return in_data - - def add_head(self, *args, **kwargs): - pass - - -@require_torch -class CLIPAdapterTest( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - CLIPAdapterTestBase, - unittest.TestCase, -): - def test_adapter_fusion_save_with_head(self): - # This test is not applicable to CLIP - self.skipTest("Not applicable to CLIP.") diff --git a/tests/test_deberta.py b/tests/test_deberta.py deleted file mode 100644 index 68506fd0c6..0000000000 --- a/tests/test_deberta.py +++ /dev/null @@ -1,55 +0,0 @@ -import unittest - -from tests.methods.test_config_union import ConfigUnionAdapterTest -from transformers import DebertaConfig -from transformers.testing_utils import require_torch - -from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin -from .methods import AllMethodsTestMixin -from .test_adapter import TextAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_embeddings import EmbeddingTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -class DebertaAdapterTestBase(TextAdapterTestBase): - config_class = DebertaConfig - config = make_config( - DebertaConfig, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - relative_attention=True, - pos_att_type="p2c|c2p", - ) - tokenizer_name = "microsoft/deberta-base" - - -@require_torch -class DebertaAdapterTest( - AdapterFusionModelTestMixin, - CompabilityTestMixin, - PredictionHeadModelTestMixin, - ParallelAdapterInferenceTestMixin, - AllMethodsTestMixin, - EmbeddingTestMixin, - ParallelTrainingMixin, - ConfigUnionAdapterTest, - DebertaAdapterTestBase, - unittest.TestCase, -): - def test_parallel_training_lora(self): - self.skipTest("Not supported for DeBERTa") - - -@require_torch -class DebertaClassConversionTest( - ModelClassConversionTestMixin, - DebertaAdapterTestBase, - unittest.TestCase, -): - pass diff --git a/tests/test_debertaV2.py b/tests/test_debertaV2.py deleted file mode 100644 index 94ff32c7f3..0000000000 --- a/tests/test_debertaV2.py +++ /dev/null @@ -1,54 +0,0 @@ -import unittest - -from tests.methods.test_config_union import ConfigUnionAdapterTest -from transformers import DebertaV2Config -from transformers.testing_utils import require_torch - -from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin -from .methods import AllMethodsTestMixin -from .test_adapter import TextAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_conversion import 
ModelClassConversionTestMixin -from .test_adapter_embeddings import EmbeddingTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -class DebertaV2AdapterTestBase(TextAdapterTestBase): - config_class = DebertaV2Config - config = make_config( - DebertaV2Config, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - relative_attention=True, - pos_att_type="p2c|c2p", - ) - tokenizer_name = "microsoft/deberta-v3-base" - - -@require_torch -class DebertaV2AdapterTest( - AdapterFusionModelTestMixin, - CompabilityTestMixin, - PredictionHeadModelTestMixin, - ParallelAdapterInferenceTestMixin, - AllMethodsTestMixin, - EmbeddingTestMixin, - ParallelTrainingMixin, - ConfigUnionAdapterTest, - DebertaV2AdapterTestBase, - unittest.TestCase, -): - pass - - -@require_torch -class DebertaV2ClassConversionTest( - ModelClassConversionTestMixin, - DebertaV2AdapterTestBase, - unittest.TestCase, -): - pass diff --git a/tests/test_distilbert.py b/tests/test_distilbert.py deleted file mode 100644 index 92d010389f..0000000000 --- a/tests/test_distilbert.py +++ /dev/null @@ -1,51 +0,0 @@ -import unittest - -from tests.methods.test_config_union import ConfigUnionAdapterTest -from transformers import DistilBertConfig -from transformers.testing_utils import require_torch - -from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin -from .methods import AllMethodsTestMixin -from .test_adapter import TextAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_embeddings import EmbeddingTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -class DistilBertAdapterTestBase(TextAdapterTestBase): - config_class = DistilBertConfig - config = make_config( - DistilBertConfig, - dim=32, - n_layers=4, - n_heads=4, - hidden_dim=37, - ) - tokenizer_name = "distilbert-base-uncased" - - -@require_torch -class DistilBertAdapterTest( - AllMethodsTestMixin, - EmbeddingTestMixin, - CompabilityTestMixin, - AdapterFusionModelTestMixin, - PredictionHeadModelTestMixin, - ParallelAdapterInferenceTestMixin, - ParallelTrainingMixin, - ConfigUnionAdapterTest, - DistilBertAdapterTestBase, - unittest.TestCase, -): - pass - - -@require_torch -class DistilBertClassConversionTest( - ModelClassConversionTestMixin, - DistilBertAdapterTestBase, - unittest.TestCase, -): - pass diff --git a/tests/test_electra.py b/tests/test_electra.py deleted file mode 100644 index 7dc6123796..0000000000 --- a/tests/test_electra.py +++ /dev/null @@ -1,52 +0,0 @@ -import unittest - -from tests.methods.test_config_union import ConfigUnionAdapterTest -from transformers import ElectraConfig -from transformers.testing_utils import require_torch - -from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin -from .methods import AllMethodsTestMixin -from .test_adapter import TextAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_embeddings import EmbeddingTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -class 
ElectraAdapterTestBase(TextAdapterTestBase): - config_class = ElectraConfig - config = make_config( - ElectraConfig, - # vocab_size=99, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - ) - tokenizer_name = "google/electra-base-generator" - - -@require_torch -class ElectraAdapterTest( - AllMethodsTestMixin, - EmbeddingTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - PredictionHeadModelTestMixin, - ParallelAdapterInferenceTestMixin, - ParallelTrainingMixin, - ConfigUnionAdapterTest, - ElectraAdapterTestBase, - unittest.TestCase, -): - pass - - -@require_torch -class ElectraClassConversionTest( - ModelClassConversionTestMixin, - ElectraAdapterTestBase, - unittest.TestCase, -): - pass diff --git a/tests/test_encoder_decoder.py b/tests/test_encoder_decoder.py deleted file mode 100644 index 6d02155f86..0000000000 --- a/tests/test_encoder_decoder.py +++ /dev/null @@ -1,90 +0,0 @@ -# flake8: noqa: F403,F405 -import unittest - -import adapters -from hf_transformers.tests.models.encoder_decoder.test_modeling_encoder_decoder import * # Imported to execute model tests -from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, BertConfig -from transformers.testing_utils import require_torch, torch_device - -from .methods import ( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - UniPELTTestMixin, -) -from .test_adapter import TextAdapterTestBase -from .test_adapter_fusion_common import AdapterFusionModelTestMixin - - -class EncoderDecoderAdapterTestBase(TextAdapterTestBase): - model_class = EncoderDecoderModel - config_class = EncoderDecoderConfig - config = staticmethod( - lambda: EncoderDecoderConfig.from_encoder_decoder_configs( - BertConfig( - hidden_size=32, - num_hidden_layers=4, - num_attention_heads=4, - intermediate_size=37, - ), - BertConfig( - hidden_size=32, - num_hidden_layers=4, - num_attention_heads=4, - intermediate_size=37, - is_decoder=True, - add_cross_attention=True, - ), - ) - ) - tokenizer_name = "bert-base-uncased" - do_run_train_tests = False - - -@require_torch -class EncoderDecoderAdapterTest( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - UniPELTTestMixin, - AdapterFusionModelTestMixin, - EncoderDecoderAdapterTestBase, - unittest.TestCase, -): - def test_generation(self): - model = AutoModelForSeq2SeqLM.from_config(self.config()) - adapters.init(model) - model.add_adapter("test", config="pfeiffer") - model.set_active_adapters("test") - tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name, use_fast=False) - - text = "This is a test sentence." - input_ids = tokenizer(text, return_tensors="pt").input_ids - - generated_ids = model.generate(input_ids, bos_token_id=100) - generated_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] - self.assertNotEqual("", generated_text) - - def test_invertible_adapter_with_head(self): - """This test class is copied and adapted from the identically-named test in test_adapter_heads.py.""" - raise self.skipTest("AutoModelForSeq2SeqLM does not support using invertible adapters.") - - def test_adapter_fusion_save_with_head(self): - # This test is not applicable to the encoder-decoder model since it has no heads. - self.skipTest("Not applicable to the encoder-decoder model.") - - def test_forward_with_past(self): - # This test is not applicable to the encoder-decoder model since it has no heads. 
- self.skipTest("Not applicable to the encoder-decoder model.") - - def test_output_adapter_gating_scores_unipelt(self): - # TODO currently not supported - self.skipTest("Not implemented.") - - def test_output_adapter_fusion_attentions(self): - # TODO currently not supported - self.skipTest("Not implemented.") diff --git a/tests/test_gpt2.py b/tests/test_gpt2.py deleted file mode 100644 index ba71d3d316..0000000000 --- a/tests/test_gpt2.py +++ /dev/null @@ -1,67 +0,0 @@ -import unittest - -from tests.methods.test_config_union import ConfigUnionAdapterTest -from transformers import GPT2Config -from transformers.testing_utils import require_torch - -from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin -from .methods import ( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, -) -from .test_adapter import TextAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_embeddings import EmbeddingTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -class GPT2AdapterTestBase(TextAdapterTestBase): - config_class = GPT2Config - config = make_config( - GPT2Config, - n_embd=32, - n_layer=4, - n_head=4, - # set pad token to eos token - pad_token_id=50256, - ) - tokenizer_name = "gpt2" - - -@require_torch -class GPT2AdapterTest( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, - EmbeddingTestMixin, - CompabilityTestMixin, - AdapterFusionModelTestMixin, - PredictionHeadModelTestMixin, - ParallelAdapterInferenceTestMixin, - ParallelTrainingMixin, - ConfigUnionAdapterTest, - GPT2AdapterTestBase, - unittest.TestCase, -): - def test_parallel_training_lora(self): - self.skipTest("Not supported for GPT2") - - -@require_torch -class GPT2ClassConversionTest( - ModelClassConversionTestMixin, - GPT2AdapterTestBase, - unittest.TestCase, -): - pass diff --git a/tests/test_gptj.py b/tests/test_gptj.py deleted file mode 100644 index 87ea6ae766..0000000000 --- a/tests/test_gptj.py +++ /dev/null @@ -1,68 +0,0 @@ -import unittest - -from tests.methods.test_config_union import ConfigUnionAdapterTest -from transformers import GPTJConfig -from transformers.testing_utils import require_torch - -from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin -from .methods import ( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, -) -from .test_adapter import TextAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_embeddings import EmbeddingTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -class GPTJAdapterTestBase(TextAdapterTestBase): - config_class = GPTJConfig - config = make_config( - GPTJConfig, - n_embd=32, - n_layer=4, - n_head=4, - rotary_dim=4, - # set pad token to eos token - pad_token_id=50256, - resid_pdrop=0.1, - ) - tokenizer_name = "EleutherAI/gpt-j-6B" - - -@require_torch -class GPTJAdapterTest( - 
BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - ReftTestMixin, - UniPELTTestMixin, - PrefixTuningTestMixin, - EmbeddingTestMixin, - CompabilityTestMixin, - AdapterFusionModelTestMixin, - PredictionHeadModelTestMixin, - ParallelAdapterInferenceTestMixin, - ParallelTrainingMixin, - ConfigUnionAdapterTest, - GPTJAdapterTestBase, - unittest.TestCase, -): - pass - - -@require_torch -class GPTJClassConversionTest( - ModelClassConversionTestMixin, - GPTJAdapterTestBase, - unittest.TestCase, -): - pass diff --git a/tests/test_impl/__init__.py b/tests/test_impl/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/methods/base.py b/tests/test_impl/base.py similarity index 100% rename from tests/methods/base.py rename to tests/test_impl/base.py diff --git a/tests/test_impl/composition/__init__.py b/tests/test_impl/composition/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/composition/test_adapter_composition.py b/tests/test_impl/composition/test_adapter_composition.py similarity index 99% rename from tests/composition/test_adapter_composition.py rename to tests/test_impl/composition/test_adapter_composition.py index 3d0d47412d..6398648cb8 100644 --- a/tests/composition/test_adapter_composition.py +++ b/tests/test_impl/composition/test_adapter_composition.py @@ -1,11 +1,12 @@ import unittest +import pytest import torch import adapters from adapters import IA3Config, LoRAConfig, PrefixTuningConfig, SeqBnConfig from adapters.composition import Average, BatchSplit, Fuse, Parallel, Split, Stack, parse_composition -from tests.test_adapter import ids_tensor +from tests.test_methods.base import ids_tensor from transformers import BertConfig, BertForSequenceClassification from transformers.testing_utils import require_torch, torch_device diff --git a/tests/composition/test_parallel.py b/tests/test_impl/composition/test_parallel.py similarity index 99% rename from tests/composition/test_parallel.py rename to tests/test_impl/composition/test_parallel.py index 8aa2c8a6df..61ac70581d 100644 --- a/tests/composition/test_parallel.py +++ b/tests/test_impl/composition/test_parallel.py @@ -1,6 +1,7 @@ import copy import random +import pytest import torch from adapters import ( diff --git a/tests/test_impl/core/__init__.py b/tests/test_impl/core/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_adapter_backward_compability.py b/tests/test_impl/core/test_adapter_backward_compability.py similarity index 96% rename from tests/test_adapter_backward_compability.py rename to tests/test_impl/core/test_adapter_backward_compability.py index 6ec2ef2143..be9177e0d7 100644 --- a/tests/test_adapter_backward_compability.py +++ b/tests/test_impl/core/test_adapter_backward_compability.py @@ -2,8 +2,10 @@ import os import tempfile +import pytest + from adapters import SeqBnConfig, __version__ -from tests.methods import create_twin_models +from tests.test_impl.base import create_twin_models from transformers.testing_utils import require_torch diff --git a/tests/test_adapter_config.py b/tests/test_impl/core/test_adapter_config.py similarity index 99% rename from tests/test_adapter_config.py rename to tests/test_impl/core/test_adapter_config.py index db57aeae2b..948150d6a0 100644 --- a/tests/test_adapter_config.py +++ b/tests/test_impl/core/test_adapter_config.py @@ -2,6 +2,8 @@ import unittest from dataclasses import FrozenInstanceError, dataclass +import pytest + from adapters import ( 
ADAPTER_CONFIG_MAP, AdapterConfig, diff --git a/tests/test_adapter_conversion.py b/tests/test_impl/core/test_adapter_conversion.py similarity index 99% rename from tests/test_adapter_conversion.py rename to tests/test_impl/core/test_adapter_conversion.py index 9653b3f340..c8ec96ece9 100644 --- a/tests/test_adapter_conversion.py +++ b/tests/test_impl/core/test_adapter_conversion.py @@ -2,6 +2,7 @@ import re import tempfile +import pytest import torch import adapters diff --git a/tests/test_adapter_fusion_common.py b/tests/test_impl/core/test_adapter_fusion_common.py similarity index 99% rename from tests/test_adapter_fusion_common.py rename to tests/test_impl/core/test_adapter_fusion_common.py index ccc860f667..914de3bd98 100644 --- a/tests/test_adapter_fusion_common.py +++ b/tests/test_impl/core/test_adapter_fusion_common.py @@ -3,6 +3,7 @@ import tempfile from dataclasses import asdict +import pytest import torch from adapters import ADAPTER_MODEL_MAPPING, ADAPTERFUSION_CONFIG_MAP, AdapterConfig, AutoAdapterModel, SeqBnConfig diff --git a/tests/test_adapter_fusion_config.py b/tests/test_impl/core/test_adapter_fusion_config.py similarity index 98% rename from tests/test_adapter_fusion_config.py rename to tests/test_impl/core/test_adapter_fusion_config.py index 0ad1860f20..8f88dcce18 100644 --- a/tests/test_adapter_fusion_config.py +++ b/tests/test_impl/core/test_adapter_fusion_config.py @@ -1,6 +1,8 @@ import unittest from dataclasses import FrozenInstanceError +import pytest + from adapters import ADAPTERFUSION_CONFIG_MAP, AdapterFusionConfig from transformers.testing_utils import require_torch diff --git a/tests/test_adapter_hub.py b/tests/test_impl/core/test_adapter_hub.py similarity index 99% rename from tests/test_adapter_hub.py rename to tests/test_impl/core/test_adapter_hub.py index fa29d13b19..7e5a2c56b4 100644 --- a/tests/test_adapter_hub.py +++ b/tests/test_impl/core/test_adapter_hub.py @@ -2,11 +2,13 @@ import unittest import numpy as np +import pytest import adapters from adapters import ADAPTER_CONFIG_MAP, AdapterConfig, BertAdapterModel, get_adapter_config_hash from adapters.trainer import AdapterTrainer as Trainer from adapters.utils import find_in_index +from tests.test_methods.base import ids_tensor from transformers import ( AutoModel, AutoTokenizer, @@ -17,8 +19,6 @@ ) from transformers.testing_utils import require_torch, torch_device -from .test_adapter import ids_tensor - SAMPLE_INDEX = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/hub-index.sample.json") diff --git a/tests/test_adapter_safetensors.py b/tests/test_impl/core/test_adapter_safetensors.py similarity index 99% rename from tests/test_adapter_safetensors.py rename to tests/test_impl/core/test_adapter_safetensors.py index 3c743c7a97..0a60bcc390 100644 --- a/tests/test_adapter_safetensors.py +++ b/tests/test_impl/core/test_adapter_safetensors.py @@ -4,6 +4,7 @@ import tempfile import unittest +import pytest import torch from adapters import BertAdapterModel, Fuse diff --git a/tests/test_adapter_save_id2label.py b/tests/test_impl/core/test_adapter_save_id2label.py similarity index 99% rename from tests/test_adapter_save_id2label.py rename to tests/test_impl/core/test_adapter_save_id2label.py index 4d8eba7051..cf8d5b4ff0 100644 --- a/tests/test_adapter_save_id2label.py +++ b/tests/test_impl/core/test_adapter_save_id2label.py @@ -2,6 +2,8 @@ from tempfile import TemporaryDirectory from typing import Dict +import pytest + import adapters from adapters import BertAdapterModel from transformers 
import BertConfig, BertForSequenceClassification diff --git a/tests/test_impl/embeddings/__init__.py b/tests/test_impl/embeddings/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_adapter_embeddings.py b/tests/test_impl/embeddings/test_adapter_embeddings.py similarity index 100% rename from tests/test_adapter_embeddings.py rename to tests/test_impl/embeddings/test_adapter_embeddings.py diff --git a/tests/test_impl/heads/__init__.py b/tests/test_impl/heads/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_adapter_custom_head.py b/tests/test_impl/heads/test_adapter_custom_head.py similarity index 98% rename from tests/test_adapter_custom_head.py rename to tests/test_impl/heads/test_adapter_custom_head.py index 8e29636d05..b7db138a2d 100644 --- a/tests/test_adapter_custom_head.py +++ b/tests/test_impl/heads/test_adapter_custom_head.py @@ -5,11 +5,10 @@ from adapters import AutoAdapterModel from adapters.heads import ClassificationHead, PredictionHead +from tests.test_methods.base import ids_tensor from transformers import AutoConfig from transformers.testing_utils import require_torch, torch_device -from .test_adapter import ids_tensor - class CustomHead(PredictionHead): def __init__( diff --git a/tests/test_adapter_heads.py b/tests/test_impl/heads/test_adapter_heads.py similarity index 99% rename from tests/test_adapter_heads.py rename to tests/test_impl/heads/test_adapter_heads.py index ffdd33e93a..c1426d1c6f 100644 --- a/tests/test_adapter_heads.py +++ b/tests/test_impl/heads/test_adapter_heads.py @@ -7,11 +7,10 @@ from adapters import ADAPTER_MODEL_MAPPING, AdapterSetup, AutoAdapterModel from adapters.composition import BatchSplit, Stack from adapters.heads import PredictionHead +from tests.test_impl.base import create_twin_models from transformers import AutoModelForSequenceClassification from transformers.testing_utils import require_torch, torch_device -from .methods import create_twin_models - @require_torch class PredictionHeadModelTestMixin: diff --git a/tests/test_impl/peft/__init__.py b/tests/test_impl/peft/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/methods/test_adapter_common.py b/tests/test_impl/peft/test_adapter_common.py similarity index 99% rename from tests/methods/test_adapter_common.py rename to tests/test_impl/peft/test_adapter_common.py index 1ea6cd6f37..98e6cf8b89 100644 --- a/tests/methods/test_adapter_common.py +++ b/tests/test_impl/peft/test_adapter_common.py @@ -1,6 +1,7 @@ import copy import tempfile +import pytest import torch import adapters @@ -19,11 +20,10 @@ SeqBnInvConfig, ) from adapters.heads.language_modeling import CausalLMHead +from tests.test_impl.base import AdapterMethodBaseTestMixin, create_twin_models from transformers import MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING, CLIPConfig from transformers.testing_utils import require_torch, torch_device -from .base import AdapterMethodBaseTestMixin, create_twin_models - @require_torch class BottleneckAdapterTestMixin(AdapterMethodBaseTestMixin): diff --git a/tests/methods/test_compacter.py b/tests/test_impl/peft/test_compacter.py similarity index 98% rename from tests/methods/test_compacter.py rename to tests/test_impl/peft/test_compacter.py index 2c91b75366..d6365f8101 100644 --- a/tests/methods/test_compacter.py +++ b/tests/test_impl/peft/test_compacter.py @@ -1,8 +1,7 @@ from adapters import ADAPTER_MODEL_MAPPING, AutoAdapterModel, CompacterPlusPlusConfig +from tests.test_impl.base import 
AdapterMethodBaseTestMixin from transformers.testing_utils import require_torch, torch_device -from .base import AdapterMethodBaseTestMixin - @require_torch class CompacterTestMixin(AdapterMethodBaseTestMixin): diff --git a/tests/methods/test_config_union.py b/tests/test_impl/peft/test_config_union.py similarity index 96% rename from tests/methods/test_config_union.py rename to tests/test_impl/peft/test_config_union.py index 12d82a5def..682d635eb4 100644 --- a/tests/methods/test_config_union.py +++ b/tests/test_impl/peft/test_config_union.py @@ -6,7 +6,7 @@ PrefixTuningConfig, SeqBnConfig, ) -from tests.methods.base import AdapterMethodBaseTestMixin +from tests.test_impl.base import AdapterMethodBaseTestMixin from transformers.testing_utils import require_torch diff --git a/tests/methods/test_ia3.py b/tests/test_impl/peft/test_ia3.py similarity index 96% rename from tests/methods/test_ia3.py rename to tests/test_impl/peft/test_ia3.py index 3a30e2448d..8356c1edd2 100644 --- a/tests/methods/test_ia3.py +++ b/tests/test_impl/peft/test_ia3.py @@ -1,8 +1,7 @@ from adapters import IA3Config +from tests.test_impl.base import AdapterMethodBaseTestMixin from transformers.testing_utils import require_torch -from .base import AdapterMethodBaseTestMixin - @require_torch class IA3TestMixin(AdapterMethodBaseTestMixin): diff --git a/tests/methods/test_lora.py b/tests/test_impl/peft/test_lora.py similarity index 99% rename from tests/methods/test_lora.py rename to tests/test_impl/peft/test_lora.py index 067f78c8b8..946f28e7cd 100644 --- a/tests/methods/test_lora.py +++ b/tests/test_impl/peft/test_lora.py @@ -4,10 +4,9 @@ from adapters import LoRAConfig from adapters.methods.lora import LoRALayer +from tests.test_impl.base import AdapterMethodBaseTestMixin from transformers.testing_utils import require_torch -from .base import AdapterMethodBaseTestMixin - @require_torch class LoRATestMixin(AdapterMethodBaseTestMixin): diff --git a/tests/methods/test_prefix_tuning.py b/tests/test_impl/peft/test_prefix_tuning.py similarity index 98% rename from tests/methods/test_prefix_tuning.py rename to tests/test_impl/peft/test_prefix_tuning.py index 9c3b0822a2..dd9360a933 100644 --- a/tests/methods/test_prefix_tuning.py +++ b/tests/test_impl/peft/test_prefix_tuning.py @@ -1,11 +1,10 @@ import torch from adapters import ADAPTER_MODEL_MAPPING, AutoAdapterModel, PrefixTuningConfig +from tests.test_impl.base import AdapterMethodBaseTestMixin from transformers import CLIPConfig from transformers.testing_utils import require_torch, torch_device -from .base import AdapterMethodBaseTestMixin - @require_torch class PrefixTuningTestMixin(AdapterMethodBaseTestMixin): diff --git a/tests/methods/test_prompt_tuning.py b/tests/test_impl/peft/test_prompt_tuning.py similarity index 96% rename from tests/methods/test_prompt_tuning.py rename to tests/test_impl/peft/test_prompt_tuning.py index 97015d1319..1b26d187b8 100644 --- a/tests/methods/test_prompt_tuning.py +++ b/tests/test_impl/peft/test_prompt_tuning.py @@ -1,8 +1,7 @@ from adapters import PromptTuningConfig +from tests.test_impl.base import AdapterMethodBaseTestMixin from transformers.testing_utils import require_torch -from .base import AdapterMethodBaseTestMixin - @require_torch class PromptTuningTestMixin(AdapterMethodBaseTestMixin): diff --git a/tests/methods/test_reft.py b/tests/test_impl/peft/test_reft.py similarity index 98% rename from tests/methods/test_reft.py rename to tests/test_impl/peft/test_reft.py index 8849221808..3baa843c50 100644 --- 
a/tests/methods/test_reft.py +++ b/tests/test_impl/peft/test_reft.py @@ -1,8 +1,7 @@ from adapters import DiReftConfig, LoReftConfig, NoReftConfig +from tests.test_impl.base import AdapterMethodBaseTestMixin from transformers.testing_utils import require_torch -from .base import AdapterMethodBaseTestMixin - @require_torch class ReftTestMixin(AdapterMethodBaseTestMixin): diff --git a/tests/methods/test_unipelt.py b/tests/test_impl/peft/test_unipelt.py similarity index 97% rename from tests/methods/test_unipelt.py rename to tests/test_impl/peft/test_unipelt.py index d29fa5f18d..2e1786dc97 100644 --- a/tests/methods/test_unipelt.py +++ b/tests/test_impl/peft/test_unipelt.py @@ -1,8 +1,7 @@ from adapters import UniPELTConfig +from tests.test_impl.base import AdapterMethodBaseTestMixin from transformers.testing_utils import require_torch, torch_device -from .base import AdapterMethodBaseTestMixin - @require_torch class UniPELTTestMixin(AdapterMethodBaseTestMixin): diff --git a/tests/test_impl/trainer/__init__.py b/tests/test_impl/trainer/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_adapter_trainer.py b/tests/test_impl/trainer/test_adapter_trainer.py similarity index 100% rename from tests/test_adapter_trainer.py rename to tests/test_impl/trainer/test_adapter_trainer.py diff --git a/tests/extended/test_adapter_trainer_ext.py b/tests/test_impl/trainer/test_adapter_trainer_ext.py similarity index 99% rename from tests/extended/test_adapter_trainer_ext.py rename to tests/test_impl/trainer/test_adapter_trainer_ext.py index 6e14944654..3e49e25f5d 100644 --- a/tests/extended/test_adapter_trainer_ext.py +++ b/tests/test_impl/trainer/test_adapter_trainer_ext.py @@ -42,7 +42,7 @@ bindir = os.path.abspath(os.path.dirname(__file__)) -with ExtendSysPath(f"{bindir}/../../examples/pytorch/translation"): +with ExtendSysPath(f"{bindir}/../../../examples/pytorch/translation"): from run_translation import main # noqa diff --git a/tests/test_llama.py b/tests/test_llama.py deleted file mode 100644 index 8392fa32c2..0000000000 --- a/tests/test_llama.py +++ /dev/null @@ -1,66 +0,0 @@ -import unittest - -from transformers.models.llama.configuration_llama import LlamaConfig -from transformers.testing_utils import require_torch - -from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin -from .methods import ( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, -) -from .test_adapter import TextAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_embeddings import EmbeddingTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -class LlamaAdapterTestBase(TextAdapterTestBase): - config_class = LlamaConfig - config = make_config( - LlamaConfig, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=4, - intermediate_size=37, - hidden_act="gelu", - pad_token_id=0, - ) - tokenizer_name = "openlm-research/open_llama_13b" - - -@require_torch -class LlamaAdapterTest( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, - EmbeddingTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - PredictionHeadModelTestMixin, - 
-    ParallelAdapterInferenceTestMixin,
-    ParallelTrainingMixin,
-    LlamaAdapterTestBase,
-    unittest.TestCase,
-):
-    pass
-
-
-@require_torch
-class LlamaClassConversionTest(
-    ModelClassConversionTestMixin,
-    LlamaAdapterTestBase,
-    unittest.TestCase,
-):
-    def test_conversion_question_answering_model(self):
-        raise self.skipTest("We don't support the Llama QA model.")
diff --git a/tests/test_mbart.py b/tests/test_mbart.py
deleted file mode 100644
index 691e46e537..0000000000
--- a/tests/test_mbart.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import unittest
-
-from transformers import MBartConfig
-from transformers.testing_utils import require_torch
-
-from .composition.test_parallel import ParallelAdapterInferenceTestMixin
-from .methods import (
-    BottleneckAdapterTestMixin,
-    CompacterTestMixin,
-    IA3TestMixin,
-    LoRATestMixin,
-    PrefixTuningTestMixin,
-    ReftTestMixin,
-    UniPELTTestMixin,
-)
-from .test_adapter import TextAdapterTestBase, make_config
-from .test_adapter_conversion import ModelClassConversionTestMixin
-from .test_adapter_fusion_common import AdapterFusionModelTestMixin
-from .test_adapter_heads import PredictionHeadModelTestMixin
-
-
-class MBartAdapterTestBase(TextAdapterTestBase):
-    config_class = MBartConfig
-    config = make_config(
-        MBartConfig,
-        d_model=16,
-        encoder_layers=2,
-        decoder_layers=2,
-        encoder_attention_heads=4,
-        decoder_attention_heads=4,
-        encoder_ffn_dim=4,
-        decoder_ffn_dim=4,
-        vocab_size=250027,
-    )
-    tokenizer_name = "facebook/mbart-large-cc25"
-
-
-@require_torch
-class MBartAdapterTest(
-    BottleneckAdapterTestMixin,
-    CompacterTestMixin,
-    IA3TestMixin,
-    LoRATestMixin,
-    PrefixTuningTestMixin,
-    ReftTestMixin,
-    UniPELTTestMixin,
-    AdapterFusionModelTestMixin,
-    PredictionHeadModelTestMixin,
-    ParallelAdapterInferenceTestMixin,
-    MBartAdapterTestBase,
-    unittest.TestCase,
-):
-    pass
-
-
-@require_torch
-class MBartClassConversionTest(
-    ModelClassConversionTestMixin,
-    MBartAdapterTestBase,
-    unittest.TestCase,
-):
-    pass
diff --git a/tests/test_methods/__init__.py b/tests/test_methods/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/test_adapter.py b/tests/test_methods/base.py
similarity index 96%
rename from tests/test_adapter.py
rename to tests/test_methods/base.py
index 6cf17c981e..392c9ea1ff 100644
--- a/tests/test_adapter.py
+++ b/tests/test_methods/base.py
@@ -16,6 +16,14 @@ def make_config(config_class, **kwargs):
     return staticmethod(lambda: config_class(**kwargs))
 
 
+def ids_tensor(shape, dtype=torch.long, vocab_size=5000):
+    total_dims = 1
+    for dim in shape:
+        total_dims *= dim
+    values = [global_rng.randint(0, vocab_size - 1) for _ in range(total_dims)]
+    return torch.tensor(data=values, dtype=dtype, device=torch_device).view(shape).contiguous()
+
+
 class AbstractAdapterTestBase:
     """Base class for adapter tests. Defines basic functions and attributes with default values which are used in the
     tests. Model test classes should inherit from this class or subclass and override the attributes and functions as needed.
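Note on the refactored base above: the re-added ids_tensor helper is what the relocated test modules (e.g. the composition and hub tests under tests/test_impl/) import from tests.test_methods.base to build dummy token inputs, and the subclass-and-override pattern described in the AbstractAdapterTestBase docstring is shown concretely in the new tests/test_methods/test_albert.py below. A minimal usage sketch for ids_tensor, with an arbitrary shape and vocabulary size chosen only for illustration and assuming torch_device and global_rng are defined in tests/test_methods/base.py as in the hunk above:

    from tests.test_methods.base import ids_tensor

    # Random integer tensor (default dtype torch.long) of the requested shape on torch_device,
    # with values drawn from [0, vocab_size); e.g. fake input_ids for a quick forward pass.
    dummy_input_ids = ids_tensor((3, 64), vocab_size=1000)
    assert dummy_input_ids.shape == (3, 64)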
diff --git a/tests/test_methods/imports.py b/tests/test_methods/imports.py
new file mode 100644
index 0000000000..09e8952848
--- /dev/null
+++ b/tests/test_methods/imports.py
@@ -0,0 +1,21 @@
+import unittest
+from math import ceil
+
+import pytest
+
+from tests.test_impl.composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin
+from tests.test_impl.core.test_adapter_backward_compability import CompabilityTestMixin
+from tests.test_impl.core.test_adapter_conversion import ModelClassConversionTestMixin
+from tests.test_impl.core.test_adapter_fusion_common import AdapterFusionModelTestMixin
+from tests.test_impl.embeddings.test_adapter_embeddings import EmbeddingTestMixin
+from tests.test_impl.heads.test_adapter_heads import PredictionHeadModelTestMixin
+from tests.test_impl.peft.test_adapter_common import BottleneckAdapterTestMixin
+from tests.test_impl.peft.test_compacter import CompacterTestMixin
+from tests.test_impl.peft.test_ia3 import IA3TestMixin
+from tests.test_impl.peft.test_lora import LoRATestMixin
+from tests.test_impl.peft.test_prefix_tuning import PrefixTuningTestMixin
+from tests.test_impl.peft.test_prompt_tuning import PromptTuningTestMixin
+from tests.test_impl.peft.test_reft import ReftTestMixin
+from tests.test_impl.peft.test_unipelt import UniPELTTestMixin
+from tests.test_methods.base import TextAdapterTestBase, make_config
+from transformers.testing_utils import require_torch
diff --git a/tests/test_methods/test_albert.py b/tests/test_methods/test_albert.py
new file mode 100644
index 0000000000..6fb408531c
--- /dev/null
+++ b/tests/test_methods/test_albert.py
@@ -0,0 +1,142 @@
+from transformers import AlbertConfig
+
+from .imports import *
+
+
+class AlbertAdapterTestBase(TextAdapterTestBase):
+    """Model configuration for testing methods on Albert."""
+
+    config_class = AlbertConfig
+    config = make_config(
+        AlbertConfig,
+        embedding_size=16,
+        hidden_size=64,
+        num_hidden_layers=5,
+        num_attention_heads=4,
+        intermediate_size=37,
+        num_hidden_groups=2,
+    )
+    tokenizer_name = "albert-base-v2"
+    leave_out_layers = [0]
+
+
+@require_torch
+class Core(
+    AlbertAdapterTestBase,
+    ModelClassConversionTestMixin,
+    CompabilityTestMixin,
+    AdapterFusionModelTestMixin,
+    unittest.TestCase,
+):
+    def test_context_simple(self):
+        expected_number_of_adapter_calls = ceil(self.config().num_hidden_layers / self.config().num_hidden_groups)
+        super().test_context_simple(expected_number_of_adapter_calls=expected_number_of_adapter_calls)
+
+
+@require_torch
+class Composition(
+    AlbertAdapterTestBase,
+    ParallelAdapterInferenceTestMixin,
+    ParallelTrainingMixin,
+    unittest.TestCase,
+):
+    pass
+
+
+@require_torch
+class Heads(
+    AlbertAdapterTestBase,
+    PredictionHeadModelTestMixin,
+    unittest.TestCase,
+):
+    pass
+
+
+@require_torch
+class Embeddings(
+    AlbertAdapterTestBase,
+    EmbeddingTestMixin,
+    unittest.TestCase,
+):
+    pass
+
+
+@require_torch
+class ClassConversion(
+    ModelClassConversionTestMixin,
+    AlbertAdapterTestBase,
+    unittest.TestCase,
+):
+    pass
+
+
+@require_torch
+class PrefixTuning(
+    AlbertAdapterTestBase,
+    PrefixTuningTestMixin,
+    unittest.TestCase,
+):
+    pass
+
+
+@require_torch
+class PromptTuning(
+    AlbertAdapterTestBase,
+    PromptTuningTestMixin,
+    unittest.TestCase,
+):
+    pass
+
+
+@require_torch
+class ReFT(
+    AlbertAdapterTestBase,
+    ReftTestMixin,
+    unittest.TestCase,
+):
+    pass
+
+
+@require_torch
+class UniPELT(
+    AlbertAdapterTestBase,
+    UniPELTTestMixin,
+    unittest.TestCase,
+):
+    pass
+
+
+@require_torch
+class
Compacter( + AlbertAdapterTestBase, + CompacterTestMixin, + unittest.TestCase, +): + pass + + +@require_torch +class Bottleneck( + AlbertAdapterTestBase, + BottleneckAdapterTestMixin, + unittest.TestCase, +): + pass + + +@require_torch +class IA3( + AlbertAdapterTestBase, + IA3TestMixin, + unittest.TestCase, +): + pass + + +@require_torch +class LoRA( + AlbertAdapterTestBase, + LoRATestMixin, + unittest.TestCase, +): + pass diff --git a/tests/test_mistral.py b/tests/test_mistral.py deleted file mode 100644 index 960d3f0443..0000000000 --- a/tests/test_mistral.py +++ /dev/null @@ -1,66 +0,0 @@ -import unittest - -from transformers.models.mistral.configuration_mistral import MistralConfig -from transformers.testing_utils import require_torch - -from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin -from .methods import ( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, -) -from .test_adapter import TextAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_embeddings import EmbeddingTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -class MistralAdapterTestBase(TextAdapterTestBase): - config_class = MistralConfig - config = make_config( - MistralConfig, - hidden_size=32, - num_hidden_layers=5, - num_attention_heads=8, - intermediate_size=37, - hidden_act="gelu", - hidden_dropout_prob=0.1, - pad_token_id=0, - ) - tokenizer_name = "HuggingFaceH4/zephyr-7b-beta" - - -@require_torch -class MistralAdapterTest( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, - EmbeddingTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - PredictionHeadModelTestMixin, - ParallelAdapterInferenceTestMixin, - ParallelTrainingMixin, - MistralAdapterTestBase, - unittest.TestCase, -): - pass - - -@require_torch -class MistralClassConversionTest( - ModelClassConversionTestMixin, - MistralAdapterTestBase, - unittest.TestCase, -): - pass diff --git a/tests/test_models/__init__.py b/tests/test_models/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/models/base.py b/tests/test_models/base.py similarity index 100% rename from tests/models/base.py rename to tests/test_models/base.py diff --git a/tests/models/test_albert.py b/tests/test_models/test_albert.py similarity index 100% rename from tests/models/test_albert.py rename to tests/test_models/test_albert.py diff --git a/tests/test_mt5.py b/tests/test_mt5.py deleted file mode 100644 index d1f76325d7..0000000000 --- a/tests/test_mt5.py +++ /dev/null @@ -1,68 +0,0 @@ -import unittest - -from transformers import MT5Config -from transformers.testing_utils import require_torch - -from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin -from .methods import ( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, -) -from .test_adapter import TextAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_embeddings import 
EmbeddingTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -@require_torch -class MT5AdapterTestBase(TextAdapterTestBase): - config_class = MT5Config - config = make_config( - MT5Config, - d_model=16, - num_layers=2, - num_decoder_layers=2, - num_heads=4, - d_ff=4, - d_kv=16 // 4, - tie_word_embeddings=False, - decoder_start_token_id=0, - ) - tokenizer_name = "google/mt5-base" - - -@require_torch -class MT5AdapterTest( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, - EmbeddingTestMixin, - CompabilityTestMixin, - ParallelAdapterInferenceTestMixin, - ParallelTrainingMixin, - AdapterFusionModelTestMixin, - PredictionHeadModelTestMixin, - MT5AdapterTestBase, - unittest.TestCase, -): - pass - - -@require_torch -class MT5ClassConversionTest( - ModelClassConversionTestMixin, - MT5AdapterTestBase, - unittest.TestCase, -): - pass diff --git a/tests/test_plbart.py b/tests/test_plbart.py deleted file mode 100644 index b268f6b2d2..0000000000 --- a/tests/test_plbart.py +++ /dev/null @@ -1,67 +0,0 @@ -import unittest - -from tests.methods.test_config_union import ConfigUnionAdapterTest -from transformers import PLBartConfig -from transformers.testing_utils import require_torch - -from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin -from .methods import ( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - UniPELTTestMixin, -) -from .test_adapter import TextAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_embeddings import EmbeddingTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -class PLBartAdapterTestBase(TextAdapterTestBase): - config_class = PLBartConfig - config = make_config( - PLBartConfig, - d_model=16, - encoder_layers=2, - decoder_layers=2, - encoder_attention_heads=4, - decoder_attention_heads=4, - encoder_ffn_dim=4, - decoder_ffn_dim=4, - scale_embedding=False, # Required for embedding tests - ) - tokenizer_name = "uclanlp/plbart-base" - - -@require_torch -class PLBartAdapterTest( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - UniPELTTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - EmbeddingTestMixin, - PredictionHeadModelTestMixin, - ParallelAdapterInferenceTestMixin, - ParallelTrainingMixin, - ConfigUnionAdapterTest, - PLBartAdapterTestBase, - unittest.TestCase, -): - pass - - -@require_torch -class PLBartClassConversionTest( - ModelClassConversionTestMixin, - PLBartAdapterTestBase, - unittest.TestCase, -): - pass diff --git a/tests/test_roberta.py b/tests/test_roberta.py deleted file mode 100644 index 2e6df220e6..0000000000 --- a/tests/test_roberta.py +++ /dev/null @@ -1,49 +0,0 @@ -import unittest - -from tests.methods.test_config_union import ConfigUnionAdapterTest -from transformers import RobertaConfig -from transformers.testing_utils import require_torch - -from .composition.test_parallel import ParallelAdapterInferenceTestMixin -from .methods import AllMethodsTestMixin -from .test_adapter import TextAdapterTestBase, make_config -from .test_adapter_backward_compability import 
CompabilityTestMixin -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -class RobertaAdapterTestBase(TextAdapterTestBase): - config_class = RobertaConfig - config = make_config( - RobertaConfig, - hidden_size=32, - num_hidden_layers=4, - num_attention_heads=4, - intermediate_size=37, - vocab_size=50265, - ) - tokenizer_name = "roberta-base" - - -@require_torch -class RobertaAdapterTest( - AllMethodsTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - PredictionHeadModelTestMixin, - ParallelAdapterInferenceTestMixin, - ConfigUnionAdapterTest, - RobertaAdapterTestBase, - unittest.TestCase, -): - pass - - -@require_torch -class RobertaClassConversionTest( - ModelClassConversionTestMixin, - RobertaAdapterTestBase, - unittest.TestCase, -): - pass diff --git a/tests/test_t5.py b/tests/test_t5.py deleted file mode 100644 index 82aa9f1047..0000000000 --- a/tests/test_t5.py +++ /dev/null @@ -1,68 +0,0 @@ -import unittest - -from transformers import T5Config -from transformers.testing_utils import require_torch - -from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin -from .methods import ( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, -) -from .test_adapter import TextAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_embeddings import EmbeddingTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -@require_torch -class T5AdapterTestBase(TextAdapterTestBase): - config_class = T5Config - config = make_config( - T5Config, - d_model=16, - num_layers=2, - num_decoder_layers=2, - num_heads=4, - d_ff=4, - d_kv=16 // 4, - tie_word_embeddings=False, - decoder_start_token_id=0, - ) - tokenizer_name = "t5-base" - - -@require_torch -class T5AdapterTest( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, - EmbeddingTestMixin, - CompabilityTestMixin, - ParallelAdapterInferenceTestMixin, - ParallelTrainingMixin, - AdapterFusionModelTestMixin, - PredictionHeadModelTestMixin, - T5AdapterTestBase, - unittest.TestCase, -): - pass - - -@require_torch -class T5ClassConversionTest( - ModelClassConversionTestMixin, - T5AdapterTestBase, - unittest.TestCase, -): - pass diff --git a/tests/test_vit.py b/tests/test_vit.py deleted file mode 100644 index a4bdd0afb9..0000000000 --- a/tests/test_vit.py +++ /dev/null @@ -1,48 +0,0 @@ -import unittest - -from transformers import ViTConfig -from transformers.testing_utils import require_torch - -from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin -from .methods import AllMethodsTestMixin -from .test_adapter import VisionAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -class ViTAdapterTestBase(VisionAdapterTestBase): - config_class = ViTConfig - config = make_config( - 
ViTConfig, - image_size=224, - hidden_size=32, - num_hidden_layers=4, - num_attention_heads=4, - intermediate_size=37, - ) - feature_extractor_name = "google/vit-base-patch16-224-in21k" - - -@require_torch -class ViTAdapterTest( - AllMethodsTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - PredictionHeadModelTestMixin, - ParallelAdapterInferenceTestMixin, - ParallelTrainingMixin, - ViTAdapterTestBase, - unittest.TestCase, -): - pass - - -@require_torch -class ViTClassConversionTest( - ModelClassConversionTestMixin, - ViTAdapterTestBase, - unittest.TestCase, -): - pass diff --git a/tests/test_whisper.py b/tests/test_whisper.py deleted file mode 100644 index c28ac20752..0000000000 --- a/tests/test_whisper.py +++ /dev/null @@ -1,72 +0,0 @@ -import unittest - -from tests.methods.test_config_union import ConfigUnionAdapterTest -from transformers import WhisperConfig -from transformers.testing_utils import require_torch - -from .composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin -from .methods import ( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, -) -from .test_adapter import AudioAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_embeddings import EmbeddingTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -class WhisperAdapterTestBase(AudioAdapterTestBase): - config_class = WhisperConfig - config = make_config( - WhisperConfig, - d_model=16, - encoder_layers=2, - decoder_layers=2, - encoder_attention_heads=4, - decoder_attention_heads=4, - encoder_ffn_dim=4, - decoder_ffn_dim=4, - vocab_size=51865, - ) - tokenizer_name = "openai/whisper-small" - sampling_rate = 16000 - decoder_start_token_id = 50257 - - -@require_torch -class WhisperAdapterTest( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - EmbeddingTestMixin, - PredictionHeadModelTestMixin, - ParallelAdapterInferenceTestMixin, - ParallelTrainingMixin, - ConfigUnionAdapterTest, - WhisperAdapterTestBase, - unittest.TestCase, -): - def test_parallel_training_lora(self): - self.skipTest("Not supported for Whisper") - - -@require_torch -class WhisperClassConversionTest( - ModelClassConversionTestMixin, - WhisperAdapterTestBase, - unittest.TestCase, -): - pass diff --git a/tests/test_xlm_roberta.py b/tests/test_xlm_roberta.py deleted file mode 100644 index 320251f920..0000000000 --- a/tests/test_xlm_roberta.py +++ /dev/null @@ -1,41 +0,0 @@ -import unittest - -from transformers import XLMRobertaConfig -from transformers.testing_utils import require_torch - -from .methods import AllMethodsTestMixin -from .test_adapter import TextAdapterTestBase, make_config -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin - - -class XLMRobertaAdapterTestBase(TextAdapterTestBase): - config_class = XLMRobertaConfig - config = make_config( - XLMRobertaConfig, - hidden_size=32, - num_hidden_layers=4, - num_attention_heads=4, - intermediate_size=37, - vocab_size=250002, - ) - tokenizer_name = "xlm-roberta-base" - - -@require_torch -class 
XLMRobertaAdapterTest( - AllMethodsTestMixin, - AdapterFusionModelTestMixin, - XLMRobertaAdapterTestBase, - unittest.TestCase, -): - pass - - -@require_torch -class XLMRobertaClassConversionTest( - ModelClassConversionTestMixin, - XLMRobertaAdapterTestBase, - unittest.TestCase, -): - pass diff --git a/tests/test_xmod.py b/tests/test_xmod.py deleted file mode 100644 index 2306aa9c44..0000000000 --- a/tests/test_xmod.py +++ /dev/null @@ -1,49 +0,0 @@ -import unittest - -from transformers import XmodConfig -from transformers.testing_utils import require_torch - -from .composition.test_parallel import ParallelAdapterInferenceTestMixin -from .methods import AllMethodsTestMixin -from .test_adapter import TextAdapterTestBase, make_config -from .test_adapter_backward_compability import CompabilityTestMixin -from .test_adapter_conversion import ModelClassConversionTestMixin -from .test_adapter_fusion_common import AdapterFusionModelTestMixin -from .test_adapter_heads import PredictionHeadModelTestMixin - - -class XmodAdapterTestBase(TextAdapterTestBase): - config_class = XmodConfig - config = make_config( - XmodConfig, - hidden_size=32, - num_hidden_layers=4, - num_attention_heads=4, - intermediate_size=37, - vocab_size=250002, - max_position_embeddings=512, - default_language="en_XX", - ) - tokenizer_name = "xlm-roberta-base" - - -@require_torch -class XmodAdapterTest( - AllMethodsTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - PredictionHeadModelTestMixin, - ParallelAdapterInferenceTestMixin, - XmodAdapterTestBase, - unittest.TestCase, -): - pass - - -@require_torch -class XmodClassConversionTest( - ModelClassConversionTestMixin, - XmodAdapterTestBase, - unittest.TestCase, -): - pass From 630b7223468a673fa8c058ce8560f9d73792ce61 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Fri, 1 Nov 2024 14:24:45 +0100 Subject: [PATCH 05/63] remove redundant imports --- tests/test_impl/composition/test_adapter_composition.py | 1 - tests/test_impl/composition/test_parallel.py | 1 - tests/test_impl/core/test_adapter_backward_compability.py | 2 -- tests/test_impl/core/test_adapter_config.py | 2 -- tests/test_impl/core/test_adapter_conversion.py | 1 - tests/test_impl/core/test_adapter_fusion_common.py | 1 - tests/test_impl/core/test_adapter_fusion_config.py | 2 -- tests/test_impl/core/test_adapter_hub.py | 1 - tests/test_impl/core/test_adapter_safetensors.py | 1 - tests/test_impl/core/test_adapter_save_id2label.py | 2 -- tests/test_impl/peft/test_adapter_common.py | 1 - 11 files changed, 15 deletions(-) diff --git a/tests/test_impl/composition/test_adapter_composition.py b/tests/test_impl/composition/test_adapter_composition.py index 6398648cb8..417619dd55 100644 --- a/tests/test_impl/composition/test_adapter_composition.py +++ b/tests/test_impl/composition/test_adapter_composition.py @@ -1,6 +1,5 @@ import unittest -import pytest import torch import adapters diff --git a/tests/test_impl/composition/test_parallel.py b/tests/test_impl/composition/test_parallel.py index 61ac70581d..8aa2c8a6df 100644 --- a/tests/test_impl/composition/test_parallel.py +++ b/tests/test_impl/composition/test_parallel.py @@ -1,7 +1,6 @@ import copy import random -import pytest import torch from adapters import ( diff --git a/tests/test_impl/core/test_adapter_backward_compability.py b/tests/test_impl/core/test_adapter_backward_compability.py index be9177e0d7..90150267ab 100644 --- a/tests/test_impl/core/test_adapter_backward_compability.py +++ b/tests/test_impl/core/test_adapter_backward_compability.py @@ -2,8 +2,6 @@ 
import os import tempfile -import pytest - from adapters import SeqBnConfig, __version__ from tests.test_impl.base import create_twin_models from transformers.testing_utils import require_torch diff --git a/tests/test_impl/core/test_adapter_config.py b/tests/test_impl/core/test_adapter_config.py index 948150d6a0..db57aeae2b 100644 --- a/tests/test_impl/core/test_adapter_config.py +++ b/tests/test_impl/core/test_adapter_config.py @@ -2,8 +2,6 @@ import unittest from dataclasses import FrozenInstanceError, dataclass -import pytest - from adapters import ( ADAPTER_CONFIG_MAP, AdapterConfig, diff --git a/tests/test_impl/core/test_adapter_conversion.py b/tests/test_impl/core/test_adapter_conversion.py index c8ec96ece9..9653b3f340 100644 --- a/tests/test_impl/core/test_adapter_conversion.py +++ b/tests/test_impl/core/test_adapter_conversion.py @@ -2,7 +2,6 @@ import re import tempfile -import pytest import torch import adapters diff --git a/tests/test_impl/core/test_adapter_fusion_common.py b/tests/test_impl/core/test_adapter_fusion_common.py index 914de3bd98..ccc860f667 100644 --- a/tests/test_impl/core/test_adapter_fusion_common.py +++ b/tests/test_impl/core/test_adapter_fusion_common.py @@ -3,7 +3,6 @@ import tempfile from dataclasses import asdict -import pytest import torch from adapters import ADAPTER_MODEL_MAPPING, ADAPTERFUSION_CONFIG_MAP, AdapterConfig, AutoAdapterModel, SeqBnConfig diff --git a/tests/test_impl/core/test_adapter_fusion_config.py b/tests/test_impl/core/test_adapter_fusion_config.py index 8f88dcce18..0ad1860f20 100644 --- a/tests/test_impl/core/test_adapter_fusion_config.py +++ b/tests/test_impl/core/test_adapter_fusion_config.py @@ -1,8 +1,6 @@ import unittest from dataclasses import FrozenInstanceError -import pytest - from adapters import ADAPTERFUSION_CONFIG_MAP, AdapterFusionConfig from transformers.testing_utils import require_torch diff --git a/tests/test_impl/core/test_adapter_hub.py b/tests/test_impl/core/test_adapter_hub.py index 7e5a2c56b4..62120fa740 100644 --- a/tests/test_impl/core/test_adapter_hub.py +++ b/tests/test_impl/core/test_adapter_hub.py @@ -2,7 +2,6 @@ import unittest import numpy as np -import pytest import adapters from adapters import ADAPTER_CONFIG_MAP, AdapterConfig, BertAdapterModel, get_adapter_config_hash diff --git a/tests/test_impl/core/test_adapter_safetensors.py b/tests/test_impl/core/test_adapter_safetensors.py index 0a60bcc390..3c743c7a97 100644 --- a/tests/test_impl/core/test_adapter_safetensors.py +++ b/tests/test_impl/core/test_adapter_safetensors.py @@ -4,7 +4,6 @@ import tempfile import unittest -import pytest import torch from adapters import BertAdapterModel, Fuse diff --git a/tests/test_impl/core/test_adapter_save_id2label.py b/tests/test_impl/core/test_adapter_save_id2label.py index cf8d5b4ff0..4d8eba7051 100644 --- a/tests/test_impl/core/test_adapter_save_id2label.py +++ b/tests/test_impl/core/test_adapter_save_id2label.py @@ -2,8 +2,6 @@ from tempfile import TemporaryDirectory from typing import Dict -import pytest - import adapters from adapters import BertAdapterModel from transformers import BertConfig, BertForSequenceClassification diff --git a/tests/test_impl/peft/test_adapter_common.py b/tests/test_impl/peft/test_adapter_common.py index 98e6cf8b89..5d58239af6 100644 --- a/tests/test_impl/peft/test_adapter_common.py +++ b/tests/test_impl/peft/test_adapter_common.py @@ -1,7 +1,6 @@ import copy import tempfile -import pytest import torch import adapters From 0d3577fc7021a53275e31969e7d2560d20050b53 Mon Sep 17 00:00:00 
2001 From: Timo Imhof Date: Fri, 1 Nov 2024 14:25:13 +0100 Subject: [PATCH 06/63] Add pytest markers and respective pytest commands --- Makefile | 62 +++++++++++++++++++++++++++++-- pyproject.toml | 16 ++++++++ tests/test_methods/test_albert.py | 13 +++++++ 3 files changed, 87 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 04b9374133..f12142d5e9 100644 --- a/Makefile +++ b/Makefile @@ -28,18 +28,72 @@ style: isort $(check_dirs) ${MAKE} extra_style_checks -# Run tests for the library +# Library Tests +# run all tests in the library test: python -m pytest -n auto --dist=loadfile -s -v ./tests/ +# run tests for the adapter methods test-adapter-methods: - python -m pytest --ignore ./tests/models -n auto --dist=loadfile -s -v ./tests/ + python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ +# run tests for the adapter models test-adapter-models: - python -m pytest -n auto --dist=loadfile -s -v ./tests/models + python -m pytest -n auto --dist=loadfile -s -v ./tests/test_models/ -# Run tests for examples +# run the core tests for all models +test-adapter-core: + python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m core + +# run the adapter composition tests for all models +test-adapter-composition: + python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m composition + +# run the head tests for all models +test-adapter-heads: + python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m heads + +# run the embedding teasts for all models +test-adapter-embeddings: + python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m embeddings + +# run the class conversion tests for all models +test-adapter-class_conversion: + python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m class_conversion + +# run the prefix tuning tests for all models +test-adapter-prefix_tuning: + python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m prefix_tuning + +# run the prompt tuning tests for all models +test-adapter-prompt_tuning: + python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m prompt_tuning +# run the reft tests for all models +test-adapter-reft: + python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m reft + +# run the unipelt tests for all models +test-adapter-unipelt: + python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m unipelt + +# run the compacter tests for all models +test-adapter-compacter: + python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m compacter + +# run the bottleneck tests for all models +test-adapter-bottleneck: + python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m bottleneck + +# run the ia3 tests for all models +test-adapter-ia3: + python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m ia3 + +# run the lora tests for all models +test-adapter-lora: + python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m lora + +# Run tests for examples test-examples: python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/ diff --git a/pyproject.toml b/pyproject.toml index ad2437e9a8..0a78cd3b1a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,19 @@ [tool.black] line-length = 119 target-version = ['py38', 'py39', 'py310'] +[tool.pytest.ini_options] +markers = [ + "core: marks tests as core adapter test", + "composition: marks tests as composition adapter test", + "heads: marks tests as heads adapter test", + "embeddings: marks 
tests as embeddings adapter test", + "class_conversion: marks tests as class conversion adapter test", + "prefix_tuning: marks tests as prefix tuning adapter test", + "prompt_tuning: marks tests as prompt tuning adapter test", + "reft: marks tests as reft adapter test", + "unipelt: marks tests as unipelt adapter test", + "compacter: marks tests as compacter adapter test", + "bottleneck: marks tests as bottleneck adapter test", + "ia3: marks tests as ia3 adapter test", + "lora: marks tests as lora adapter test", +] \ No newline at end of file diff --git a/tests/test_methods/test_albert.py b/tests/test_methods/test_albert.py index 6fb408531c..c508c9b54f 100644 --- a/tests/test_methods/test_albert.py +++ b/tests/test_methods/test_albert.py @@ -21,6 +21,7 @@ class AlbertAdapterTestBase(TextAdapterTestBase): @require_torch +@pytest.mark.core class Core( AlbertAdapterTestBase, ModelClassConversionTestMixin, @@ -34,6 +35,7 @@ def test_context_simple(self): @require_torch +@pytest.mark.composition class Composition( AlbertAdapterTestBase, ParallelAdapterInferenceTestMixin, @@ -44,6 +46,7 @@ class Composition( @require_torch +@pytest.mark.heads class Heads( AlbertAdapterTestBase, PredictionHeadModelTestMixin, @@ -53,6 +56,7 @@ class Heads( @require_torch +@pytest.mark.embeddings class Embeddings( AlbertAdapterTestBase, EmbeddingTestMixin, @@ -62,6 +66,7 @@ class Embeddings( @require_torch +@pytest.mark.class_conversion class ClassConversion( ModelClassConversionTestMixin, AlbertAdapterTestBase, @@ -71,6 +76,7 @@ class ClassConversion( @require_torch +@pytest.mark.prefix_tuning class PrefixTuning( AlbertAdapterTestBase, PrefixTuningTestMixin, @@ -80,6 +86,7 @@ class PrefixTuning( @require_torch +@pytest.mark.prompt_tuning class PromptTuning( AlbertAdapterTestBase, PromptTuningTestMixin, @@ -89,6 +96,7 @@ class PromptTuning( @require_torch +@pytest.mark.reft class ReFT( AlbertAdapterTestBase, ReftTestMixin, @@ -98,6 +106,7 @@ class ReFT( @require_torch +@pytest.mark.unipelt class UniPELT( AlbertAdapterTestBase, UniPELTTestMixin, @@ -107,6 +116,7 @@ class UniPELT( @require_torch +@pytest.mark.compacter class Compacter( AlbertAdapterTestBase, CompacterTestMixin, @@ -116,6 +126,7 @@ class Compacter( @require_torch +@pytest.mark.bottleneck class Bottleneck( AlbertAdapterTestBase, BottleneckAdapterTestMixin, @@ -125,6 +136,7 @@ class Bottleneck( @require_torch +@pytest.mark.ia3 class IA3( AlbertAdapterTestBase, IA3TestMixin, @@ -134,6 +146,7 @@ class IA3( @require_torch +@pytest.mark.lora class LoRA( AlbertAdapterTestBase, LoRATestMixin, From 1300856f073f818571ceab22980a2f2582d40158 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Fri, 1 Nov 2024 14:46:41 +0100 Subject: [PATCH 07/63] Add draft of README --- tests/README.md | 65 ++++++++++++++++++++++++++++++++++++++++++++++++ tests/image.png | Bin 0 -> 36003 bytes 2 files changed, 65 insertions(+) create mode 100644 tests/README.md create mode 100644 tests/image.png diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000000..832ac82c8f --- /dev/null +++ b/tests/README.md @@ -0,0 +1,65 @@ +# Testing the adapters library + +This README gives an overview of how the test directory is organized and the possibilities to group and execute different kinds of tests. +## Test directory structure + +``` +tests/ +├── __init__.py +├── fixtures/ # Datasets, samples, ... +| └── ... 
+├── test_impl/        # Test Implementations
+│   ├── __init__.py
+│   ├── composition/
+│   │   ├── __init__.py
+│   │   ├── test_adapter_composition.py
+│   │   └── test_parallel.py
+│   ├── core/
+│   │   ├── __init__.py
+│   │   ├── test_adapter_config.py
+│   │   ├── test_adapter_conversion.py
+│   │   └── ...
+│   ├── embeddings/
+│   └── ...
+├── test_methods/      # Test entry points
+│   └── __init__.py
+├── test_models/       # Test entry points
+│   └── __init__.py
+```
+
+## Test Types
+
+1. Adapter method tests: test the **implementation of the adapter methods**, such as the different kinds of adapters or custom heads.
+    - These tests are executed for each model, hence there is a test file for each model, e.g. `test_albert.py`
+    - Each model test file is organized into various test classes to group similar tests
+    - While this results in a bit more boilerplate code, it allows for an organized view in the test viewer, which in turn makes it convenient to execute subgroups of tests, e.g. like this:
+    ![test viewer screenshot](image.png)
+2. Adapter model tests: test the **implementation of the adapter models** on which the adapter methods can be used.
+    - We rely on the thorough test suite of Hugging Face and test our models on it.
+
+## Utilizing pytest markers
+
+Each class in each model test file in `tests/test_methods` is decorated with a marker of a certain type, e.g.:
+```python
+@require_torch
+@pytest.mark.lora
+class LoRA(
+    AlbertAdapterTestBase,
+    LoRATestMixin,
+    unittest.TestCase,
+):
+    pass
+```
+
+These markers can be used to execute a certain type of test **for every model**:
+- e.g.: for executing the LoRA tests for every model we can write:
+    ```bash
+    cd tests/test_methods
+    pytest -m lora
+    ```
+    This command will execute all LoRA tests for every model in the adapters library.
+
+Alternatively, instead of navigating to `tests/test_methods` in the terminal, you can select a command from the `Makefile` in the root directory and launch such a subset of tests, e.g.:
+```bash
+make test-adapter-lora
+```
\ No newline at end of file
diff --git a/tests/image.png b/tests/image.png
new file mode 100644
index 0000000000000000000000000000000000000000..34e881b9436d0675a2d3435fb9a137b38dfbc26c
GIT binary patch
literal 36003 [base85-encoded binary image data omitted]
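A note on the marker workflow described in the README above: the registered markers compose with pytest's regular selection options, so a marker-based subset can also be narrowed to a single model or combined across adapter methods. The invocations below are illustrative sketches only (they assume the `tests/test_methods` layout and the marker names introduced in this patch series):

```bash
# Assumed layout: run from the directory that holds the per-model test files
cd tests/test_methods

# LoRA tests for a single model only (hypothetical narrowing of the README example)
pytest -m lora test_albert.py

# Marker expressions combine several adapter methods in one run
pytest -m "lora or ia3" -v
```

Registering the markers under `[tool.pytest.ini_options]` (as the pyproject.toml change above does) also keeps pytest from reporting them as unknown marks.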
z8Tcu~MaFD^bA8id*AS-AbkaU1a#4wQSqkILXhFtF7QO;WO_|$C23soVMWLUwmv`zz zi({{)c_5v!AiDqlgZknJO@ra6YdYfIh z{9t{)#w(ol*yH@`oef2tr^?D9S-MCaZfyPPJ6#gork(K-1lH%Z&KiBCyW&$l%e+}f zA{**i_ze~FuA8aN0iB9-+E;+zMlRcKBjL8Zru7Z$Glt`nPZw{}go#R?Go0Hb?}da$ z>XkdT9nfvIN;-NxoYHj=yY*zS5|lRdFQ9lK&#T8XO=j}&CDKT!Ntw$J*CL893PzkY z+G=+frNLcSYG(#dqUdlzr^R;bM_2sKL}=R9Y>%HMBJsn0dak-naw?9QDGElW!|Y$& z8bWVhPAd+tXkhk_JgpCyxm!^nQKFU>f9McQ`Ti9>9wtKFbG)J&7f1 zby`l1u87)izty$$^1#Ll6b0^sF3EdieK+b#z@Ysm7uN?xdgm_pLdpK@ae72-0 z-48z9V1j=QM+r#t*&UyJUxIe)d_eYrI{$9@K}bpP{fOcp)n_}@K!)cA?5|kvFdQZC zBfWoU6$6oe)<1o#{h@N8jo>GR<%y^CWY+m#u2He^qBeCgeS9F4^;UfIEDQq!1G3;$ zxsfw&%>$6Y_c`0+X&<6}%k@c5lcWA84_i++t9iv8Dpr-hV_LO>XzFr*3lo4Vvx0jO z7$9&lJ$yr#qg(p;&b93Rn7?zVHdS)zpWT|kX14})24$Ox0H<#|6z&7^-XJ*ZQ;-ch z%wCOvh)>>V&;8Z*-!rvB@<%V7oIXa)XAnRE-7a)khZKn55}XI3q>}nG5X=1Z3Rfb1#9~&u^q3LTb~-A-=P8=NFjpfSb0hy9W2o= z3C3U@yS|kKx!%@Q&be1^z)@tnq4IfWXf_GytDTuE#a_watULL41!6VK%J=TUI{2w0 zP!Q9gR#L<0?%lhU##$CwT1J<{pkoFPH<+#ZnIb{g%Xg7cD`O_?K_enGIi#21qnPSr9kqKqfYAZ?coW6eHAu=lNfFg zYdnI%-a;i6R+aW2t)M@ETL*8EP;Nhngm77KeN8-iUeC_oB)x9 z=PB84RAXw;fzf%-Rj7#`Vf_!csv470_ftaWxlj?PON=`Bly(k)D>!IXFn3+`%HH${ zfcT0Aa7`xjJV3F)&sHwsP?6Mni@r5{#kcuOeZuPnO8&2iQkgy}#45;j3+3FD@e~0* ziMmkRTrslD#{?fWh4$sU^Oy(Y#=!3|+mSMSo4SEZVD<`VlYd1WZTXa=)UAdUPm@Ep z5G+iwSlo>kC?)1?yi^wy!^%+;LCY^ro-DMi6F>i#e*S;&cMJu;Q;@T50V>d2 zDfbi6C)q?>@W-xRj_PWlAUlFQm}QoJ(ynV_Cxrp21}fgZ^D`Fo zT6>g_k`1G=oKiKb=pq$*J=IR5~U{uZY8*cgi*k~IsvI5kih?4 zB@R3jHjpyh9w$=VGCTZP9y>6c0sMJQ@?T1Ag3JC&s+a#vkl!#9finavTYN1^AtwUVBBi5uLe@=T z>jL1x4|fKRcym4@@X9r%EjdCQ#D3QUmMCsD`Qx0)$`&%kGCl_a2q-cGfpF$A(-$x9 z{lSNhldVfhO6p0YCeKrVC-@rNvkOGN>=|}50jfMS5f7+#w)@>_b;FS4_A4S_0PPgH zX%GoA-=?bdAEKj`Y;5xOHYRj{VT%9QKLIF2hIZx(+$G$k$V?&BwY z;D+^&U4Eipg*@-cajnmmI&F8UcLTY8-8Pl^D2I+E5$lv+_bcpkvaUqcY?VM15bOr- z|Fa?z7#==!d2X{nLFMG+EcDHB*ytu4&uG4t-4D`gNDN@oCZ(cUCY5k=iIq>)0T07j zuyEkCG{BLRLSAX)$xtkJ(05ewE{XmA_dDmOo8=2=s0(Bu5-~7=YxL6@@WkH%k4UV6 zo*uWBD0*gpqSXfG;C0VCuf{V$SQOBIxFaKD!*}CwSn9aApY7m5MrmmO2ETDJU|fVu<1*@q-NkC$ zU#qI}$2fi0K*9h_+%}uR6-Y20W+z*c2GpR|;;MaaDnR4iTchIi^Scvz<#9+l74Z@w z-Kd2)J|a60j8{Q?H3?8ux?;53K*<{`Fp)t<^1HZz;}QwlI!t-MGhvY+oJYy4U%?Im zJ3;4jX0zasZAe&H)XgV1lwh#jE7ydf%Ek_ponI{+gCKOmk%)z}uszo##Ln-1)o0|# zYSXLBQmNjTb|H6Z0Z@qr7}qWHMIFG^{(EO{ufRyzKHc%zuE&zU`xlTd7bL@B!syWo zVsG*s4grY?dj<__(0sfB1hZ1OSzm()v7*A3Vga($j$wy#W|GFs%Zrqhlo^oTJ^-qW z={K+)031_n5J??6wp7t*B=;r$)G@@9`7uu+^Q zu(Y&TFVlW;{RMSl3`v0pb$zTj8l1MYz+=w(*D~W*lw8@h5{957iItJDCWfO7EJ{Kz zQN!qP7=637yof6S6_8U>KfHEjAk#i<+f>?Bb))K(UM~b3r+%G${ Date: Tue, 5 Nov 2024 11:45:43 +0100 Subject: [PATCH 08/63] Refactoring: - refactor generate_test into base test class and adapt parallel generate test accordingly - refactor lm head selection into own method - create utils file for utility methods and adapt imports accordingly - remove is_speech_model flags - replace redundant attributes and methods - enforce proper method naming conventions --- tests/test_impl/base.py | 71 ++++++++++--------- tests/test_impl/composition/test_parallel.py | 56 ++++----------- .../core/test_adapter_backward_compability.py | 2 +- .../core/test_adapter_fusion_common.py | 4 +- .../embeddings/test_adapter_embeddings.py | 19 ++--- tests/test_impl/peft/test_adapter_common.py | 21 +++--- tests/test_impl/peft/test_compacter.py | 47 ++++-------- tests/test_impl/peft/test_lora.py | 12 ++-- tests/test_impl/peft/test_prefix_tuning.py | 23 +----- tests/test_impl/peft/test_reft.py | 2 +- tests/test_impl/peft/test_unipelt.py | 4 +- tests/test_impl/utils.py | 29 ++++++++ tests/test_methods/base.py | 38 +++++----- tests/test_methods/test_albert.py | 8 +-- 
14 files changed, 146 insertions(+), 190 deletions(-) create mode 100644 tests/test_impl/utils.py diff --git a/tests/test_impl/base.py b/tests/test_impl/base.py index 0d20f32fef..fc2338626f 100644 --- a/tests/test_impl/base.py +++ b/tests/test_impl/base.py @@ -4,7 +4,6 @@ import torch -import adapters from adapters import ADAPTER_MODEL_MAPPING, AdapterSetup, AdapterTrainer, AutoAdapterModel from adapters.heads import CausalLMHead from adapters.utils import WEIGHTS_NAME @@ -12,33 +11,26 @@ from transformers import TrainingArguments from transformers.testing_utils import require_torch, torch_device - -def create_twin_models(model_class, config_creator=None): - if config_creator and model_class.__name__.startswith("Auto"): - model_config = config_creator() - model1 = model_class.from_config(model_config) - elif config_creator: - model_config = config_creator() - model1 = model_class(model_config) - else: - model_config = model_class.config_class() - model1 = model_class(model_config) - adapters.init(model1) - model1.eval() - # create a twin initialized with the same random weights - model2 = copy.deepcopy(model1) - model2.eval() - return model1, model2 +from .utils import add_lm_head, create_twin_models @require_torch class AdapterMethodBaseTestMixin: - """Provides base test running methods for testing an adapter method implementation.""" + """Implements base test running methods for testing adapter method implementations.""" - # Model weight dtypes to test in forward pass dtypes_to_test = [torch.float32, torch.half] if torch_device == "cuda" else [torch.float32] - def filter_parameters(self, model, filter_keys): + def _assert_adapter_available(self, model, adapter_name): + """Check wether the adapter name is present in the model's adapter config and has been created.""" + self.assertTrue(adapter_name in model.adapters_config) + self.assertGreater(len(model.get_adapter(adapter_name)), 0) + + def _assert_adapter_unavailable(self, model, adapter_name): + """Check wether the adapter name is not present in the model's adapter config and has not been created.""" + self.assertFalse(adapter_name in model.adapters_config) + self.assertEqual(len(model.get_adapter(adapter_name)), 0) + + def _filter_parameters(self, model, filter_keys): return {k: v for (k, v) in model.named_parameters() if any([filter_key in k for filter_key in filter_keys])} def run_add_test(self, model, adapter_config, filter_keys): @@ -56,7 +48,7 @@ def run_add_test(self, model, adapter_config, filter_keys): # check that weights are available and active has_weights = False filter_keys = [k.format(name=name) for k in filter_keys] - for k, v in self.filter_parameters(model, filter_keys).items(): + for k, v in self._filter_parameters(model, filter_keys).items(): has_weights = True self.assertTrue(v.requires_grad, k) self.assertTrue(has_weights) @@ -70,7 +62,7 @@ def run_leave_out_test(self, model, adapter_config, leave_out): model.set_active_adapters(name) # adapter is correctly added to config - self.assert_adapter_available(model, name) + self._assert_adapter_available(model, name) adapter = model.get_adapter(name) @@ -95,7 +87,7 @@ def run_linear_average_test(self, model, adapter_config, filter_keys): averaged_weights = {} for i, w in enumerate(weights): this_filter_keys = [k.format(name=name + f"_{i}") for k in filter_keys] - for k, v in self.filter_parameters(model, this_filter_keys).items(): + for k, v in self._filter_parameters(model, this_filter_keys).items(): base_k = k.replace(name + f"_{i}", name) if base_k not in 
averaged_weights: averaged_weights[base_k] = w * v @@ -113,7 +105,7 @@ def run_linear_average_test(self, model, adapter_config, filter_keys): # compare averaged weights to collected weights this_filter_keys = [k.format(name=name) for k in filter_keys] - for k, v in self.filter_parameters(model, this_filter_keys).items(): + for k, v in self._filter_parameters(model, this_filter_keys).items(): self.assertTrue(torch.allclose(v, averaged_weights[k]), k) def run_delete_test(self, model, adapter_config, filter_keys): @@ -125,16 +117,16 @@ def run_delete_test(self, model, adapter_config, filter_keys): model.to(torch_device) # adapter is correctly added to config - self.assert_adapter_available(model, name) + self._assert_adapter_available(model, name) # remove the adapter again model.delete_adapter(name) - self.assert_adapter_unavailable(model, name) + self._assert_adapter_unavailable(model, name) # check that weights are available and active has_weights = False filter_keys = [k.format(name=name) for k in filter_keys] - for k, v in self.filter_parameters(model, filter_keys).items(): + for k, v in self._filter_parameters(model, filter_keys).items(): has_weights = True self.assertFalse(has_weights) @@ -146,7 +138,7 @@ def run_get_test(self, model, adapter_config, num_expected_modules): # adapter is correctly added to config name = "first" - self.assert_adapter_available(model, name) + self._assert_adapter_available(model, name) adapter = model.get_adapter("first") @@ -249,7 +241,7 @@ def run_full_model_load_test(self, adapter_config): def trainings_run(self, model, lr=1.0, steps=8): # setup dataset - train_dataset = self.dataset() + train_dataset = self.get_dataset() training_args = TrainingArguments( output_dir="./examples", @@ -281,8 +273,8 @@ def run_train_test(self, adapter_config, filter_keys): model.add_adapter("dummy", config=adapter_config) self.add_head(model, "mrpc") - self.assert_adapter_available(model, "mrpc") - self.assert_adapter_available(model, "dummy") + self._assert_adapter_available(model, "mrpc") + self._assert_adapter_available(model, "dummy") # train the mrpc adapter -> should be activated & unfreezed model.train_adapter("mrpc") @@ -291,13 +283,13 @@ def run_train_test(self, adapter_config, filter_keys): # all weights of the adapter should be activated has_weights = False filter_keys_trained = [k.format(name="mrpc") for k in filter_keys] - for k, v in self.filter_parameters(model, filter_keys_trained).items(): + for k, v in self._filter_parameters(model, filter_keys_trained).items(): has_weights = True self.assertTrue(v.requires_grad, k) self.assertTrue(has_weights) # all weights of the adapter not used for training should be frozen filter_keys_untrained = [k.format(name="dummy") for k in filter_keys] - for k, v in self.filter_parameters(model, filter_keys_untrained).items(): + for k, v in self._filter_parameters(model, filter_keys_untrained).items(): self.assertFalse(v.requires_grad, k) state_dict_pre = copy.deepcopy(model.state_dict()) @@ -370,3 +362,14 @@ def run_reset_test(self, adapter_config): # check forward pass self.assertEqual(len(output_1), len(output_2)) self.assertTrue(torch.allclose(output_1[0], output_2[0], atol=1e-3)) + + def run_generate_test(self, adapter_config, max_new_tokens=32): + + model = self.get_model() + model.add_adapter("generate", config=adapter_config) + add_lm_head(self.config_class, model, "generate") + model.set_active_adapters("generate") + model.to(torch_device) + input_ids = self.build_rand_ids_tensor(self.input_shape).to(torch_device) + 
generated = model.generate(input_ids, max_new_tokens=max_new_tokens) + self.assertLessEqual(generated.shape, (self.input_shape[0], self.input_shape[1] + max_new_tokens)) diff --git a/tests/test_impl/composition/test_parallel.py b/tests/test_impl/composition/test_parallel.py index 8aa2c8a6df..0e97791cd9 100644 --- a/tests/test_impl/composition/test_parallel.py +++ b/tests/test_impl/composition/test_parallel.py @@ -12,7 +12,7 @@ T5AdapterModel, ) from adapters.composition import BatchSplit, Parallel -from adapters.models.bert_generation.adapter_model import BertGenerationAdapterModel +from tests.test_impl.utils import add_lm_head from transformers import MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING, Trainer, TrainingArguments from transformers.testing_utils import require_torch, torch_device @@ -116,7 +116,7 @@ def test_batch_split_with_heads(self): ) ) - def test_parallel_generate(self): + def test_parallel_generate(self, max_new_tokens=32): if self.config_class not in ADAPTER_MODEL_MAPPING or ( "seq2seq_lm" not in ADAPTER_MODEL_MAPPING[self.config_class].head_types and "causal_lm" not in ADAPTER_MODEL_MAPPING[self.config_class].head_types @@ -126,25 +126,13 @@ def test_parallel_generate(self): model1 = AutoAdapterModel.from_config(self.config()) model1.add_adapter("adapter1") model1.add_adapter("adapter2") - if "seq2seq_lm" in ADAPTER_MODEL_MAPPING[self.config_class].head_types: - model1.add_seq2seq_lm_head("adapter1") - model1.add_seq2seq_lm_head("adapter2") - else: - model1.add_causal_lm_head("adapter1") - model1.add_causal_lm_head("adapter2") + add_lm_head(self.config_class, model1, "adapter1") + add_lm_head(self.config_class, model1, "adapter2") model1.set_active_adapters(Parallel("adapter1", "adapter2")) model1.to(torch_device) - - seq_output_length = 32 - - # Finally, also check if generation works properly - input_ids = self.extract_input_ids( - self.get_input_samples(self.generate_input_samples_shape, config=model1.config) - ) - - input_ids = input_ids.to(torch_device) - generated = model1.generate(input_ids, max_length=seq_output_length) - self.assertLessEqual(generated.shape, (2, seq_output_length)) + input_ids = self.build_rand_ids_tensor(self.input_shape).to(torch_device) + generated = model1.generate(input_ids, max_new_tokens=max_new_tokens) + self.assertLessEqual(generated.shape, (self.input_shape[0] * 2, self.input_shape[1] + max_new_tokens)) class ParallelTrainingMixin: @@ -208,7 +196,7 @@ def run_parallel_training_test(self, adapter_config, filter_key): state_dict_pre = copy.deepcopy(model.state_dict()) - train_dataset = self.dataset() + train_dataset = self.get_dataset() training_args = TrainingArguments( output_dir="./examples", do_train=True, @@ -241,22 +229,12 @@ def run_parallel_training_equivalent_to_single(self, adapter_config): a1, a2 = self.create_twin_adapters(model, "a", adapter_config) b1, b2 = self.create_twin_adapters(model, "b", adapter_config) + # TODO: refactor this dataset creation into an own method dataset = [] - if self.is_speech_model: - dataset_batched = self.dataset() - dataset = [{} for _ in range(len(dataset_batched))] - # As this test uses a non-batched training, we need to wrap the samples by an additional dimension - for i in range(len(dataset_batched)): - for key, value in dataset_batched[i].items(): - dataset[i][key] = torch.unsqueeze(value, 0) - else: - for i in range(3): - input_data = self.get_input_samples(config=model.config) - if isinstance(model, BertGenerationAdapterModel): - input_data["labels"] = torch.randint(0, 2, (3, 64)) - else: - 
input_data["labels"] = torch.randint(0, 2, (3, 1)) - dataset.append(input_data) + for i in range(3): + input_data = self.get_input_samples(config=model.config) + input_data["labels"] = self.build_rand_ids_tensor((3, 1), 2) + dataset.append(input_data) for adapter in [a1, b1]: model.active_head = adapter @@ -314,12 +292,8 @@ def test_parallel_training_single_forward_pass(self): input_data = self.get_input_samples( config=model.config, ) - if isinstance(model, BertGenerationAdapterModel): - input_data["labels"] = torch.randint(0, 2, (3, 64), device=torch_device) - elif self.is_speech_model: - input_data["labels"] = input_data["decoder_input_ids"] - else: - input_data["labels"] = torch.randint(0, 2, (3, 1), device=torch_device) + + input_data["labels"] = self.build_rand_ids_tensor((3, 1), 2) outputs = [] for adapter in [a1, b1]: diff --git a/tests/test_impl/core/test_adapter_backward_compability.py b/tests/test_impl/core/test_adapter_backward_compability.py index 90150267ab..722d6499bf 100644 --- a/tests/test_impl/core/test_adapter_backward_compability.py +++ b/tests/test_impl/core/test_adapter_backward_compability.py @@ -3,7 +3,7 @@ import tempfile from adapters import SeqBnConfig, __version__ -from tests.test_impl.base import create_twin_models +from tests.test_impl.utils import create_twin_models from transformers.testing_utils import require_torch diff --git a/tests/test_impl/core/test_adapter_fusion_common.py b/tests/test_impl/core/test_adapter_fusion_common.py index ccc860f667..1a0a2137db 100644 --- a/tests/test_impl/core/test_adapter_fusion_common.py +++ b/tests/test_impl/core/test_adapter_fusion_common.py @@ -206,11 +206,11 @@ def test_output_adapter_fusion_attentions(self): model.set_active_adapters(Fuse("a", "b")) output_1 = model(**input_data, output_adapter_fusion_attentions=True) - self.assertEqual(len(output_1[0]), self.default_input_samples_shape[0]) + self.assertEqual(len(output_1[0]), self.input_shape[0]) self.assertTrue(hasattr(output_1, "adapter_fusion_attentions")) attention_scores = output_1.adapter_fusion_attentions["a,b"] self.assertEqual(len(list(model.iter_layers())), len(attention_scores)) for k, per_layer_scores in attention_scores.items(): self.assertEqual(len(per_layer_scores), 1) for k, v in per_layer_scores.items(): - self.assertEqual(self.default_input_samples_shape[0], v.shape[0], k) + self.assertEqual(self.input_shape[0], v.shape[0], k) diff --git a/tests/test_impl/embeddings/test_adapter_embeddings.py b/tests/test_impl/embeddings/test_adapter_embeddings.py index 64a07d381f..b40b6bb0d9 100644 --- a/tests/test_impl/embeddings/test_adapter_embeddings.py +++ b/tests/test_impl/embeddings/test_adapter_embeddings.py @@ -47,7 +47,8 @@ def test_delete_embeddings(self): def test_save_load_embedding(self): model = self.get_model() - tokenizer, input_data = self._instantiate_tokenizer(model) + tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name) + input_data = self.get_input_samples(config=self.config()) model.add_embeddings("test", tokenizer) model.eval() model.to(torch_device) @@ -70,7 +71,8 @@ def test_save_load_embedding(self): def test_back_to_default(self): model = self.get_model() model.eval() - tokenizer, input_data = self._instantiate_tokenizer(model) + tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name) + input_data = self.get_input_samples(config=self.config()) output1 = model(**input_data) model.add_embeddings("test", tokenizer) self.assertEqual(model.active_embeddings, "test") @@ -99,7 +101,7 @@ def test_training_embedding(self): 
state_dict_pre = copy.deepcopy(model.state_dict()) initial_embedding = model.get_input_embeddings().weight.clone() - train_dataset = self.dataset() + train_dataset = self.get_dataset() training_args = TrainingArguments( output_dir="./examples", do_train=True, @@ -174,14 +176,3 @@ def test_reference_embedding(self): # activate for training model.add_adapter("test") model.train_adapter("test", train_embeddings=True) - - def _instantiate_tokenizer(self, model): - """Depending on the model type, instantiate a tokenizer and input data. - Speech models require a different tokenizer and sample size.""" - if self.is_speech_model: - tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name) - input_data = self.get_input_samples(config=self.config()) - else: - tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name) - input_data = self.get_input_samples((1, 128), vocab_size=tokenizer.vocab_size, config=model.config) - return tokenizer, input_data diff --git a/tests/test_impl/peft/test_adapter_common.py b/tests/test_impl/peft/test_adapter_common.py index 5d58239af6..696e8ddc09 100644 --- a/tests/test_impl/peft/test_adapter_common.py +++ b/tests/test_impl/peft/test_adapter_common.py @@ -19,7 +19,8 @@ SeqBnInvConfig, ) from adapters.heads.language_modeling import CausalLMHead -from tests.test_impl.base import AdapterMethodBaseTestMixin, create_twin_models +from tests.test_impl.base import AdapterMethodBaseTestMixin +from tests.test_impl.utils import create_twin_models from transformers import MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING, CLIPConfig from transformers.testing_utils import require_torch, torch_device @@ -130,12 +131,12 @@ def test_delete_adapter_with_invertible(self): model.set_active_adapters(name) # check if adapter is correctly added to config - self.assert_adapter_available(model, name) + self._assert_adapter_available(model, name) # remove the adapter again model.delete_adapter(name) # check if adapter is correctly removed from the model - self.assert_adapter_unavailable(model, name) + self._assert_adapter_unavailable(model, name) # check additionally if invertible adapter is removed correctly from the model self.assertFalse(name in model.invertible_adapters) @@ -144,7 +145,7 @@ def test_delete_adapter_with_invertible(self): # check that weights are available and active has_weights = False filter_keys = [k.format(name=name) for k in filter_keys] - for k, v in self.filter_parameters(model, filter_keys).items(): + for k, v in self._filter_parameters(model, filter_keys).items(): has_weights = True self.assertFalse(has_weights) @@ -386,13 +387,13 @@ def test_train_adapter_fusion(self): self.assertEqual(adapter_setup, model.active_adapters) # all weights of the adapters should be frozen (test for one) - for k, v in self.filter_parameters(model, ["adapters.a."]).items(): + for k, v in self._filter_parameters(model, ["adapters.a."]).items(): self.assertFalse(v.requires_grad, k) # all weights of the fusion layer should be activated - for k, v in self.filter_parameters(model, ["adapter_fusion_layer"]).items(): + for k, v in self._filter_parameters(model, ["adapter_fusion_layer"]).items(): self.assertTrue(v.requires_grad, k) # weights of the model should be frozen (check on some examples) - for k, v in self.filter_parameters(model, ["encoder.layer.0.attention"]).items(): + for k, v in self._filter_parameters(model, ["encoder.layer.0.attention"]).items(): self.assertFalse(v.requires_grad, k) state_dict_pre = copy.deepcopy(model.state_dict()) @@ -452,13 +453,13 @@ def 
test_batch_split_training(self): model.train_adapter(adapter_setup) # all weights of the adapter should be activated - for k, v in self.filter_parameters(model, ["adapters.mrpc1."]).items(): + for k, v in self._filter_parameters(model, ["adapters.mrpc1."]).items(): self.assertTrue(v.requires_grad, k) # all weights of the adapter not used for training should be frozen - for k, v in self.filter_parameters(model, ["adapters.mrpc2."]).items(): + for k, v in self._filter_parameters(model, ["adapters.mrpc2."]).items(): self.assertTrue(v.requires_grad, k) # weights of the model should be frozen (check on some examples) - for k, v in self.filter_parameters(model, ["encoder.layer.0.attention"]).items(): + for k, v in self._filter_parameters(model, ["encoder.layer.0.attention"]).items(): self.assertFalse(v.requires_grad, k) state_dict_pre = copy.deepcopy(model.state_dict()) diff --git a/tests/test_impl/peft/test_compacter.py b/tests/test_impl/peft/test_compacter.py index d6365f8101..39b17c0815 100644 --- a/tests/test_impl/peft/test_compacter.py +++ b/tests/test_impl/peft/test_compacter.py @@ -1,37 +1,36 @@ -from adapters import ADAPTER_MODEL_MAPPING, AutoAdapterModel, CompacterPlusPlusConfig +from adapters import ADAPTER_MODEL_MAPPING, CompacterPlusPlusConfig from tests.test_impl.base import AdapterMethodBaseTestMixin -from transformers.testing_utils import require_torch, torch_device +from transformers.testing_utils import require_torch @require_torch class CompacterTestMixin(AdapterMethodBaseTestMixin): + default_config = CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8) + def test_add_compacter(self): model = self.get_model() - self.run_add_test(model, CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8), ["adapters.{name}."]) + self.run_add_test(model, self.default_config, ["adapters.{name}."]) def test_leave_out_compacter(self): model = self.get_model() - self.run_leave_out_test(model, CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8), self.leave_out_layers) + self.run_leave_out_test(model, self.default_config, self.leave_out_layers) def test_linear_average_compacter(self): model = self.get_model() - self.run_linear_average_test( - model, CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8), ["adapters.{name}."] - ) + self.run_linear_average_test(model, self.default_config, ["adapters.{name}."]) def test_delete_compacter(self): model = self.get_model() - self.run_delete_test(model, CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8), ["adapters.{name}."]) + self.run_delete_test(model, self.default_config, ["adapters.{name}."]) def test_get_compacter(self): model = self.get_model() n_layers = len(list(model.iter_layers())) - self.run_get_test(model, CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8), n_layers + 1) + self.run_get_test(model, self.default_config, n_layers + 1) def test_forward_compacter(self): model = self.get_model() - adapter_config = CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8) - self.run_forward_test(model, adapter_config) + self.run_forward_test(model, self.default_config) def test_forward_shared_phm_compacter(self): model = self.get_model() @@ -39,7 +38,7 @@ def test_forward_shared_phm_compacter(self): self.run_forward_test(model, adapter_config) def test_load_compacter(self): - self.run_load_test(CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8)) + self.run_load_test(self.default_config) def test_train_shared_w_compacter(self): adapter_config = CompacterPlusPlusConfig( @@ -48,8 +47,7 @@ def test_train_shared_w_compacter(self): 
self.run_train_test(adapter_config, ["adapters.{name}."]) def test_train_shared_phm_compacter(self): - adapter_config = CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8) - self.run_train_test(adapter_config, ["adapters.{name}."]) + self.run_train_test(self.default_config, ["adapters.{name}."]) def test_compacter_generate(self): if self.config_class not in ADAPTER_MODEL_MAPPING or ( @@ -57,23 +55,4 @@ def test_compacter_generate(self): and "causal_lm" not in ADAPTER_MODEL_MAPPING[self.config_class].head_types ): self.skipTest("No seq2seq or causal language model head") - - model1 = AutoAdapterModel.from_config(self.config()) - model1.add_adapter("dummy", config=CompacterPlusPlusConfig(phm_dim=2, reduction_factor=8)) - if "seq2seq_lm" in ADAPTER_MODEL_MAPPING[self.config_class].head_types: - model1.add_seq2seq_lm_head("dummy") - else: - model1.add_causal_lm_head("dummy") - model1.set_active_adapters("dummy") - model1.to(torch_device) - - seq_output_length = 32 - - # Finally, also check if generation works properly - input_ids = self.extract_input_ids( - self.get_input_samples(self.generate_input_samples_shape, config=model1.config) - ) - - input_ids = input_ids.to(torch_device) - generated = model1.generate(input_ids, max_length=seq_output_length) - self.assertLessEqual(generated.shape, (1, seq_output_length)) + self.run_generate_test(self.default_config) diff --git a/tests/test_impl/peft/test_lora.py b/tests/test_impl/peft/test_lora.py index 946f28e7cd..bfddec32fa 100644 --- a/tests/test_impl/peft/test_lora.py +++ b/tests/test_impl/peft/test_lora.py @@ -68,7 +68,7 @@ def test_linear_average_only_negate_b_lora(self): averaged_weights = {} for i, w in enumerate(weights): this_filter_keys = [k.format(name=f"{name}_{i}") for k in ["loras.{name}."]] - for k, v in self.filter_parameters(model, this_filter_keys).items(): + for k, v in self._filter_parameters(model, this_filter_keys).items(): base_k = k.replace(f"{name}_{i}", name) # Only negate the lora_B weights and use the absolute value of the weight for lora_A weights. 
weight = abs(w) if "lora_A" in k else w @@ -92,7 +92,7 @@ def test_linear_average_only_negate_b_lora(self): # compare averaged weights to collected weights this_filter_keys = [k.format(name=name) for k in ["loras.{name}."]] - for k, v in self.filter_parameters(model, this_filter_keys).items(): + for k, v in self._filter_parameters(model, this_filter_keys).items(): self.assertTrue(torch.allclose(v, averaged_weights[k]), k) def _check_svd_weights(self, delta_w, merged_lora, svd_rank, atol=1e-5): @@ -194,7 +194,7 @@ def test_edge_case_average_adapters_single_adapter(self): # collect weights of the first adapter so we can compare them to the newly created adapters in the subsequent tests filter_keys_adapter_0 = [k.format(name=f"{name}_0") for k in ["loras.{name}."]] - adapter_0 = self.filter_parameters(model, filter_keys_adapter_0) + adapter_0 = self._filter_parameters(model, filter_keys_adapter_0) # Run tests for every combine strategy for combine_strategy in ["linear", "lora_linear_only_negate_b", "lora_delta_w_svd"]: @@ -214,7 +214,7 @@ def test_edge_case_average_adapters_single_adapter(self): filter_keys = [k.format(name=f"{combine_strategy}_merged") for k in ["loras.{name}."]] if combine_strategy != "lora_delta_w_svd": - for k, v in self.filter_parameters(model, filter_keys).items(): + for k, v in self._filter_parameters(model, filter_keys).items(): adapter_0_key = k.replace(f"{combine_strategy}_merged", f"{name}_0") self.assertTrue(torch.allclose(v, adapter_0[adapter_0_key])) else: @@ -246,7 +246,7 @@ def test_edge_case_average_adapters_multiple_adapters(self): # collect weights of the first adapter so we can compare them to the newly created adapters in the subsequent tests filter_keys_adapter_0 = [k.format(name=f"{name}_0") for k in ["loras.{name}."]] - adapter_0 = self.filter_parameters(model, filter_keys_adapter_0) + adapter_0 = self._filter_parameters(model, filter_keys_adapter_0) # Run tests for every combine strategy for combine_strategy in ["linear", "lora_linear_only_negate_b", "lora_delta_w_svd"]: @@ -268,7 +268,7 @@ def test_edge_case_average_adapters_multiple_adapters(self): filter_keys = [k.format(name=f"{combine_strategy}_merged") for k in ["loras.{name}."]] if combine_strategy != "lora_delta_w_svd": - for k, v in self.filter_parameters(model, filter_keys).items(): + for k, v in self._filter_parameters(model, filter_keys).items(): adapter_1_key = k.replace(f"{combine_strategy}_merged", f"{name}_0") self.assertTrue(torch.allclose(v, adapter_0[adapter_1_key])) else: diff --git a/tests/test_impl/peft/test_prefix_tuning.py b/tests/test_impl/peft/test_prefix_tuning.py index dd9360a933..333888f342 100644 --- a/tests/test_impl/peft/test_prefix_tuning.py +++ b/tests/test_impl/peft/test_prefix_tuning.py @@ -1,6 +1,6 @@ import torch -from adapters import ADAPTER_MODEL_MAPPING, AutoAdapterModel, PrefixTuningConfig +from adapters import ADAPTER_MODEL_MAPPING, PrefixTuningConfig from tests.test_impl.base import AdapterMethodBaseTestMixin from transformers import CLIPConfig from transformers.testing_utils import require_torch, torch_device @@ -80,23 +80,4 @@ def test_prefix_tuning_generate(self): and "causal_lm" not in ADAPTER_MODEL_MAPPING[self.config_class].head_types ): self.skipTest("No seq2seq or causal language model head") - - model1 = AutoAdapterModel.from_config(self.config()) - model1.add_adapter("dummy", config="prefix_tuning") - if "seq2seq_lm" in ADAPTER_MODEL_MAPPING[self.config_class].head_types: - model1.add_seq2seq_lm_head("dummy") - else: - 
model1.add_causal_lm_head("dummy") - model1.set_active_adapters("dummy") - model1.to(torch_device) - - seq_output_length = 32 - - # Finally, also check if generation works properly - input_ids = self.extract_input_ids( - self.get_input_samples(self.generate_input_samples_shape, config=model1.config) - ) - - input_ids = input_ids.to(torch_device) - generated = model1.generate(input_ids, max_length=seq_output_length) - self.assertLessEqual(generated.shape, (1, seq_output_length)) + self.run_generate_test(PrefixTuningConfig()) diff --git a/tests/test_impl/peft/test_reft.py b/tests/test_impl/peft/test_reft.py index 3baa843c50..a5b52519f8 100644 --- a/tests/test_impl/peft/test_reft.py +++ b/tests/test_impl/peft/test_reft.py @@ -31,7 +31,7 @@ def test_layers_reft(self): model.set_active_adapters(name) # adapter is correctly added to config - self.assert_adapter_available(model, name) + self._assert_adapter_available(model, name) adapter = model.get_adapter(name) diff --git a/tests/test_impl/peft/test_unipelt.py b/tests/test_impl/peft/test_unipelt.py index 2e1786dc97..ca477a362a 100644 --- a/tests/test_impl/peft/test_unipelt.py +++ b/tests/test_impl/peft/test_unipelt.py @@ -55,11 +55,11 @@ def test_output_adapter_gating_scores_unipelt(self): model.set_active_adapters(name) output_1 = model(**input_data, output_adapter_gating_scores=True) - self.assertEqual(len(output_1[0]), self.default_input_samples_shape[0]) + self.assertEqual(len(output_1[0]), self.input_shape[0]) self.assertTrue(hasattr(output_1, "adapter_gating_scores")) gating_scores = output_1.adapter_gating_scores[name] self.assertEqual(len(list(model.iter_layers())), len(gating_scores)) for k, per_layer_scores in gating_scores.items(): self.assertGreaterEqual(len(per_layer_scores), 3) for k, v in per_layer_scores.items(): - self.assertEqual(self.default_input_samples_shape[0], v.shape[0], k) + self.assertEqual(self.input_shape[0], v.shape[0], k) diff --git a/tests/test_impl/utils.py b/tests/test_impl/utils.py new file mode 100644 index 0000000000..ae054e62f5 --- /dev/null +++ b/tests/test_impl/utils.py @@ -0,0 +1,29 @@ +import copy + +from adapters import ADAPTER_MODEL_MAPPING, init + + +def create_twin_models(model_class, config_creator=None): + if config_creator and model_class.__name__.startswith("Auto"): + model_config = config_creator() + model1 = model_class.from_config(model_config) + elif config_creator: + model_config = config_creator() + model1 = model_class(model_config) + else: + model_config = model_class.config_class() + model1 = model_class(model_config) + init(model1) + model1.eval() + # create a twin initialized with the same random weights + model2 = copy.deepcopy(model1) + model2.eval() + return model1, model2 + + +def add_lm_head(config_class, model, adapter_name): + """Add appropriate language model head based on model type""" + if "seq2seq_lm" in ADAPTER_MODEL_MAPPING[config_class].head_types: + model.add_seq2seq_lm_head(adapter_name) + else: + model.add_causal_lm_head(adapter_name) diff --git a/tests/test_methods/base.py b/tests/test_methods/base.py index 392c9ea1ff..439b3294d8 100644 --- a/tests/test_methods/base.py +++ b/tests/test_methods/base.py @@ -59,29 +59,24 @@ def get_model(self): model.to(torch_device) return model - def build_random_tensor(self, shape, dtype=torch.float, **kwargs): + def build_rand_tensor(self, shape, dtype=torch.float): """Creates a random tensor of the given shape.""" - total_dims = 1 - for dim in shape: - total_dims *= dim - values = [] - if dtype == torch.long and "vocab_size" in 
kwargs: - values = [random.randint(0, kwargs["vocab_size"] - 1) for _ in range(total_dims)] - elif dtype == torch.float: - values = [random.random() for _ in range(total_dims)] - else: - raise ValueError(f"Unsupported dtype {dtype}") + total_dims = self._calc_total_dim(shape) + values = [random.random() for _ in range(total_dims)] + return torch.tensor(data=values, dtype=dtype, device=torch_device).view(shape).contiguous() - def assert_adapter_available(self, model, adapter_name): - """Check wether the adapter name is present in the model's adapter config and has been created.""" - self.assertTrue(adapter_name in model.adapters_config) - self.assertGreater(len(model.get_adapter(adapter_name)), 0) + def build_rand_ids_tensor(self, shape, vocab_size=5000): + """Creates a random tensor of type torch.long with the given shape with random values in range 0 - (vocab_size-1).""" + total_dims = self._calc_total_dim(shape) + values = [random.randint(0, vocab_size - 1) for _ in range(total_dims)] + return torch.tensor(data=values, dtype=torch.long, device=torch_device).view(shape).contiguous() - def assert_adapter_unavailable(self, model, adapter_name): - """Check wether the adapter name is not present in the model's adapter config and has not been created.""" - self.assertFalse(adapter_name in model.adapters_config) - self.assertEqual(len(model.get_adapter(adapter_name)), 0) + def _calc_total_dim(self, shape): + total_dims = 1 + for dim in shape: + total_dims *= dim + return total_dims def extract_input_ids(self, inputs): # TODO: Check if this is needed in all tests and if it differs between text, vision and speech models @@ -101,7 +96,7 @@ class TextAdapterTestBase(AbstractAdapterTestBase): def get_input_samples(self, shape=None, vocab_size=5000, config=None, **kwargs): shape = shape or self.input_shape - input_ids = self.build_random_tensor(shape, dtype=torch.long) + input_ids = self.build_rand_ids_tensor(shape, vocab_size=vocab_size) # Ensures that only tha last token in each sample is the eos token (needed e.g. 
for BART) if config and config.eos_token_id is not None and config.eos_token_id < vocab_size: @@ -112,6 +107,9 @@ def get_input_samples(self, shape=None, vocab_size=5000, config=None, **kwargs): # Add decoder input ids for models with a decoder if config and config.is_encoder_decoder: in_data["decoder_input_ids"] = input_ids.clone() + + if "num_labels" in kwargs: + in_data["labels"] = self.build_rand_ids_tensor(shape[:-1], vocab_size=kwargs["num_labels"]) return in_data def add_head(self, model, name, **kwargs): diff --git a/tests/test_methods/test_albert.py b/tests/test_methods/test_albert.py index c508c9b54f..b025963dd7 100644 --- a/tests/test_methods/test_albert.py +++ b/tests/test_methods/test_albert.py @@ -29,9 +29,7 @@ class Core( AdapterFusionModelTestMixin, unittest.TestCase, ): - def test_context_simple(self): - expected_number_of_adapter_calls = ceil(self.config().num_hidden_layers / self.config().num_hidden_groups) - super().test_context_simple(expected_number_of_adapter_calls=expected_number_of_adapter_calls) + pass @require_torch @@ -52,7 +50,9 @@ class Heads( PredictionHeadModelTestMixin, unittest.TestCase, ): - pass + def test_context_simple(self): + expected_number_of_adapter_calls = ceil(self.config().num_hidden_layers / self.config().num_hidden_groups) + super().test_context_simple(expected_number_of_adapter_calls=expected_number_of_adapter_calls) @require_torch From 83d3b32f87c36bbb0165c9a6484a49b4dead7930 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Tue, 5 Nov 2024 11:55:08 +0100 Subject: [PATCH 09/63] Fix make quality --- examples/pytorch/language-modeling/run_clm.py | 2 +- setup.cfg | 3 +++ tests/test_methods/imports.py | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/examples/pytorch/language-modeling/run_clm.py b/examples/pytorch/language-modeling/run_clm.py index 20d7fbba0d..f2a7b14fef 100644 --- a/examples/pytorch/language-modeling/run_clm.py +++ b/examples/pytorch/language-modeling/run_clm.py @@ -442,7 +442,7 @@ def main(): else: model = AutoModelForCausalLM.from_config(config, trust_remote_code=model_args.trust_remote_code) n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values()) - logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params") + logger.info(f"Training new model from scratch - Total size={n_params / 2**20:.2f}M params") # Convert the model into an adapter model adapters.init(model) diff --git a/setup.cfg b/setup.cfg index ccad3796df..1e141b3da2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,6 +49,9 @@ use_parentheses = True [flake8] ignore = E203, E501, E731, E741, W503, W605 max-line-length = 119 +per-file-ignores = + tests/test_methods/imports.py: F401, F403, F405 + tests/test_methods/test_*.py:F403,F405 [tool:pytest] doctest_optionflags=NUMBER NORMALIZE_WHITESPACE ELLIPSIS \ No newline at end of file diff --git a/tests/test_methods/imports.py b/tests/test_methods/imports.py index 09e8952848..87ddc95a44 100644 --- a/tests/test_methods/imports.py +++ b/tests/test_methods/imports.py @@ -11,6 +11,7 @@ from tests.test_impl.heads.test_adapter_heads import PredictionHeadModelTestMixin from tests.test_impl.peft.test_adapter_common import BottleneckAdapterTestMixin from tests.test_impl.peft.test_compacter import CompacterTestMixin +from tests.test_impl.peft.test_config_union import ConfigUnionAdapterTest from tests.test_impl.peft.test_ia3 import IA3TestMixin from tests.test_impl.peft.test_lora import LoRATestMixin from tests.test_impl.peft.test_prefix_tuning import 
PrefixTuningTestMixin @@ -18,4 +19,5 @@ from tests.test_impl.peft.test_reft import ReftTestMixin from tests.test_impl.peft.test_unipelt import UniPELTTestMixin from tests.test_methods.base import TextAdapterTestBase, make_config +from transformers import AutoTokenizer from transformers.testing_utils import require_torch From 5e8e1b863494a310c34e180d781ac5a6d0ba9f41 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Tue, 5 Nov 2024 16:46:46 +0100 Subject: [PATCH 10/63] Add gpt2 tests --- tests/test_methods/test_gpt2.py | 159 ++++++++++++++++++++++++++++++++ tests/test_models/test_gpt2.py | 12 +++ 2 files changed, 171 insertions(+) create mode 100644 tests/test_methods/test_gpt2.py create mode 100644 tests/test_models/test_gpt2.py diff --git a/tests/test_methods/test_gpt2.py b/tests/test_methods/test_gpt2.py new file mode 100644 index 0000000000..ba724f8a5c --- /dev/null +++ b/tests/test_methods/test_gpt2.py @@ -0,0 +1,159 @@ +from transformers import GPT2Config + +from .imports import * + + +class GPT2AdapterTestBase(TextAdapterTestBase): + config_class = GPT2Config + config = make_config( + GPT2Config, + n_embd=32, + n_layer=4, + n_head=4, + # set pad token to eos token + pad_token_id=50256, + ) + tokenizer_name = "gpt2" + + +@require_torch +@pytest.mark.core +class Core( + GPT2AdapterTestBase, + ModelClassConversionTestMixin, + CompabilityTestMixin, + AdapterFusionModelTestMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.composition +class Composition( + GPT2AdapterTestBase, + ParallelAdapterInferenceTestMixin, + ParallelTrainingMixin, + unittest.TestCase, +): + def test_parallel_training_lora(self): + self.skipTest("Not supported for GPT2") + + +@require_torch +@pytest.mark.heads +class Heads( + GPT2AdapterTestBase, + PredictionHeadModelTestMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.embeddings +class Embeddings( + GPT2AdapterTestBase, + EmbeddingTestMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.class_conversion +class ClassConversion( + ModelClassConversionTestMixin, + GPT2AdapterTestBase, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.prefix_tuning +class PrefixTuning( + GPT2AdapterTestBase, + PrefixTuningTestMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.prompt_tuning +class PromptTuning( + GPT2AdapterTestBase, + PromptTuningTestMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.reft +class ReFT( + GPT2AdapterTestBase, + ReftTestMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.unipelt +class UniPELT( + GPT2AdapterTestBase, + UniPELTTestMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.compacter +class Compacter( + GPT2AdapterTestBase, + CompacterTestMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.bottleneck +class Bottleneck( + GPT2AdapterTestBase, + BottleneckAdapterTestMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.ia3 +class IA3( + GPT2AdapterTestBase, + IA3TestMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.lora +class LoRA( + GPT2AdapterTestBase, + LoRATestMixin, + unittest.TestCase, +): + pass + + +@require_torch +class ConfigUnion( + GPT2AdapterTestBase, + ConfigUnionAdapterTest, + unittest.TestCase, +): + pass diff --git a/tests/test_models/test_gpt2.py b/tests/test_models/test_gpt2.py new file mode 100644 index 0000000000..f904be53b8 --- /dev/null +++ b/tests/test_models/test_gpt2.py @@ -0,0 +1,12 @@ +# flake8: 
noqa: F403,F405 +from adapters import GPT2AdapterModel +from hf_transformers.tests.models.gpt2.test_modeling_gpt2 import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class GPT2AdapterModelTest(AdapterModelTesterMixin, GPT2ModelTest): + all_model_classes = (GPT2AdapterModel,) + fx_compatible = False From 53eb0b93e883f9edc912a26784184179f3bf52dd Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Thu, 7 Nov 2024 16:17:38 +0100 Subject: [PATCH 11/63] Fix config union and head tests --- Makefile | 4 +++ tests/test_impl/base.py | 5 +++ tests/test_impl/heads/test_adapter_heads.py | 37 +++++++++------------ tests/test_impl/peft/test_config_union.py | 13 ++++---- tests/test_methods/base.py | 5 ++- tests/test_methods/test_albert.py | 10 ++++++ tests/test_methods/test_gpt2.py | 1 + 7 files changed, 44 insertions(+), 31 deletions(-) diff --git a/Makefile b/Makefile index f12142d5e9..862f04bf52 100644 --- a/Makefile +++ b/Makefile @@ -94,6 +94,10 @@ test-adapter-ia3: test-adapter-lora: python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m lora +# run the config union tests for all models +test-adapter-config_union: + python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m config_union + # Run tests for examples test-examples: python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/ diff --git a/tests/test_impl/base.py b/tests/test_impl/base.py index fc2338626f..090d277fd6 100644 --- a/tests/test_impl/base.py +++ b/tests/test_impl/base.py @@ -53,6 +53,10 @@ def run_add_test(self, model, adapter_config, filter_keys): self.assertTrue(v.requires_grad, k) self.assertTrue(has_weights) + # TODO: for config union tests resetting model should lead to that adapter can be readded with the same name, but currently not working + model.set_active_adapters(None) + model.delete_adapter(name) + def run_leave_out_test(self, model, adapter_config, leave_out): model.eval() @@ -152,6 +156,7 @@ def run_forward_test(self, model, adapter_config, dtype=torch.float32): model.eval() name = adapter_config.__class__.__name__ + # TODO: this defeats the purpose of the test, for the config union tests as only the first config is added if name not in model.adapters_config: model.add_adapter(name, config=adapter_config) model.to(torch_device).to(dtype) diff --git a/tests/test_impl/heads/test_adapter_heads.py b/tests/test_impl/heads/test_adapter_heads.py index c1426d1c6f..ab0c813a5d 100644 --- a/tests/test_impl/heads/test_adapter_heads.py +++ b/tests/test_impl/heads/test_adapter_heads.py @@ -7,7 +7,7 @@ from adapters import ADAPTER_MODEL_MAPPING, AdapterSetup, AutoAdapterModel from adapters.composition import BatchSplit, Stack from adapters.heads import PredictionHead -from tests.test_impl.base import create_twin_models +from tests.test_impl.utils import create_twin_models from transformers import AutoModelForSequenceClassification from transformers.testing_utils import require_torch, torch_device @@ -20,10 +20,8 @@ def run_prediction_head_test( compare_model, head_name, input_shape=None, - output_shape=(1, 2), + output_shape=None, label_dict=None, - num_labels=None, - with_labels=False, ): # first, check if the head is actually correctly registered as part of the pt module self.assertTrue(f"heads.{head_name}" in dict(model.named_modules())) @@ -42,10 +40,8 @@ def run_prediction_head_test( # make a forward pass model.active_head = head_name - input_shape = input_shape if input_shape is not None else 
self._get_input_shape() - in_data = self.get_input_samples( - input_shape, config=model.config, num_labels=num_labels, with_labels=with_labels - ) + input_shape = input_shape if input_shape else self.input_shape + in_data = self.get_input_samples(shape=input_shape, config=model.config) if label_dict: for k, v in label_dict.items(): in_data[k] = v @@ -69,7 +65,9 @@ def test_classification_head(self): model1.add_classification_head("dummy") label_dict = {} label_dict["labels"] = torch.zeros(self.batch_size, dtype=torch.long, device=torch_device) - self.run_prediction_head_test(model1, model2, "dummy", label_dict=label_dict) + self.run_prediction_head_test( + model1, model2, "dummy", label_dict=label_dict, output_shape=(self.batch_size, 2) + ) def test_image_classification_head(self): if "image_classification" not in ADAPTER_MODEL_MAPPING[self.config_class].head_types: @@ -92,7 +90,12 @@ def test_multiple_choice_head(self): label_dict = {} label_dict["labels"] = torch.ones(self.batch_size, dtype=torch.long, device=torch_device) self.run_prediction_head_test( - model1, model2, "dummy", input_shape=(self.batch_size, 2, self.seq_length), label_dict=label_dict + model1, + model2, + "dummy", + input_shape=(self.batch_size, 2, self.seq_length), + label_dict=label_dict, + output_shape=(self.batch_size, 2), ) def test_tagging_head(self): @@ -173,8 +176,7 @@ def test_seq2seq_lm_head(self): ) # Finally, also check if generation works properly - input_shape = self._get_input_shape() - input_ids = self.extract_input_ids(self.get_input_samples(input_shape, config=model1.config)) + input_ids = self.extract_input_ids(self.get_input_samples(self.input_shape, config=model1.config)) input_ids = input_ids.to(torch_device) # Use a different length for the seq2seq output @@ -421,8 +423,7 @@ def forward_pre_hook(module, input): self.assertIsNotNone(inv_adapter) inv_adapter.register_forward_pre_hook(forward_pre_hook) - input_shape = self._get_input_shape() - in_data = self.get_input_samples(input_shape, config=model.config) + in_data = self.get_input_samples(self.input_shape, config=model.config) model.to(torch_device) out = model(**in_data) @@ -471,14 +472,6 @@ def test_save_all_adapters_with_head(self): model.save_all_adapters(tmp_dir, with_head=False) self.assertFalse(os.path.isfile(os.path.join(tmp_dir, "test", "head_config.json"))) - def _get_input_shape(self): - # speech models require a different input dimensions compared to text models - if self.is_speech_model: - input_shape = (self.batch_size, self.seq_length, self.time_window) - else: - input_shape = (self.batch_size, self.seq_length) - return input_shape - def test_average_head(self): # Test the average_head method model = AutoAdapterModel.from_config(self.config()) diff --git a/tests/test_impl/peft/test_config_union.py b/tests/test_impl/peft/test_config_union.py index 682d635eb4..ca33533089 100644 --- a/tests/test_impl/peft/test_config_union.py +++ b/tests/test_impl/peft/test_config_union.py @@ -16,20 +16,20 @@ class ConfigUnionAdapterTest(AdapterMethodBaseTestMixin): ( ConfigUnion( PrefixTuningConfig(), - ParBnConfig(), + ParBnConfig(phm_dim=1), ), ["adapters.{name}.", "prefix_tunings.{name}."], ), ( ConfigUnion( - CompacterConfig(), + CompacterConfig(phm_dim=1), LoRAConfig(), ), ["adapters.{name}.", "loras.{name}."], ), ( ConfigUnion( - SeqBnConfig(), + SeqBnConfig(phm_dim=1), LoRAConfig(), ), ["adapters.{name}.", "loras.{name}."], @@ -37,10 +37,11 @@ class ConfigUnionAdapterTest(AdapterMethodBaseTestMixin): ] def test_add_union_adapter(self): - 
model = self.get_model() - model.eval() - + # TODO: Discuss, why old tests were not working properly (could not work because we would add three times the same adapter name) + # TODO: Discuss why these config unions are not working properly (must set phm_dim=1) for adapter_config, filter_keys in self.adapter_configs_to_test: + model = self.get_model() + model.eval() with self.subTest(model_class=model.__class__.__name__, config=adapter_config.__class__.__name__): self.run_add_test(model, adapter_config, filter_keys) diff --git a/tests/test_methods/base.py b/tests/test_methods/base.py index 439b3294d8..6230265f8d 100644 --- a/tests/test_methods/base.py +++ b/tests/test_methods/base.py @@ -90,9 +90,8 @@ class TextAdapterTestBase(AbstractAdapterTestBase): input_shape_generate = (1, 4) leave_out_layers = [0, 1] batch_size, seq_length = ( - 1, - 128, - ) # TODO: Check in which tests this is needed and if we can simplify by using input_shape + input_shape # TODO: Check in which tests this is needed and if we can simplify by using input_shape + ) def get_input_samples(self, shape=None, vocab_size=5000, config=None, **kwargs): shape = shape or self.input_shape diff --git a/tests/test_methods/test_albert.py b/tests/test_methods/test_albert.py index b025963dd7..616680e537 100644 --- a/tests/test_methods/test_albert.py +++ b/tests/test_methods/test_albert.py @@ -153,3 +153,13 @@ class LoRA( unittest.TestCase, ): pass + + +@require_torch +@pytest.mark.config_union +class ConfigUnion( + AlbertAdapterTestBase, + ConfigUnionAdapterTest, + unittest.TestCase, +): + pass diff --git a/tests/test_methods/test_gpt2.py b/tests/test_methods/test_gpt2.py index ba724f8a5c..d43801068e 100644 --- a/tests/test_methods/test_gpt2.py +++ b/tests/test_methods/test_gpt2.py @@ -151,6 +151,7 @@ class LoRA( @require_torch +@pytest.mark.config_union class ConfigUnion( GPT2AdapterTestBase, ConfigUnionAdapterTest, From 1dbd4122667312cdd37f37de9e37ed37b0bc6129 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Thu, 7 Nov 2024 17:47:28 +0100 Subject: [PATCH 12/63] Fix paths and imports --- .../composition/test_adapter_composition.py | 2 +- tests/test_impl/core/test_adapter_hub.py | 8 +++++--- .../heads/test_adapter_custom_head.py | 2 +- .../trainer/test_adapter_trainer_ext.py | 2 +- tests/test_impl/utils.py | 18 ++++++++++++++++++ tests/test_methods/base.py | 15 --------------- tests/test_methods/imports.py | 3 ++- 7 files changed, 28 insertions(+), 22 deletions(-) diff --git a/tests/test_impl/composition/test_adapter_composition.py b/tests/test_impl/composition/test_adapter_composition.py index 417619dd55..410aea7ec8 100644 --- a/tests/test_impl/composition/test_adapter_composition.py +++ b/tests/test_impl/composition/test_adapter_composition.py @@ -5,7 +5,7 @@ import adapters from adapters import IA3Config, LoRAConfig, PrefixTuningConfig, SeqBnConfig from adapters.composition import Average, BatchSplit, Fuse, Parallel, Split, Stack, parse_composition -from tests.test_methods.base import ids_tensor +from tests.test_impl.utils import ids_tensor from transformers import BertConfig, BertForSequenceClassification from transformers.testing_utils import require_torch, torch_device diff --git a/tests/test_impl/core/test_adapter_hub.py b/tests/test_impl/core/test_adapter_hub.py index 62120fa740..56e33f9957 100644 --- a/tests/test_impl/core/test_adapter_hub.py +++ b/tests/test_impl/core/test_adapter_hub.py @@ -1,4 +1,5 @@ import os +from pathlib import Path import unittest import numpy as np @@ -7,7 +8,7 @@ from adapters import 
ADAPTER_CONFIG_MAP, AdapterConfig, BertAdapterModel, get_adapter_config_hash from adapters.trainer import AdapterTrainer as Trainer from adapters.utils import find_in_index -from tests.test_methods.base import ids_tensor +from tests.test_impl.utils import ids_tensor from transformers import ( AutoModel, AutoTokenizer, @@ -19,8 +20,9 @@ from transformers.testing_utils import require_torch, torch_device -SAMPLE_INDEX = os.path.join(os.path.dirname(os.path.abspath(__file__)), "fixtures/hub-index.sample.json") - +current_file_path = os.path.abspath(__file__) +fixtures_dir = Path(current_file_path).parent.parent.parent / 'fixtures' +SAMPLE_INDEX = str(fixtures_dir / 'hub-index.sample.json') @require_torch class AdapterHubTest(unittest.TestCase): diff --git a/tests/test_impl/heads/test_adapter_custom_head.py b/tests/test_impl/heads/test_adapter_custom_head.py index b7db138a2d..6de8a45d2c 100644 --- a/tests/test_impl/heads/test_adapter_custom_head.py +++ b/tests/test_impl/heads/test_adapter_custom_head.py @@ -5,7 +5,7 @@ from adapters import AutoAdapterModel from adapters.heads import ClassificationHead, PredictionHead -from tests.test_methods.base import ids_tensor +from tests.test_impl.utils import ids_tensor from transformers import AutoConfig from transformers.testing_utils import require_torch, torch_device diff --git a/tests/test_impl/trainer/test_adapter_trainer_ext.py b/tests/test_impl/trainer/test_adapter_trainer_ext.py index 3e49e25f5d..6984ce46ca 100644 --- a/tests/test_impl/trainer/test_adapter_trainer_ext.py +++ b/tests/test_impl/trainer/test_adapter_trainer_ext.py @@ -268,7 +268,7 @@ def run_trainer( do_predict: bool = True, n_gpus_to_use: int = None, ): - data_dir = self.test_file_dir / "../../hf_transformers/tests/fixtures/tests_samples/wmt_en_ro" + data_dir = self.test_file_dir / "../../../hf_transformers/tests/fixtures/tests_samples/wmt_en_ro" output_dir = self.get_auto_remove_tmp_dir() args_train = f""" --model_name_or_path {model_name} diff --git a/tests/test_impl/utils.py b/tests/test_impl/utils.py index ae054e62f5..7ac734f2be 100644 --- a/tests/test_impl/utils.py +++ b/tests/test_impl/utils.py @@ -1,6 +1,12 @@ import copy +import random + +import torch from adapters import ADAPTER_MODEL_MAPPING, init +from transformers.testing_utils import torch_device + +global_rng = random.Random() def create_twin_models(model_class, config_creator=None): @@ -27,3 +33,15 @@ def add_lm_head(config_class, model, adapter_name): model.add_seq2seq_lm_head(adapter_name) else: model.add_causal_lm_head(adapter_name) + + +def make_config(config_class, **kwargs): + return staticmethod(lambda: config_class(**kwargs)) + + +def ids_tensor(shape, vocab_size=5000, dtype=torch.long): + total_dims = 1 + for dim in shape: + total_dims *= dim + values = [global_rng.randint(0, vocab_size - 1) for _ in range(total_dims)] + return torch.tensor(data=values, dtype=dtype, device=torch_device).view(shape).contiguous() diff --git a/tests/test_methods/base.py b/tests/test_methods/base.py index 6230265f8d..345cedf9f3 100644 --- a/tests/test_methods/base.py +++ b/tests/test_methods/base.py @@ -9,21 +9,6 @@ from transformers.testing_utils import torch_device -global_rng = random.Random() - - -def make_config(config_class, **kwargs): - return staticmethod(lambda: config_class(**kwargs)) - - -def ids_tensor(shape, dtype=torch.long, vocab_size=5000): - total_dims = 1 - for dim in shape: - total_dims *= dim - values = [global_rng.randint(0, vocab_size - 1) for _ in range(total_dims)] - return torch.tensor(data=values, 
dtype=dtype, device=torch_device).view(shape).contiguous() - - class AbstractAdapterTestBase: """Base class for adapter tests. Defines basic functions and attributes with default values which are used in the tests. Model test classes should inherit from this class or subclass and override the attributes and functions as needed. diff --git a/tests/test_methods/imports.py b/tests/test_methods/imports.py index 87ddc95a44..59df3990f8 100644 --- a/tests/test_methods/imports.py +++ b/tests/test_methods/imports.py @@ -18,6 +18,7 @@ from tests.test_impl.peft.test_prompt_tuning import PromptTuningTestMixin from tests.test_impl.peft.test_reft import ReftTestMixin from tests.test_impl.peft.test_unipelt import UniPELTTestMixin -from tests.test_methods.base import TextAdapterTestBase, make_config +from tests.test_impl.utils import make_config +from tests.test_methods.base import TextAdapterTestBase from transformers import AutoTokenizer from transformers.testing_utils import require_torch From cf4f6a710c6bbb4b17e081e9cfac790e2347a464 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Thu, 7 Nov 2024 17:50:00 +0100 Subject: [PATCH 13/63] remove accidently added prompt tuning from gpt2 and make style --- tests/test_impl/core/test_adapter_hub.py | 7 ++++--- tests/test_impl/utils.py | 1 + tests/test_methods/test_gpt2.py | 10 ---------- 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/tests/test_impl/core/test_adapter_hub.py b/tests/test_impl/core/test_adapter_hub.py index 56e33f9957..7aa22e22c8 100644 --- a/tests/test_impl/core/test_adapter_hub.py +++ b/tests/test_impl/core/test_adapter_hub.py @@ -1,6 +1,6 @@ import os -from pathlib import Path import unittest +from pathlib import Path import numpy as np @@ -21,8 +21,9 @@ current_file_path = os.path.abspath(__file__) -fixtures_dir = Path(current_file_path).parent.parent.parent / 'fixtures' -SAMPLE_INDEX = str(fixtures_dir / 'hub-index.sample.json') +fixtures_dir = Path(current_file_path).parent.parent.parent / "fixtures" +SAMPLE_INDEX = str(fixtures_dir / "hub-index.sample.json") + @require_torch class AdapterHubTest(unittest.TestCase): diff --git a/tests/test_impl/utils.py b/tests/test_impl/utils.py index 7ac734f2be..473c422e60 100644 --- a/tests/test_impl/utils.py +++ b/tests/test_impl/utils.py @@ -6,6 +6,7 @@ from adapters import ADAPTER_MODEL_MAPPING, init from transformers.testing_utils import torch_device + global_rng = random.Random() diff --git a/tests/test_methods/test_gpt2.py b/tests/test_methods/test_gpt2.py index d43801068e..78573f8706 100644 --- a/tests/test_methods/test_gpt2.py +++ b/tests/test_methods/test_gpt2.py @@ -80,16 +80,6 @@ class PrefixTuning( pass -@require_torch -@pytest.mark.prompt_tuning -class PromptTuning( - GPT2AdapterTestBase, - PromptTuningTestMixin, - unittest.TestCase, -): - pass - - @require_torch @pytest.mark.reft class ReFT( From b390d61b016a36ea191b7b8d5a00ae488285094d Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Thu, 7 Nov 2024 17:57:42 +0100 Subject: [PATCH 14/63] Revert PromptTuning changes --- tests/test_impl/base.py | 5 ----- tests/test_impl/peft/test_config_union.py | 13 ++++++------- tests/test_methods/test_albert.py | 10 ---------- 3 files changed, 6 insertions(+), 22 deletions(-) diff --git a/tests/test_impl/base.py b/tests/test_impl/base.py index 090d277fd6..fc2338626f 100644 --- a/tests/test_impl/base.py +++ b/tests/test_impl/base.py @@ -53,10 +53,6 @@ def run_add_test(self, model, adapter_config, filter_keys): self.assertTrue(v.requires_grad, k) self.assertTrue(has_weights) - # TODO: for 
config union tests resetting model should lead to that adapter can be readded with the same name, but currently not working - model.set_active_adapters(None) - model.delete_adapter(name) - def run_leave_out_test(self, model, adapter_config, leave_out): model.eval() @@ -156,7 +152,6 @@ def run_forward_test(self, model, adapter_config, dtype=torch.float32): model.eval() name = adapter_config.__class__.__name__ - # TODO: this defeats the purpose of the test, for the config union tests as only the first config is added if name not in model.adapters_config: model.add_adapter(name, config=adapter_config) model.to(torch_device).to(dtype) diff --git a/tests/test_impl/peft/test_config_union.py b/tests/test_impl/peft/test_config_union.py index ca33533089..682d635eb4 100644 --- a/tests/test_impl/peft/test_config_union.py +++ b/tests/test_impl/peft/test_config_union.py @@ -16,20 +16,20 @@ class ConfigUnionAdapterTest(AdapterMethodBaseTestMixin): ( ConfigUnion( PrefixTuningConfig(), - ParBnConfig(phm_dim=1), + ParBnConfig(), ), ["adapters.{name}.", "prefix_tunings.{name}."], ), ( ConfigUnion( - CompacterConfig(phm_dim=1), + CompacterConfig(), LoRAConfig(), ), ["adapters.{name}.", "loras.{name}."], ), ( ConfigUnion( - SeqBnConfig(phm_dim=1), + SeqBnConfig(), LoRAConfig(), ), ["adapters.{name}.", "loras.{name}."], @@ -37,11 +37,10 @@ class ConfigUnionAdapterTest(AdapterMethodBaseTestMixin): ] def test_add_union_adapter(self): - # TODO: Discuss, why old tests were not working properly (could not work because we would add three times the same adapter name) - # TODO: Discuss why these config unions are not working properly (must set phm_dim=1) + model = self.get_model() + model.eval() + for adapter_config, filter_keys in self.adapter_configs_to_test: - model = self.get_model() - model.eval() with self.subTest(model_class=model.__class__.__name__, config=adapter_config.__class__.__name__): self.run_add_test(model, adapter_config, filter_keys) diff --git a/tests/test_methods/test_albert.py b/tests/test_methods/test_albert.py index 616680e537..b025963dd7 100644 --- a/tests/test_methods/test_albert.py +++ b/tests/test_methods/test_albert.py @@ -153,13 +153,3 @@ class LoRA( unittest.TestCase, ): pass - - -@require_torch -@pytest.mark.config_union -class ConfigUnion( - AlbertAdapterTestBase, - ConfigUnionAdapterTest, - unittest.TestCase, -): - pass From 2193aeed813512bffd571e3f51d0e748fc8d07b5 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Thu, 7 Nov 2024 18:23:07 +0100 Subject: [PATCH 15/63] Revert "Revert PromptTuning changes" This reverts commit b390d61b016a36ea191b7b8d5a00ae488285094d. 
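
A minimal stand-alone sketch of the flow this restored test exercises, assuming a tiny randomly initialised BertModel and the ConfigUnion/SeqBn/LoRA configs already used in tests/test_impl/peft/test_config_union.py (the model, config sizes and adapter name below are illustrative, not taken from the patch); a fresh model is built per union so the adapter name can be added cleanly, mirroring the per-config model creation in test_add_union_adapter:

    import adapters
    from adapters import ConfigUnion, LoRAConfig, SeqBnConfig
    from transformers import BertConfig, BertModel

    # Tiny randomly initialised model so the sketch runs without downloads.
    config = BertConfig(hidden_size=32, num_hidden_layers=4, num_attention_heads=4, intermediate_size=37)
    model = BertModel(config)
    adapters.init(model)  # enable adapter support on the plain HF model

    # A union combining a bottleneck adapter with LoRA, as in the test matrix.
    union = ConfigUnion(SeqBnConfig(), LoRAConfig())
    model.add_adapter("union_adapter", config=union)  # "union_adapter" is an arbitrary example name
    model.set_active_adapters("union_adapter")

    # As in run_add_test, both method types should contribute trainable weights under the adapter name.
    trainable = [k for k, v in model.named_parameters() if v.requires_grad and "union_adapter" in k]
    assert any("adapters." in k for k in trainable)
    assert any("loras." in k for k in trainable)
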
--- tests/test_impl/base.py | 5 +++++ tests/test_impl/peft/test_config_union.py | 13 +++++++------ tests/test_methods/test_albert.py | 10 ++++++++++ 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/tests/test_impl/base.py b/tests/test_impl/base.py index fc2338626f..090d277fd6 100644 --- a/tests/test_impl/base.py +++ b/tests/test_impl/base.py @@ -53,6 +53,10 @@ def run_add_test(self, model, adapter_config, filter_keys): self.assertTrue(v.requires_grad, k) self.assertTrue(has_weights) + # TODO: for config union tests resetting model should lead to that adapter can be readded with the same name, but currently not working + model.set_active_adapters(None) + model.delete_adapter(name) + def run_leave_out_test(self, model, adapter_config, leave_out): model.eval() @@ -152,6 +156,7 @@ def run_forward_test(self, model, adapter_config, dtype=torch.float32): model.eval() name = adapter_config.__class__.__name__ + # TODO: this defeats the purpose of the test, for the config union tests as only the first config is added if name not in model.adapters_config: model.add_adapter(name, config=adapter_config) model.to(torch_device).to(dtype) diff --git a/tests/test_impl/peft/test_config_union.py b/tests/test_impl/peft/test_config_union.py index 682d635eb4..ca33533089 100644 --- a/tests/test_impl/peft/test_config_union.py +++ b/tests/test_impl/peft/test_config_union.py @@ -16,20 +16,20 @@ class ConfigUnionAdapterTest(AdapterMethodBaseTestMixin): ( ConfigUnion( PrefixTuningConfig(), - ParBnConfig(), + ParBnConfig(phm_dim=1), ), ["adapters.{name}.", "prefix_tunings.{name}."], ), ( ConfigUnion( - CompacterConfig(), + CompacterConfig(phm_dim=1), LoRAConfig(), ), ["adapters.{name}.", "loras.{name}."], ), ( ConfigUnion( - SeqBnConfig(), + SeqBnConfig(phm_dim=1), LoRAConfig(), ), ["adapters.{name}.", "loras.{name}."], @@ -37,10 +37,11 @@ class ConfigUnionAdapterTest(AdapterMethodBaseTestMixin): ] def test_add_union_adapter(self): - model = self.get_model() - model.eval() - + # TODO: Discuss, why old tests were not working properly (could not work because we would add three times the same adapter name) + # TODO: Discuss why these config unions are not working properly (must set phm_dim=1) for adapter_config, filter_keys in self.adapter_configs_to_test: + model = self.get_model() + model.eval() with self.subTest(model_class=model.__class__.__name__, config=adapter_config.__class__.__name__): self.run_add_test(model, adapter_config, filter_keys) diff --git a/tests/test_methods/test_albert.py b/tests/test_methods/test_albert.py index b025963dd7..616680e537 100644 --- a/tests/test_methods/test_albert.py +++ b/tests/test_methods/test_albert.py @@ -153,3 +153,13 @@ class LoRA( unittest.TestCase, ): pass + + +@require_torch +@pytest.mark.config_union +class ConfigUnion( + AlbertAdapterTestBase, + ConfigUnionAdapterTest, + unittest.TestCase, +): + pass From f555484c49a68d0cb815aa5fc1ef871040402be7 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Thu, 7 Nov 2024 18:37:54 +0100 Subject: [PATCH 16/63] Re-add missing adapter model tests --- tests/test_models/test_bart.py | 12 +++++++ tests/test_models/test_beit.py | 12 +++++++ tests/test_models/test_bert.py | 12 +++++++ tests/test_models/test_bert_generation.py | 12 +++++++ tests/test_models/test_clip.py | 39 +++++++++++++++++++++++ tests/test_models/test_deberta.py | 12 +++++++ tests/test_models/test_debertaV2.py | 12 +++++++ tests/test_models/test_distilbert.py | 12 +++++++ tests/test_models/test_electra.py | 12 +++++++ tests/test_models/test_encoder_decoder.py | 
2 ++ tests/test_models/test_gptj.py | 12 +++++++ tests/test_models/test_llama.py | 12 +++++++ tests/test_models/test_mbart.py | 12 +++++++ tests/test_models/test_mistral.py | 12 +++++++ tests/test_models/test_mt5.py | 12 +++++++ tests/test_models/test_plbart.py | 12 +++++++ tests/test_models/test_roberta.py | 12 +++++++ tests/test_models/test_t5.py | 12 +++++++ tests/test_models/test_vit.py | 12 +++++++ tests/test_models/test_whisper.py | 12 +++++++ tests/test_models/test_xlm_roberta.py | 2 ++ tests/test_models/test_xmod.py | 12 +++++++ 22 files changed, 271 insertions(+) create mode 100644 tests/test_models/test_bart.py create mode 100644 tests/test_models/test_beit.py create mode 100644 tests/test_models/test_bert.py create mode 100644 tests/test_models/test_bert_generation.py create mode 100644 tests/test_models/test_clip.py create mode 100644 tests/test_models/test_deberta.py create mode 100644 tests/test_models/test_debertaV2.py create mode 100644 tests/test_models/test_distilbert.py create mode 100644 tests/test_models/test_electra.py create mode 100644 tests/test_models/test_encoder_decoder.py create mode 100644 tests/test_models/test_gptj.py create mode 100644 tests/test_models/test_llama.py create mode 100644 tests/test_models/test_mbart.py create mode 100644 tests/test_models/test_mistral.py create mode 100644 tests/test_models/test_mt5.py create mode 100644 tests/test_models/test_plbart.py create mode 100644 tests/test_models/test_roberta.py create mode 100644 tests/test_models/test_t5.py create mode 100644 tests/test_models/test_vit.py create mode 100644 tests/test_models/test_whisper.py create mode 100644 tests/test_models/test_xlm_roberta.py create mode 100644 tests/test_models/test_xmod.py diff --git a/tests/test_models/test_bart.py b/tests/test_models/test_bart.py new file mode 100644 index 0000000000..70d97c97a0 --- /dev/null +++ b/tests/test_models/test_bart.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import BartAdapterModel +from hf_transformers.tests.models.bart.test_modeling_bart import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class BartAdapterModelTest(AdapterModelTesterMixin, BartModelTest): + all_model_classes = (BartAdapterModel,) + fx_compatible = False diff --git a/tests/test_models/test_beit.py b/tests/test_models/test_beit.py new file mode 100644 index 0000000000..1d6fc92727 --- /dev/null +++ b/tests/test_models/test_beit.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import BeitAdapterModel +from hf_transformers.tests.models.beit.test_modeling_beit import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class BeitAdapterModelTest(AdapterModelTesterMixin, BeitModelTest): + all_model_classes = (BeitAdapterModel,) + fx_compatible = False diff --git a/tests/test_models/test_bert.py b/tests/test_models/test_bert.py new file mode 100644 index 0000000000..1ca69b0b8f --- /dev/null +++ b/tests/test_models/test_bert.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import BertAdapterModel +from hf_transformers.tests.models.bert.test_modeling_bert import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class BertAdapterModelTest(AdapterModelTesterMixin, BertModelTest): + all_model_classes = (BertAdapterModel,) + fx_compatible = False diff --git a/tests/test_models/test_bert_generation.py 
b/tests/test_models/test_bert_generation.py new file mode 100644 index 0000000000..15f867e00d --- /dev/null +++ b/tests/test_models/test_bert_generation.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import BertGenerationAdapterModel +from hf_transformers.tests.models.bert_generation.test_modeling_bert_generation import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class BertGenerationAdapterModelTest(AdapterModelTesterMixin, BertGenerationEncoderTest): + all_model_classes = (BertGenerationAdapterModel,) + fx_compatible = False diff --git a/tests/test_models/test_clip.py b/tests/test_models/test_clip.py new file mode 100644 index 0000000000..921e0668f5 --- /dev/null +++ b/tests/test_models/test_clip.py @@ -0,0 +1,39 @@ +# flake8: noqa: F403,F405 +import numpy as np + +from adapters import CLIPAdapterModel +from hf_transformers.tests.models.clip.test_modeling_clip import * # Imported to execute model tests +from hf_transformers.tests.test_modeling_common import _config_zero_init +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class CLIPAdapterModelTest(AdapterModelTesterMixin, CLIPModelTest): + all_model_classes = (CLIPAdapterModel,) + fx_compatible = False + + # override as the `logit_scale` parameter has a different name in the adapter model + def test_initialization(self): + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + + configs_no_init = _config_zero_init(config) + for model_class in self.all_model_classes: + model = model_class(config=configs_no_init) + for name, param in model.named_parameters(): + if param.requires_grad: + # check if `logit_scale` is initilized as per the original implementation + if name == "clip.logit_scale": + self.assertAlmostEqual( + param.data.item(), + np.log(1 / 0.07), + delta=1e-3, + msg=f"Parameter {name} of model {model_class} seems not properly initialized", + ) + else: + self.assertIn( + ((param.data.mean() * 1e9).round() / 1e9).item(), + [0.0, 1.0], + msg=f"Parameter {name} of model {model_class} seems not properly initialized", + ) diff --git a/tests/test_models/test_deberta.py b/tests/test_models/test_deberta.py new file mode 100644 index 0000000000..27f94bf1b4 --- /dev/null +++ b/tests/test_models/test_deberta.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import DebertaAdapterModel +from hf_transformers.tests.models.deberta.test_modeling_deberta import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class DebertaAdapterModelTest(AdapterModelTesterMixin, DebertaModelTest): + all_model_classes = (DebertaAdapterModel,) + fx_compatible = False diff --git a/tests/test_models/test_debertaV2.py b/tests/test_models/test_debertaV2.py new file mode 100644 index 0000000000..9e97466cc5 --- /dev/null +++ b/tests/test_models/test_debertaV2.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import DebertaV2AdapterModel +from hf_transformers.tests.models.deberta_v2.test_modeling_deberta_v2 import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class DebertaV2AdapterModelTest(AdapterModelTesterMixin, DebertaV2ModelTest): + all_model_classes = (DebertaV2AdapterModel,) + fx_compatible = False diff --git a/tests/test_models/test_distilbert.py b/tests/test_models/test_distilbert.py new file mode 100644 
index 0000000000..56cad41de0 --- /dev/null +++ b/tests/test_models/test_distilbert.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import DistilBertAdapterModel +from hf_transformers.tests.models.distilbert.test_modeling_distilbert import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class DistilBertAdapterModelTest(AdapterModelTesterMixin, DistilBertModelTest): + all_model_classes = (DistilBertAdapterModel,) + fx_compatible = False diff --git a/tests/test_models/test_electra.py b/tests/test_models/test_electra.py new file mode 100644 index 0000000000..642eeb0c04 --- /dev/null +++ b/tests/test_models/test_electra.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import ElectraAdapterModel +from hf_transformers.tests.models.electra.test_modeling_electra import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class ElectraAdapterModelTest(AdapterModelTesterMixin, ElectraModelTester): + all_model_classes = (ElectraAdapterModel,) + fx_compatible = False diff --git a/tests/test_models/test_encoder_decoder.py b/tests/test_models/test_encoder_decoder.py new file mode 100644 index 0000000000..8f6f4b5f87 --- /dev/null +++ b/tests/test_models/test_encoder_decoder.py @@ -0,0 +1,2 @@ +# flake8: noqa +from hf_transformers.tests.models.encoder_decoder.test_modeling_encoder_decoder import * # Imported to execute model tests diff --git a/tests/test_models/test_gptj.py b/tests/test_models/test_gptj.py new file mode 100644 index 0000000000..5cd7610649 --- /dev/null +++ b/tests/test_models/test_gptj.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import GPTJAdapterModel +from hf_transformers.tests.models.gptj.test_modeling_gptj import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class GPTJAdapterModelTest(AdapterModelTesterMixin, GPTJModelTest): + all_model_classes = (GPTJAdapterModel,) + fx_compatible = False diff --git a/tests/test_models/test_llama.py b/tests/test_models/test_llama.py new file mode 100644 index 0000000000..4246f048e7 --- /dev/null +++ b/tests/test_models/test_llama.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import LlamaAdapterModel +from hf_transformers.tests.models.llama.test_modeling_llama import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class LlamaAdapterModelTest(AdapterModelTesterMixin, LlamaModelTest): + all_model_classes = (LlamaAdapterModel,) + fx_compatible = False diff --git a/tests/test_models/test_mbart.py b/tests/test_models/test_mbart.py new file mode 100644 index 0000000000..f874082af0 --- /dev/null +++ b/tests/test_models/test_mbart.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import MBartAdapterModel +from hf_transformers.tests.models.mbart.test_modeling_mbart import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class MBartAdapterModelTest(AdapterModelTesterMixin, MBartModelTest): + all_model_classes = (MBartAdapterModel,) + fx_compatible = False diff --git a/tests/test_models/test_mistral.py b/tests/test_models/test_mistral.py new file mode 100644 index 0000000000..be66648c37 --- /dev/null +++ b/tests/test_models/test_mistral.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import MistralAdapterModel +from 
hf_transformers.tests.models.mistral.test_modeling_mistral import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class MistralAdapterModelTest(AdapterModelTesterMixin, MistralModelTest): + all_model_classes = (MistralAdapterModel,) + fx_compatible = False diff --git a/tests/test_models/test_mt5.py b/tests/test_models/test_mt5.py new file mode 100644 index 0000000000..8d9f551e8b --- /dev/null +++ b/tests/test_models/test_mt5.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import MT5AdapterModel +from hf_transformers.tests.models.mt5.test_modeling_mt5 import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class MT5AdapterModelTest(AdapterModelTesterMixin, MT5IntegrationTest): + all_model_classes = (MT5AdapterModel,) + fx_compatible = False diff --git a/tests/test_models/test_plbart.py b/tests/test_models/test_plbart.py new file mode 100644 index 0000000000..7fbbfc38df --- /dev/null +++ b/tests/test_models/test_plbart.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import PLBartAdapterModel +from hf_transformers.tests.models.plbart.test_modeling_plbart import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class PLBartAdapterModelTest(AdapterModelTesterMixin, PLBartModelTest): + all_model_classes = (PLBartAdapterModel,) + fx_compatible = False diff --git a/tests/test_models/test_roberta.py b/tests/test_models/test_roberta.py new file mode 100644 index 0000000000..e89886220f --- /dev/null +++ b/tests/test_models/test_roberta.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import RobertaAdapterModel +from hf_transformers.tests.models.roberta.test_modeling_roberta import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class RobertaAdapterModelTest(AdapterModelTesterMixin, RobertaModelTest): + all_model_classes = (RobertaAdapterModel,) + fx_compatible = False diff --git a/tests/test_models/test_t5.py b/tests/test_models/test_t5.py new file mode 100644 index 0000000000..12d31a03e7 --- /dev/null +++ b/tests/test_models/test_t5.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import T5AdapterModel +from hf_transformers.tests.models.t5.test_modeling_t5 import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class T5AdapterModelTest(AdapterModelTesterMixin, T5ModelTest): + all_model_classes = (T5AdapterModel,) + fx_compatible = False diff --git a/tests/test_models/test_vit.py b/tests/test_models/test_vit.py new file mode 100644 index 0000000000..a5fc5a05bc --- /dev/null +++ b/tests/test_models/test_vit.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import ViTAdapterModel +from hf_transformers.tests.models.vit.test_modeling_vit import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class ViTAdapterModelTest(AdapterModelTesterMixin, ViTModelTest): + all_model_classes = (ViTAdapterModel,) + fx_compatible = False diff --git a/tests/test_models/test_whisper.py b/tests/test_models/test_whisper.py new file mode 100644 index 0000000000..bfeea5a508 --- /dev/null +++ b/tests/test_models/test_whisper.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import WhisperAdapterModel +from 
hf_transformers.tests.models.whisper.test_modeling_whisper import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class WhisperAdapterModelTest(AdapterModelTesterMixin, WhisperModelTest): + all_model_classes = (WhisperAdapterModel,) + fx_compatible = False diff --git a/tests/test_models/test_xlm_roberta.py b/tests/test_models/test_xlm_roberta.py new file mode 100644 index 0000000000..8232515028 --- /dev/null +++ b/tests/test_models/test_xlm_roberta.py @@ -0,0 +1,2 @@ +# flake8: noqa +from hf_transformers.tests.models.xlm_roberta.test_modeling_xlm_roberta import * # Imported to execute model tests diff --git a/tests/test_models/test_xmod.py b/tests/test_models/test_xmod.py new file mode 100644 index 0000000000..2a0faa06b3 --- /dev/null +++ b/tests/test_models/test_xmod.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import XmodAdapterModel +from hf_transformers.tests.models.xmod.test_modeling_xmod import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class XmodAdapterModelTest(AdapterModelTesterMixin, XmodModelTest): + all_model_classes = (XmodAdapterModel,) + fx_compatible = False From 8dccda29e27868b81b146c6f3bdc0d227b724bef Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Thu, 7 Nov 2024 19:12:28 +0100 Subject: [PATCH 17/63] Refactoring: - Re-add bart adapter method tests - Remove duplicate added tests - In conversion test refactor model specific if else statements into model test class --- .../test_impl/core/test_adapter_conversion.py | 20 +-- tests/test_impl/heads/test_adapter_heads.py | 2 +- tests/test_methods/base.py | 32 +++- tests/test_methods/test_albert.py | 1 - tests/test_methods/test_bart.py | 149 ++++++++++++++++++ tests/test_methods/test_gpt2.py | 1 - 6 files changed, 183 insertions(+), 22 deletions(-) create mode 100644 tests/test_methods/test_bart.py diff --git a/tests/test_impl/core/test_adapter_conversion.py b/tests/test_impl/core/test_adapter_conversion.py index 9653b3f340..7bf44c1e95 100644 --- a/tests/test_impl/core/test_adapter_conversion.py +++ b/tests/test_impl/core/test_adapter_conversion.py @@ -110,23 +110,7 @@ def test_conversion_seq2seq_lm_model(self): ): self.skipTest("No seq2seq language modeling class.") - label_dict = {} - if self.is_speech_model: - # speech models require input_features - model = MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING[self.config_class](self.config()) - label_dict["input_features"] = torch.randn( - (self.default_input_samples_shape), dtype=torch.float32, device=torch_device - ) - label_dict["decoder_input_ids"] = torch.randint( - 0, model.config.vocab_size, size=self.default_input_samples_shape[:-1], device=torch_device - ) - label_dict["labels"] = label_dict["decoder_input_ids"] - else: - model = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING[self.config_class](self.config()) - label_dict["labels"] = torch.zeros( - (self.batch_size, self.seq_length), dtype=torch.long, device=torch_device - ) - label_dict["decoder_input_ids"] = label_dict["labels"].clone() + model, label_dict = self.get_conversion_model() adapters.init(model) self.run_test(model, label_dict=label_dict) @@ -209,7 +193,7 @@ def test_equivalent_language_generation(self): model_gen = static_model.generate(**input_samples) flex_model_gen = flex_model.generate(**input_samples) - self.assertEquals(model_gen.shape, flex_model_gen.shape) + self.assertEqual(model_gen.shape, flex_model_gen.shape) self.assertTrue(torch.equal(model_gen, 
flex_model_gen)) def test_full_model_conversion(self): diff --git a/tests/test_impl/heads/test_adapter_heads.py b/tests/test_impl/heads/test_adapter_heads.py index ab0c813a5d..abbae3553e 100644 --- a/tests/test_impl/heads/test_adapter_heads.py +++ b/tests/test_impl/heads/test_adapter_heads.py @@ -183,7 +183,7 @@ def test_seq2seq_lm_head(self): seq_output_length = self.seq_length + 30 generated = model1.generate(input_ids, max_length=seq_output_length) self.assertTrue(generated.shape[1] <= seq_output_length) - self.assertEqual(generated.shape[0], 1) + self.assertEqual(generated.shape[0], self.input_shape[0]) def test_masked_lm_head(self): if "masked_lm" not in ADAPTER_MODEL_MAPPING[self.config_class].head_types: diff --git a/tests/test_methods/base.py b/tests/test_methods/base.py index 345cedf9f3..ffea96ad13 100644 --- a/tests/test_methods/base.py +++ b/tests/test_methods/base.py @@ -5,7 +5,14 @@ import adapters from adapters import AutoAdapterModel -from transformers import AutoFeatureExtractor, AutoTokenizer, GlueDataset, GlueDataTrainingArguments +from transformers import ( + MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING, + MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING, + AutoFeatureExtractor, + AutoTokenizer, + GlueDataset, + GlueDataTrainingArguments, +) from transformers.testing_utils import torch_device @@ -67,6 +74,10 @@ def extract_input_ids(self, inputs): # TODO: Check if this is needed in all tests and if it differs between text, vision and speech models return inputs["input_ids"] + def get_conversion_model(self): + """Returns the respective conversion class of the adapter model for the conversion tests.""" + raise NotImplementedError("get_conversion_model() must be implemented in the subclass.") + class TextAdapterTestBase(AbstractAdapterTestBase): """Base class for adapter tests for text models. Text models test classes should inherit from this class and override the attributes and functions as needed.""" @@ -111,6 +122,13 @@ def get_dataset(self, tokenizer=None): ) return GlueDataset(data_args, tokenizer=tokenizer, mode="train") + def get_conversion_model(self): + label_dict = {} + model = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING[self.config_class](self.config()) + label_dict["labels"] = torch.zeros((self.batch_size, self.seq_length), dtype=torch.long, device=torch_device) + label_dict["decoder_input_ids"] = label_dict["labels"].clone() + return model, label_dict + class VisionAdapterTestBase(AbstractAdapterTestBase): """Base class for adapter tests for vision models. 
Vision models test classes should inherit from this class and override the attributes and functions as needed.""" @@ -192,3 +210,15 @@ def get_dataset(self, task_type: str = "seq2seq_lm", **kwargs): def extract_input_ids(self, inputs): return inputs["input_features"] + + def get_conversion_model(self): + label_dict = {} + model = MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING[self.config_class](self.config()) + label_dict["input_features"] = torch.randn( + (self.default_input_samples_shape), dtype=torch.float32, device=torch_device + ) + label_dict["decoder_input_ids"] = torch.randint( + 0, model.config.vocab_size, size=self.default_input_samples_shape[:-1], device=torch_device + ) + label_dict["labels"] = label_dict["decoder_input_ids"] + return model, label_dict diff --git a/tests/test_methods/test_albert.py b/tests/test_methods/test_albert.py index 616680e537..62ff46bad3 100644 --- a/tests/test_methods/test_albert.py +++ b/tests/test_methods/test_albert.py @@ -24,7 +24,6 @@ class AlbertAdapterTestBase(TextAdapterTestBase): @pytest.mark.core class Core( AlbertAdapterTestBase, - ModelClassConversionTestMixin, CompabilityTestMixin, AdapterFusionModelTestMixin, unittest.TestCase, diff --git a/tests/test_methods/test_bart.py b/tests/test_methods/test_bart.py new file mode 100644 index 0000000000..80299af74b --- /dev/null +++ b/tests/test_methods/test_bart.py @@ -0,0 +1,149 @@ +from transformers import BartConfig + +from .imports import * + + +class BartAdapterTestBase(TextAdapterTestBase): + config_class = BartConfig + config = make_config( + BartConfig, + d_model=16, + encoder_layers=2, + decoder_layers=2, + encoder_attention_heads=4, + decoder_attention_heads=4, + encoder_ffn_dim=4, + decoder_ffn_dim=4, + ) + tokenizer_name = "facebook/bart-base" + + +@require_torch +@pytest.mark.core +class Core( + BartAdapterTestBase, + CompabilityTestMixin, + AdapterFusionModelTestMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.composition +class Composition( + BartAdapterTestBase, + ParallelAdapterInferenceTestMixin, + ParallelTrainingMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.heads +class Heads( + BartAdapterTestBase, + PredictionHeadModelTestMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.embeddings +class Embeddings( + BartAdapterTestBase, + EmbeddingTestMixin, + unittest.TestCase, +): + pass + + +@require_torch +class BartClassConversionTest( + ModelClassConversionTestMixin, + BartAdapterTestBase, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.prefix_tuning +class PrefixTuning( + BartAdapterTestBase, + PrefixTuningTestMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.reft +class ReFT( + BartAdapterTestBase, + ReftTestMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.unipelt +class UniPELT( + BartAdapterTestBase, + UniPELTTestMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.compacter +class Compacter( + BartAdapterTestBase, + CompacterTestMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.bottleneck +class Bottleneck( + BartAdapterTestBase, + BottleneckAdapterTestMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.ia3 +class IA3( + BartAdapterTestBase, + IA3TestMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.lora +class LoRA( + BartAdapterTestBase, + LoRATestMixin, + unittest.TestCase, +): + pass + + +@require_torch +@pytest.mark.config_union +class ConfigUnion( + 
BartAdapterTestBase, + ConfigUnionAdapterTest, + unittest.TestCase, +): + pass diff --git a/tests/test_methods/test_gpt2.py b/tests/test_methods/test_gpt2.py index 78573f8706..1eee687f9a 100644 --- a/tests/test_methods/test_gpt2.py +++ b/tests/test_methods/test_gpt2.py @@ -20,7 +20,6 @@ class GPT2AdapterTestBase(TextAdapterTestBase): @pytest.mark.core class Core( GPT2AdapterTestBase, - ModelClassConversionTestMixin, CompabilityTestMixin, AdapterFusionModelTestMixin, unittest.TestCase, From c665948675cee9c2d783a34de527d08340e76494 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Fri, 8 Nov 2024 11:39:42 +0100 Subject: [PATCH 18/63] Introduce generic test creator function --- conftest.py | 5 + setup.cfg | 2 +- tests/test_methods/imports.py | 24 ---- tests/test_methods/test_albert.py | 144 ++------------------ tests/test_methods/test_bart.py | 133 +------------------ tests/test_methods/test_gpt2.py | 125 +---------------- tests/test_methods/utils.py | 214 ++++++++++++++++++++++++++++++ 7 files changed, 243 insertions(+), 404 deletions(-) delete mode 100644 tests/test_methods/imports.py create mode 100644 tests/test_methods/utils.py diff --git a/conftest.py b/conftest.py index 93673cd209..d19e0ac3ae 100644 --- a/conftest.py +++ b/conftest.py @@ -83,3 +83,8 @@ def check_output(self, want, got, optionflags): doctest.OutputChecker = CustomOutputChecker + + +def pytest_collection_modifyitems(items): + # Exclude the 'test_class' group from the test collection since it's not a real test class and byproduct of the generic test class generation. + items[:] = [item for item in items if 'test_class' not in item.nodeid] \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 1e141b3da2..1323b957d7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -50,7 +50,7 @@ use_parentheses = True ignore = E203, E501, E731, E741, W503, W605 max-line-length = 119 per-file-ignores = - tests/test_methods/imports.py: F401, F403, F405 + tests/test_methods/utils.py: F401, F403, F405 tests/test_methods/test_*.py:F403,F405 [tool:pytest] diff --git a/tests/test_methods/imports.py b/tests/test_methods/imports.py deleted file mode 100644 index 59df3990f8..0000000000 --- a/tests/test_methods/imports.py +++ /dev/null @@ -1,24 +0,0 @@ -import unittest -from math import ceil - -import pytest - -from tests.test_impl.composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin -from tests.test_impl.core.test_adapter_backward_compability import CompabilityTestMixin -from tests.test_impl.core.test_adapter_conversion import ModelClassConversionTestMixin -from tests.test_impl.core.test_adapter_fusion_common import AdapterFusionModelTestMixin -from tests.test_impl.embeddings.test_adapter_embeddings import EmbeddingTestMixin -from tests.test_impl.heads.test_adapter_heads import PredictionHeadModelTestMixin -from tests.test_impl.peft.test_adapter_common import BottleneckAdapterTestMixin -from tests.test_impl.peft.test_compacter import CompacterTestMixin -from tests.test_impl.peft.test_config_union import ConfigUnionAdapterTest -from tests.test_impl.peft.test_ia3 import IA3TestMixin -from tests.test_impl.peft.test_lora import LoRATestMixin -from tests.test_impl.peft.test_prefix_tuning import PrefixTuningTestMixin -from tests.test_impl.peft.test_prompt_tuning import PromptTuningTestMixin -from tests.test_impl.peft.test_reft import ReftTestMixin -from tests.test_impl.peft.test_unipelt import UniPELTTestMixin -from tests.test_impl.utils import make_config -from tests.test_methods.base import TextAdapterTestBase 
-from transformers import AutoTokenizer -from transformers.testing_utils import require_torch diff --git a/tests/test_methods/test_albert.py b/tests/test_methods/test_albert.py index 62ff46bad3..aea01d0170 100644 --- a/tests/test_methods/test_albert.py +++ b/tests/test_methods/test_albert.py @@ -1,6 +1,16 @@ +import unittest + from transformers import AlbertConfig -from .imports import * +from .utils import ( + PredictionHeadModelTestMixin, + TextAdapterTestBase, + ceil, + generate_method_tests, + make_config, + pytest, + require_torch, +) class AlbertAdapterTestBase(TextAdapterTestBase): @@ -20,26 +30,10 @@ class AlbertAdapterTestBase(TextAdapterTestBase): leave_out_layers = [0] -@require_torch -@pytest.mark.core -class Core( - AlbertAdapterTestBase, - CompabilityTestMixin, - AdapterFusionModelTestMixin, - unittest.TestCase, -): - pass - +method_tests = generate_method_tests(AlbertAdapterTestBase) -@require_torch -@pytest.mark.composition -class Composition( - AlbertAdapterTestBase, - ParallelAdapterInferenceTestMixin, - ParallelTrainingMixin, - unittest.TestCase, -): - pass +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class @require_torch @@ -52,113 +46,3 @@ class Heads( def test_context_simple(self): expected_number_of_adapter_calls = ceil(self.config().num_hidden_layers / self.config().num_hidden_groups) super().test_context_simple(expected_number_of_adapter_calls=expected_number_of_adapter_calls) - - -@require_torch -@pytest.mark.embeddings -class Embeddings( - AlbertAdapterTestBase, - EmbeddingTestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.class_conversion -class ClassConversion( - ModelClassConversionTestMixin, - AlbertAdapterTestBase, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.prefix_tuning -class PrefixTuning( - AlbertAdapterTestBase, - PrefixTuningTestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.prompt_tuning -class PromptTuning( - AlbertAdapterTestBase, - PromptTuningTestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.reft -class ReFT( - AlbertAdapterTestBase, - ReftTestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.unipelt -class UniPELT( - AlbertAdapterTestBase, - UniPELTTestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.compacter -class Compacter( - AlbertAdapterTestBase, - CompacterTestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.bottleneck -class Bottleneck( - AlbertAdapterTestBase, - BottleneckAdapterTestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.ia3 -class IA3( - AlbertAdapterTestBase, - IA3TestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.lora -class LoRA( - AlbertAdapterTestBase, - LoRATestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.config_union -class ConfigUnion( - AlbertAdapterTestBase, - ConfigUnionAdapterTest, - unittest.TestCase, -): - pass diff --git a/tests/test_methods/test_bart.py b/tests/test_methods/test_bart.py index 80299af74b..b4f46e8cf9 100644 --- a/tests/test_methods/test_bart.py +++ b/tests/test_methods/test_bart.py @@ -1,6 +1,6 @@ from transformers import BartConfig -from .imports import * +from .utils import * class BartAdapterTestBase(TextAdapterTestBase): @@ -18,132 +18,7 @@ class BartAdapterTestBase(TextAdapterTestBase): tokenizer_name = "facebook/bart-base" -@require_torch -@pytest.mark.core -class Core( - BartAdapterTestBase, - 
CompabilityTestMixin, - AdapterFusionModelTestMixin, - unittest.TestCase, -): - pass +method_tests = generate_method_tests(BartAdapterTestBase, excluded_tests=["PromptTuning"]) - -@require_torch -@pytest.mark.composition -class Composition( - BartAdapterTestBase, - ParallelAdapterInferenceTestMixin, - ParallelTrainingMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.heads -class Heads( - BartAdapterTestBase, - PredictionHeadModelTestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.embeddings -class Embeddings( - BartAdapterTestBase, - EmbeddingTestMixin, - unittest.TestCase, -): - pass - - -@require_torch -class BartClassConversionTest( - ModelClassConversionTestMixin, - BartAdapterTestBase, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.prefix_tuning -class PrefixTuning( - BartAdapterTestBase, - PrefixTuningTestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.reft -class ReFT( - BartAdapterTestBase, - ReftTestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.unipelt -class UniPELT( - BartAdapterTestBase, - UniPELTTestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.compacter -class Compacter( - BartAdapterTestBase, - CompacterTestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.bottleneck -class Bottleneck( - BartAdapterTestBase, - BottleneckAdapterTestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.ia3 -class IA3( - BartAdapterTestBase, - IA3TestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.lora -class LoRA( - BartAdapterTestBase, - LoRATestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.config_union -class ConfigUnion( - BartAdapterTestBase, - ConfigUnionAdapterTest, - unittest.TestCase, -): - pass +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class diff --git a/tests/test_methods/test_gpt2.py b/tests/test_methods/test_gpt2.py index 1eee687f9a..848ae86999 100644 --- a/tests/test_methods/test_gpt2.py +++ b/tests/test_methods/test_gpt2.py @@ -1,6 +1,6 @@ from transformers import GPT2Config -from .imports import * +from .utils import * class GPT2AdapterTestBase(TextAdapterTestBase): @@ -16,15 +16,10 @@ class GPT2AdapterTestBase(TextAdapterTestBase): tokenizer_name = "gpt2" -@require_torch -@pytest.mark.core -class Core( - GPT2AdapterTestBase, - CompabilityTestMixin, - AdapterFusionModelTestMixin, - unittest.TestCase, -): - pass +method_tests = generate_method_tests(GPT2AdapterTestBase, excluded_tests=["PromptTuning"]) + +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class @require_torch @@ -37,113 +32,3 @@ class Composition( ): def test_parallel_training_lora(self): self.skipTest("Not supported for GPT2") - - -@require_torch -@pytest.mark.heads -class Heads( - GPT2AdapterTestBase, - PredictionHeadModelTestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.embeddings -class Embeddings( - GPT2AdapterTestBase, - EmbeddingTestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.class_conversion -class ClassConversion( - ModelClassConversionTestMixin, - GPT2AdapterTestBase, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.prefix_tuning -class PrefixTuning( - GPT2AdapterTestBase, - PrefixTuningTestMixin, - unittest.TestCase, -): - pass - - -@require_torch -@pytest.mark.reft -class ReFT( - GPT2AdapterTestBase, - 
ReftTestMixin, -    unittest.TestCase, -): -    pass - - -@require_torch -@pytest.mark.unipelt -class UniPELT( -    GPT2AdapterTestBase, -    UniPELTTestMixin, -    unittest.TestCase, -): -    pass - - -@require_torch -@pytest.mark.compacter -class Compacter( -    GPT2AdapterTestBase, -    CompacterTestMixin, -    unittest.TestCase, -): -    pass - - -@require_torch -@pytest.mark.bottleneck -class Bottleneck( -    GPT2AdapterTestBase, -    BottleneckAdapterTestMixin, -    unittest.TestCase, -): -    pass - - -@require_torch -@pytest.mark.ia3 -class IA3( -    GPT2AdapterTestBase, -    IA3TestMixin, -    unittest.TestCase, -): -    pass - - -@require_torch -@pytest.mark.lora -class LoRA( -    GPT2AdapterTestBase, -    LoRATestMixin, -    unittest.TestCase, -): -    pass - - -@require_torch -@pytest.mark.config_union -class ConfigUnion( -    GPT2AdapterTestBase, -    ConfigUnionAdapterTest, -    unittest.TestCase, -): -    pass diff --git a/tests/test_methods/utils.py b/tests/test_methods/utils.py new file mode 100644 index 0000000000..c58bf27609 --- /dev/null +++ b/tests/test_methods/utils.py @@ -0,0 +1,214 @@ +import inspect +import sys +import unittest +from math import ceil + +import pytest + +from tests.test_impl.composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin +from tests.test_impl.core.test_adapter_backward_compability import CompabilityTestMixin +from tests.test_impl.core.test_adapter_conversion import ModelClassConversionTestMixin +from tests.test_impl.core.test_adapter_fusion_common import AdapterFusionModelTestMixin +from tests.test_impl.embeddings.test_adapter_embeddings import EmbeddingTestMixin +from tests.test_impl.heads.test_adapter_heads import PredictionHeadModelTestMixin +from tests.test_impl.peft.test_adapter_common import BottleneckAdapterTestMixin +from tests.test_impl.peft.test_compacter import CompacterTestMixin +from tests.test_impl.peft.test_config_union import ConfigUnionAdapterTest +from tests.test_impl.peft.test_ia3 import IA3TestMixin +from tests.test_impl.peft.test_lora import LoRATestMixin +from tests.test_impl.peft.test_prefix_tuning import PrefixTuningTestMixin +from tests.test_impl.peft.test_prompt_tuning import PromptTuningTestMixin +from tests.test_impl.peft.test_reft import ReftTestMixin +from tests.test_impl.peft.test_unipelt import UniPELTTestMixin +from tests.test_impl.utils import make_config +from tests.test_methods.base import TextAdapterTestBase, VisionAdapterTestBase +from transformers import AutoTokenizer, PreTrainedModel, PreTrainedTokenizer +from transformers.testing_utils import require_torch + + +def generate_method_tests( +    model_test_base, +    excluded_tests=[], +): +    """ +    Generates a set of test classes for a given model test base. + +    Args: +        model_test_base (type): The base class for the model tests. +        excluded_tests (list, optional): A list of test class names to exclude from the generated test classes. + +    Returns: +        dict: A dictionary mapping test class names to the generated test classes. 
+ """ + test_classes = {} + + @require_torch + @pytest.mark.core + class Core( + model_test_base, + CompabilityTestMixin, + AdapterFusionModelTestMixin, + unittest.TestCase, + ): + pass + + if "Core" not in excluded_tests: + test_classes["Core"] = Core + + @require_torch + @pytest.mark.heads + class Heads( + model_test_base, + PredictionHeadModelTestMixin, + unittest.TestCase, + ): + pass + + if "Heads" not in excluded_tests: + test_classes["Heads"] = Heads + + @require_torch + @pytest.mark.embeddings + class Embeddings( + model_test_base, + EmbeddingTestMixin, + unittest.TestCase, + ): + pass + + if "Embeddings" not in excluded_tests: + test_classes["Embeddings"] = Embeddings + + @require_torch + @pytest.mark.composition + class Composition( + model_test_base, + ParallelAdapterInferenceTestMixin, + ParallelTrainingMixin, + unittest.TestCase, + ): + pass + + if "Composition" not in excluded_tests: + test_classes["Composition"] = Composition + + @require_torch + class ClassConversion( + ModelClassConversionTestMixin, + model_test_base, + unittest.TestCase, + ): + pass + + if "ClassConversion" not in excluded_tests: + test_classes["ClassConversion"] = ClassConversion + + @require_torch + @pytest.mark.prefix_tuning + class PrefixTuning( + model_test_base, + PrefixTuningTestMixin, + unittest.TestCase, + ): + pass + + if "PrefixTuning" not in excluded_tests: + test_classes["PrefixTuning"] = PrefixTuning + + @require_torch + @pytest.mark.prompt_tuning + class PromptTuning( + model_test_base, + PromptTuningTestMixin, + unittest.TestCase, + ): + pass + + if "PromptTuning" not in excluded_tests: + test_classes["PromptTuning"] = PromptTuning + + @require_torch + @pytest.mark.reft + class ReFT( + model_test_base, + ReftTestMixin, + unittest.TestCase, + ): + pass + + if "ReFT" not in excluded_tests: + test_classes["ReFT"] = ReFT + + @require_torch + @pytest.mark.unipelt + class UniPELT( + model_test_base, + UniPELTTestMixin, + unittest.TestCase, + ): + pass + + if "UniPELT" not in excluded_tests: + test_classes["UniPELT"] = UniPELT + + @require_torch + @pytest.mark.compacter + class Compacter( + model_test_base, + CompacterTestMixin, + unittest.TestCase, + ): + pass + + if "Compacter" not in excluded_tests: + test_classes["Compacter"] = Compacter + + @require_torch + @pytest.mark.bottleneck + class Bottleneck( + model_test_base, + BottleneckAdapterTestMixin, + unittest.TestCase, + ): + pass + + if "Bottleneck" not in excluded_tests: + test_classes["Bottleneck"] = Bottleneck + + @require_torch + @pytest.mark.ia3 + class IA3( + model_test_base, + IA3TestMixin, + unittest.TestCase, + ): + pass + + if "IA3" not in excluded_tests: + test_classes["IA3"] = IA3 + + @require_torch + @pytest.mark.lora + class LoRA( + model_test_base, + LoRATestMixin, + unittest.TestCase, + ): + pass + + if "LoRA" not in excluded_tests: + test_classes["LoRA"] = LoRA + + @require_torch + @pytest.mark.config_union + class ConfigUnion( + model_test_base, + ConfigUnionAdapterTest, + unittest.TestCase, + ): + pass + + if "ConfigUnion" not in excluded_tests: + test_classes["ConfigUnion"] = ConfigUnion + + return test_classes From fb425b686dfc6146f060aad792553d5930e40e7a Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Fri, 8 Nov 2024 11:40:02 +0100 Subject: [PATCH 19/63] Re-add beit adapter method tests --- tests/test_impl/heads/test_adapter_heads.py | 9 ++++++++- tests/test_methods/base.py | 7 ++++--- tests/test_methods/test_beit.py | 22 +++++++++++++++++++++ 3 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 
tests/test_methods/test_beit.py diff --git a/tests/test_impl/heads/test_adapter_heads.py b/tests/test_impl/heads/test_adapter_heads.py index abbae3553e..a0a74d364c 100644 --- a/tests/test_impl/heads/test_adapter_heads.py +++ b/tests/test_impl/heads/test_adapter_heads.py @@ -78,7 +78,14 @@ def test_image_classification_head(self): model1.add_image_classification_head("dummy") label_dict = {} label_dict["labels"] = torch.zeros(self.batch_size, dtype=torch.long, device=torch_device) - self.run_prediction_head_test(model1, model2, "dummy", input_shape=(1, 3, 224, 224), label_dict=label_dict) + self.run_prediction_head_test( + model1, + model2, + "dummy", + input_shape=self.input_shape, + label_dict=label_dict, + output_shape=(self.batch_size, 2), + ) def test_multiple_choice_head(self): if "multiple_choice" not in ADAPTER_MODEL_MAPPING[self.config_class].head_types: diff --git a/tests/test_methods/base.py b/tests/test_methods/base.py index ffea96ad13..b91e270a91 100644 --- a/tests/test_methods/base.py +++ b/tests/test_methods/base.py @@ -134,10 +134,11 @@ class VisionAdapterTestBase(AbstractAdapterTestBase): """Base class for adapter tests for vision models. Vision models test classes should inherit from this class and override the attributes and functions as needed.""" input_shape = (3, 3, 224, 224) + batch_size = 3 def get_input_samples(self, shape=None, config=None, dtype=torch.float, **kwargs): shape = shape or self.input_shape - pixel_values = self.build_random_tensor(shape, dtype=dtype) + pixel_values = self.build_rand_tensor(shape, dtype=dtype) return {"pixel_values": pixel_values} def add_head(self, model, name, **kwargs): @@ -192,11 +193,11 @@ def add_head(self, model, name, head_type="seq2seq_lm", **kwargs): def get_input_samples(self, shape=None, config=None, **kwargs): shape = shape or self.default_input_samples_shape - in_data = {"input_features": self.build_random_tensor(shape, dtype=torch.float)} + in_data = {"input_features": self.build_rand_tensor(shape, dtype=torch.float)} # Add decoder input ids for models with a decoder if config and config.is_encoder_decoder: - in_data["decoder_input_ids"] = self.build_random_tensor( + in_data["decoder_input_ids"] = self.build_rand_tensor( (shape[:-1]), dtype=torch.long, vocab_size=config.vocab_size ) return in_data diff --git a/tests/test_methods/test_beit.py b/tests/test_methods/test_beit.py new file mode 100644 index 0000000000..befec4b3e0 --- /dev/null +++ b/tests/test_methods/test_beit.py @@ -0,0 +1,22 @@ +from transformers import BeitConfig + +from .utils import VisionAdapterTestBase, generate_method_tests, make_config + + +class BeitAdapterTestBase(VisionAdapterTestBase): + config_class = BeitConfig + config = make_config( + BeitConfig, + image_size=224, + hidden_size=32, + num_hidden_layers=4, + num_attention_heads=4, + intermediate_size=37, + ) + feature_extractor_name = "microsoft/beit-base-patch16-224-pt22k" + + +method_tests = generate_method_tests(BeitAdapterTestBase, excluded_tests=["Composition", "Embeddings"]) + +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class From 225439c087c57498eb792fc43474e5bc90ae6050 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Sat, 9 Nov 2024 12:05:56 +0100 Subject: [PATCH 20/63] Refactor & Re-add bertgeneration and bert --- tests/test_impl/composition/test_parallel.py | 10 +-- tests/test_impl/heads/test_adapter_heads.py | 6 +- tests/test_methods/base.py | 31 ++++++-- tests/test_methods/test_bert.py | 21 ++++++ 
tests/test_methods/test_bert_generation.py | 78 ++++++++++++++++++++ tests/test_methods/utils.py | 3 +- 6 files changed, 133 insertions(+), 16 deletions(-) create mode 100644 tests/test_methods/test_bert.py create mode 100644 tests/test_methods/test_bert_generation.py diff --git a/tests/test_impl/composition/test_parallel.py b/tests/test_impl/composition/test_parallel.py index 0e97791cd9..f6038b572c 100644 --- a/tests/test_impl/composition/test_parallel.py +++ b/tests/test_impl/composition/test_parallel.py @@ -229,12 +229,7 @@ def run_parallel_training_equivalent_to_single(self, adapter_config): a1, a2 = self.create_twin_adapters(model, "a", adapter_config) b1, b2 = self.create_twin_adapters(model, "b", adapter_config) -        # TODO: refactor this dataset creation into an own method -        dataset = [] -        for i in range(3): -            input_data = self.get_input_samples(config=model.config) -            input_data["labels"] = self.build_rand_ids_tensor((3, 1), 2) -            dataset.append(input_data) +        dataset = self.get_dataset_non_batched(model.config) for adapter in [a1, b1]: model.active_head = adapter @@ -292,8 +287,7 @@ def test_parallel_training_single_forward_pass(self): input_data = self.get_input_samples( config=model.config, ) - -            input_data["labels"] = self.build_rand_ids_tensor((3, 1), 2) +            input_data = self.attach_labels(input_data) outputs = [] for adapter in [a1, b1]: diff --git a/tests/test_impl/heads/test_adapter_heads.py b/tests/test_impl/heads/test_adapter_heads.py index a0a74d364c..c3ceb2adfd 100644 --- a/tests/test_impl/heads/test_adapter_heads.py +++ b/tests/test_impl/heads/test_adapter_heads.py @@ -246,7 +246,11 @@ def test_dependency_parsing_head(self): (self.batch_size, self.seq_length), dtype=torch.long, device=torch_device ) self.run_prediction_head_test( -            model1, model2, "dummy", output_shape=(1, self.seq_length, self.seq_length + 1, 2), label_dict=label_dict +            model1, +            model2, +            "dummy", +            output_shape=(self.batch_size, self.seq_length, self.seq_length + 1, 2), +            label_dict=label_dict, ) def test_delete_head(self): diff --git a/tests/test_methods/base.py b/tests/test_methods/base.py index b91e270a91..569e493f01 100644 --- a/tests/test_methods/base.py +++ b/tests/test_methods/base.py @@ -28,6 +28,7 @@ class AbstractAdapterTestBase: input_shape_generate = ()  # (batch_size, seq_length) leave_out_layers = [] do_run_train_tests = True +    num_labels = 2 def get_input_samples(self, shape=None, vocab_size=5000, config=None, **kwargs): """Creates a dummy batch of samples in the format required for the model.""" @@ -41,6 +42,14 @@ def get_dataset(self, **kwargs): """Loads a dummy dataset for the model.""" raise NotImplementedError("get_dataset() must be implemented in the subclass.") +    def get_dataset_non_batched(self): +        """Builds a non-batched dummy dataset for the model.""" +        raise NotImplementedError("get_dataset_non_batched() must be implemented in the subclass.") + +    def attach_labels(self, inputs): +        """Attaches labels to the input samples.""" +        raise NotImplementedError("attach_labels() with respective label shape must be implemented in the subclass.") + def get_model(self): """Builds a model instance for testing based on the provied model configuration.""" if self.model_class == AutoAdapterModel: @@ -122,6 +131,18 @@ def get_dataset(self, tokenizer=None): ) return GlueDataset(data_args, tokenizer=tokenizer, mode="train") +    def get_dataset_non_batched(self, config): +        dataset = [] +        for i in range(3): +            input_data = self.get_input_samples(config=config) +            input_data["labels"] = 
self.build_rand_ids_tensor((3, 1), self.num_labels) + dataset.append(input_data) + return dataset + + def attach_labels(self, inputs): + inputs["labels"] = torch.randint(0, 2, (self.batch_size, 1), device=torch_device) + return inputs + def get_conversion_model(self): label_dict = {} model = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING[self.config_class](self.config()) @@ -187,12 +208,12 @@ def add_head(self, model, name, head_type="seq2seq_lm", **kwargs): elif head_type == "seq2seq_lm": kwargs.pop("num_labels", 1) # Remove num_labels from kwargs if present in the tests model.add_seq2seq_lm_head(name, **kwargs) - return self.default_input_samples_shape[1] # Return the number of mel features + return self.input_shape[1] # Return the number of mel features else: raise ValueError(f"Head type {head_type} not supported.") def get_input_samples(self, shape=None, config=None, **kwargs): - shape = shape or self.default_input_samples_shape + shape = shape or self.input_shape in_data = {"input_features": self.build_rand_tensor(shape, dtype=torch.float)} # Add decoder input ids for models with a decoder @@ -215,11 +236,9 @@ def extract_input_ids(self, inputs): def get_conversion_model(self): label_dict = {} model = MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING[self.config_class](self.config()) - label_dict["input_features"] = torch.randn( - (self.default_input_samples_shape), dtype=torch.float32, device=torch_device - ) + label_dict["input_features"] = torch.randn((self.input_shape), dtype=torch.float32, device=torch_device) label_dict["decoder_input_ids"] = torch.randint( - 0, model.config.vocab_size, size=self.default_input_samples_shape[:-1], device=torch_device + 0, model.config.vocab_size, size=self.input_shape[:-1], device=torch_device ) label_dict["labels"] = label_dict["decoder_input_ids"] return model, label_dict diff --git a/tests/test_methods/test_bert.py b/tests/test_methods/test_bert.py new file mode 100644 index 0000000000..04b00f0c38 --- /dev/null +++ b/tests/test_methods/test_bert.py @@ -0,0 +1,21 @@ +from transformers import BertConfig + +from .utils import TextAdapterTestBase, generate_method_tests, make_config + + +class BertAdapterTestBase(TextAdapterTestBase): + config_class = BertConfig + config = make_config( + BertConfig, + hidden_size=32, + num_hidden_layers=4, + num_attention_heads=4, + intermediate_size=37, + ) + tokenizer_name = "bert-base-uncased" + + +method_tests = generate_method_tests(BertAdapterTestBase) + +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class diff --git a/tests/test_methods/test_bert_generation.py b/tests/test_methods/test_bert_generation.py new file mode 100644 index 0000000000..2a3bb4b4ff --- /dev/null +++ b/tests/test_methods/test_bert_generation.py @@ -0,0 +1,78 @@ +from datasets import load_dataset + +from transformers import AutoTokenizer, BertGenerationConfig + +from .utils import TextAdapterTestBase, generate_method_tests, make_config, torch, torch_device + + +class BertGenerationAdapterTestBase(TextAdapterTestBase): + config_class = BertGenerationConfig + config = make_config( + BertGenerationConfig, + hidden_size=32, + num_hidden_layers=4, + num_attention_heads=4, + intermediate_size=37, + ) + tokenizer_name = "bert-base-uncased" + + def add_head(self, model, name, **kwargs): + model.add_masked_lm_head(name) + return self.input_shape[-1] + + def get_dataset(self, tokenizer=None): + # setup tokenizer + if tokenizer is None: + tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name, use_fast=False) + if 
tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + + def preprocess_function(examples): + inputs = examples["document"] + targets = examples["summary"] + inputs = ["Summarize: " + inp for inp in inputs] + model_inputs = tokenizer(inputs, padding="max_length", truncation=True, max_length=128) + + # Setup the tokenizer for targets + with tokenizer.as_target_tokenizer(): + labels = tokenizer(targets, padding="max_length", truncation=True, max_length=128) + + # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore + # padding in the loss. + labels["input_ids"] = [ + [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"] + ] + + model_inputs["labels"] = labels["input_ids"] + return model_inputs + + data_args = { + "task_name": "xsum", + "path": "./tests/fixtures/samples/xsum/sample.json", + } + dataset = load_dataset("json", data_files=data_args["path"]) + train_dataset = dataset["train"] + train_dataset = train_dataset.map( + preprocess_function, + batched=True, + desc="Running tokenizer on train dataset", + ) + return train_dataset + + def get_dataset_non_batched(self, config): + dataset = [] + for i in range(3): + input_data = self.get_input_samples(config=config) + input_data = self.attach_labels(input_data) + dataset.append(input_data) + return dataset + + def attach_labels(self, inputs): + inputs["labels"] = torch.randint(0, 2, (self.batch_size, 64), device=torch_device) + return inputs + + +method_tests = generate_method_tests(BertGenerationAdapterTestBase) + +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class diff --git a/tests/test_methods/utils.py b/tests/test_methods/utils.py index c58bf27609..6ba783dae9 100644 --- a/tests/test_methods/utils.py +++ b/tests/test_methods/utils.py @@ -4,6 +4,7 @@ from math import ceil import pytest +import torch from tests.test_impl.composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin from tests.test_impl.core.test_adapter_backward_compability import CompabilityTestMixin @@ -23,7 +24,7 @@ from tests.test_impl.utils import make_config from tests.test_methods.base import TextAdapterTestBase, VisionAdapterTestBase from transformers import AutoTokenizer, PreTrainedModel, PreTrainedTokenizer -from transformers.testing_utils import require_torch +from transformers.testing_utils import require_torch, torch_device def generate_method_tests( From 09f9cdc9012670e0aaa15cd9863fe3e2e7b03956 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Mon, 11 Nov 2024 09:33:20 +0100 Subject: [PATCH 21/63] Re-add clip tests --- tests/test_methods/test_clip_text.py | 142 +++++++++++++++++++++++++ tests/test_methods/test_clip_vision.py | 65 +++++++++++ 2 files changed, 207 insertions(+) create mode 100644 tests/test_methods/test_clip_text.py create mode 100644 tests/test_methods/test_clip_vision.py diff --git a/tests/test_methods/test_clip_text.py b/tests/test_methods/test_clip_text.py new file mode 100644 index 0000000000..8a0b05c80e --- /dev/null +++ b/tests/test_methods/test_clip_text.py @@ -0,0 +1,142 @@ +import random + +from transformers import CLIPConfig, CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProjection, CLIPVisionConfig + +from .utils import * + + +class CLIPTextAdapterTestBase(TextAdapterTestBase): + model_class = CLIPTextModel + config_class = CLIPTextConfig + config = make_config( + CLIPTextConfig, + hidden_size=32, + num_hidden_layers=4, + num_attention_heads=4, + 
intermediate_size=37, + ) + tokenizer_name = "openai/clip-vit-base-patch32" + + +@require_torch +class CLIPTextAdapterTest( + BottleneckAdapterTestMixin, + CompacterTestMixin, + IA3TestMixin, + LoRATestMixin, + PrefixTuningTestMixin, + ReftTestMixin, + UniPELTTestMixin, + AdapterFusionModelTestMixin, + CompabilityTestMixin, + CLIPTextAdapterTestBase, + unittest.TestCase, +): + pass + + +class CLIPTextWithProjectionAdapterTestBase(TextAdapterTestBase): + model_class = CLIPTextModelWithProjection + config_class = CLIPTextConfig + config = make_config( + CLIPTextConfig, + hidden_size=32, + num_hidden_layers=4, + num_attention_heads=4, + intermediate_size=37, + ) + tokenizer_name = "openai/clip-vit-base-patch32" + + +@require_torch +class CLIPTextWithProjectionAdapterTest( + BottleneckAdapterTestMixin, + CompacterTestMixin, + IA3TestMixin, + LoRATestMixin, + PrefixTuningTestMixin, + ReftTestMixin, + UniPELTTestMixin, + AdapterFusionModelTestMixin, + CompabilityTestMixin, + CLIPTextWithProjectionAdapterTestBase, + unittest.TestCase, +): + pass + + +class CLIPAdapterTestBase(TextAdapterTestBase): + config_class = CLIPConfig + config = staticmethod( + lambda: CLIPConfig.from_text_vision_configs( + CLIPTextConfig( + hidden_size=32, + num_hidden_layers=4, + num_attention_heads=4, + intermediate_size=37, + ), + CLIPVisionConfig( + image_size=30, + hidden_size=32, + num_hidden_layers=4, + num_attention_heads=4, + intermediate_size=37, + ), + ) + ) + tokenizer_name = "openai/clip-vit-base-patch32" + # Default shape of inputs to use + default_text_input_samples_shape = (3, 64) + default_vision_input_samples_shape = (3, 3, 224, 224) + do_run_train_tests = False + + def get_input_samples(self, vocab_size=5000, config=None, dtype=torch.float, **kwargs): + # text inputs + shape = self.default_text_input_samples_shape + total_dims = 1 + for dim in shape: + total_dims *= dim + values = [] + for _ in range(total_dims): + values.append(random.randint(0, vocab_size - 1)) + input_ids = torch.tensor(data=values, dtype=torch.long, device=torch_device).view(shape).contiguous() + # this is needed e.g. 
for BART + if config and config.eos_token_id is not None and config.eos_token_id < vocab_size: + input_ids[input_ids == config.eos_token_id] = random.randint(0, config.eos_token_id - 1) + input_ids[:, -1] = config.eos_token_id + in_data = {"input_ids": input_ids} + + # vision inputs + shape = self.default_vision_input_samples_shape + total_dims = 1 + for dim in shape: + total_dims *= dim + values = [] + for _ in range(total_dims): + values.append(random.random()) + pixel_values = torch.tensor(data=values, dtype=dtype, device=torch_device).view(shape).contiguous() + in_data["pixel_values"] = pixel_values + + return in_data + + def add_head(self, *args, **kwargs): + pass + + +@require_torch +class CLIPAdapterTest( + BottleneckAdapterTestMixin, + CompacterTestMixin, + IA3TestMixin, + LoRATestMixin, + PrefixTuningTestMixin, + ReftTestMixin, + UniPELTTestMixin, + AdapterFusionModelTestMixin, + CompabilityTestMixin, + CLIPAdapterTestBase, + unittest.TestCase, +): + def test_adapter_fusion_save_with_head(self): + # This test is not applicable to CLIP + self.skipTest("Not applicable to CLIP.") diff --git a/tests/test_methods/test_clip_vision.py b/tests/test_methods/test_clip_vision.py new file mode 100644 index 0000000000..3686c20ae6 --- /dev/null +++ b/tests/test_methods/test_clip_vision.py @@ -0,0 +1,65 @@ +from transformers import CLIPVisionConfig, CLIPVisionModel, CLIPVisionModelWithProjection + +from .utils import * + + +class CLIPVisionAdapterTestBase(VisionAdapterTestBase): + model_class = CLIPVisionModel + config_class = CLIPVisionConfig + config = make_config( + CLIPVisionConfig, + image_size=30, + hidden_size=32, + num_hidden_layers=4, + num_attention_heads=4, + intermediate_size=37, + ) + feature_extractor_name = "openai/clip-vit-base-patch32" + + +@require_torch +class CLIPVisionAdapterTest( + BottleneckAdapterTestMixin, + CompacterTestMixin, + IA3TestMixin, + LoRATestMixin, + PrefixTuningTestMixin, + ReftTestMixin, + UniPELTTestMixin, + AdapterFusionModelTestMixin, + CompabilityTestMixin, + CLIPVisionAdapterTestBase, + unittest.TestCase, +): + pass + + +class CLIPVisionWithProjectionAdapterTestBase(VisionAdapterTestBase): + model_class = CLIPVisionModelWithProjection + config_class = CLIPVisionConfig + config = make_config( + CLIPVisionConfig, + image_size=30, + hidden_size=32, + num_hidden_layers=4, + num_attention_heads=4, + intermediate_size=37, + ) + feature_extractor_name = "openai/clip-vit-base-patch32" + + +@require_torch +class CLIPVisionWithProjectionAdapterTest( + BottleneckAdapterTestMixin, + CompacterTestMixin, + IA3TestMixin, + LoRATestMixin, + PrefixTuningTestMixin, + ReftTestMixin, + UniPELTTestMixin, + AdapterFusionModelTestMixin, + CompabilityTestMixin, + CLIPVisionWithProjectionAdapterTestBase, + unittest.TestCase, +): + pass From 7934350d4895925c42f6e0294c92e35d405ec21a Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Mon, 11 Nov 2024 12:29:14 +0100 Subject: [PATCH 22/63] Re-add: - deberta - debertav2 - distilbert - electra - encoder-decoder - llama - mbart - mistral - mt5 - plbart - roberta --- .../embeddings/test_adapter_embeddings.py | 2 +- tests/test_methods/test_deberta.py | 27 +++++++ tests/test_methods/test_debertaV2.py | 24 +++++++ tests/test_methods/test_distilbert.py | 21 ++++++ tests/test_methods/test_electra.py | 22 ++++++ tests/test_methods/test_encoder_decoder.py | 72 +++++++++++++++++++ tests/test_methods/test_llama.py | 33 +++++++++ tests/test_methods/test_mbart.py | 26 +++++++ tests/test_methods/test_mistral.py | 24 +++++++ 
tests/test_methods/test_mt5.py | 26 +++++++ tests/test_methods/test_plbart.py | 25 +++++++ tests/test_methods/test_roberta.py | 22 ++++++ 12 files changed, 323 insertions(+), 1 deletion(-) create mode 100644 tests/test_methods/test_deberta.py create mode 100644 tests/test_methods/test_debertaV2.py create mode 100644 tests/test_methods/test_distilbert.py create mode 100644 tests/test_methods/test_electra.py create mode 100644 tests/test_methods/test_encoder_decoder.py create mode 100644 tests/test_methods/test_llama.py create mode 100644 tests/test_methods/test_mbart.py create mode 100644 tests/test_methods/test_mistral.py create mode 100644 tests/test_methods/test_mt5.py create mode 100644 tests/test_methods/test_plbart.py create mode 100644 tests/test_methods/test_roberta.py diff --git a/tests/test_impl/embeddings/test_adapter_embeddings.py b/tests/test_impl/embeddings/test_adapter_embeddings.py index b40b6bb0d9..71b24292e3 100644 --- a/tests/test_impl/embeddings/test_adapter_embeddings.py +++ b/tests/test_impl/embeddings/test_adapter_embeddings.py @@ -66,7 +66,7 @@ def test_save_load_embedding(self): torch.equal(model.loaded_embeddings["test"].weight, model.loaded_embeddings["test_reloaded"].weight) ) self.assertTrue(torch.equal(output1[0], output2[0])) - self.assertEqual(tokenizer.vocab, tokenizer_ref.vocab) + self.assertEqual(tokenizer.get_vocab(), tokenizer_ref.get_vocab()) def test_back_to_default(self): model = self.get_model() diff --git a/tests/test_methods/test_deberta.py b/tests/test_methods/test_deberta.py new file mode 100644 index 0000000000..d53ae85f21 --- /dev/null +++ b/tests/test_methods/test_deberta.py @@ -0,0 +1,27 @@ +from transformers import DebertaConfig + +from .utils import * + + +class DebertaAdapterTestBase(TextAdapterTestBase): + config_class = DebertaConfig + config = make_config( + DebertaConfig, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + relative_attention=True, + pos_att_type="p2c|c2p", + ) + tokenizer_name = "microsoft/deberta-base" + + def test_parallel_training_lora(self): + self.skipTest("Not supported for DeBERTa") + + +method_tests = generate_method_tests(DebertaAdapterTestBase) + +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class diff --git a/tests/test_methods/test_debertaV2.py b/tests/test_methods/test_debertaV2.py new file mode 100644 index 0000000000..a6ef99b104 --- /dev/null +++ b/tests/test_methods/test_debertaV2.py @@ -0,0 +1,24 @@ +from transformers import DebertaV2Config + +from .utils import * + + +class DebertaV2AdapterTestBase(TextAdapterTestBase): + config_class = DebertaV2Config + config = make_config( + DebertaV2Config, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + relative_attention=True, + pos_att_type="p2c|c2p", + ) + tokenizer_name = "microsoft/deberta-v3-base" + + +method_tests = generate_method_tests(DebertaV2AdapterTestBase) + +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class diff --git a/tests/test_methods/test_distilbert.py b/tests/test_methods/test_distilbert.py new file mode 100644 index 0000000000..668d6e8b01 --- /dev/null +++ b/tests/test_methods/test_distilbert.py @@ -0,0 +1,21 @@ +from transformers import DistilBertConfig + +from .utils import * + + +class DistilBertAdapterTestBase(TextAdapterTestBase): + config_class = DistilBertConfig + config = make_config( + DistilBertConfig, + dim=32, 
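+        # Unlike the BERT-style configs in the other test files, DistilBertConfig names its
+        # dimensions dim / n_layers / n_heads / hidden_dim instead of hidden_size /
+        # num_hidden_layers / num_attention_heads / intermediate_size.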
+ n_layers=4, + n_heads=4, + hidden_dim=37, + ) + tokenizer_name = "distilbert-base-uncased" + + +method_tests = generate_method_tests(DistilBertAdapterTestBase) + +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class diff --git a/tests/test_methods/test_electra.py b/tests/test_methods/test_electra.py new file mode 100644 index 0000000000..19d6622b93 --- /dev/null +++ b/tests/test_methods/test_electra.py @@ -0,0 +1,22 @@ +from transformers import ElectraConfig + +from .utils import * + + +class ElectraAdapterTestBase(TextAdapterTestBase): + config_class = ElectraConfig + config = make_config( + ElectraConfig, + # vocab_size=99, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + ) + tokenizer_name = "google/electra-base-generator" + + +method_tests = generate_method_tests(ElectraAdapterTestBase) + +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class diff --git a/tests/test_methods/test_encoder_decoder.py b/tests/test_methods/test_encoder_decoder.py new file mode 100644 index 0000000000..5a25bb94a7 --- /dev/null +++ b/tests/test_methods/test_encoder_decoder.py @@ -0,0 +1,72 @@ +from adapters import init +from transformers import AutoModelForSeq2SeqLM, BertConfig, EncoderDecoderConfig, EncoderDecoderModel + +from .utils import * + + +class EncoderDecoderAdapterTestBase(TextAdapterTestBase): + model_class = EncoderDecoderModel + config_class = EncoderDecoderConfig + config = staticmethod( + lambda: EncoderDecoderConfig.from_encoder_decoder_configs( + BertConfig( + hidden_size=32, + num_hidden_layers=4, + num_attention_heads=4, + intermediate_size=37, + ), + BertConfig( + hidden_size=32, + num_hidden_layers=4, + num_attention_heads=4, + intermediate_size=37, + is_decoder=True, + add_cross_attention=True, + ), + ) + ) + tokenizer_name = "bert-base-uncased" + do_run_train_tests = False + + def test_generation(self): + model = AutoModelForSeq2SeqLM.from_config(self.config()) + init(model) + model.add_adapter("test", config="pfeiffer") + model.set_active_adapters("test") + tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name, use_fast=False) + + text = "This is a test sentence." + input_ids = tokenizer(text, return_tensors="pt").input_ids + + generated_ids = model.generate(input_ids, bos_token_id=100) + generated_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] + self.assertNotEqual("", generated_text) + + def test_invertible_adapter_with_head(self): + """This test class is copied and adapted from the identically-named test in test_adapter_heads.py.""" + raise self.skipTest("AutoModelForSeq2SeqLM does not support using invertible adapters.") + + def test_adapter_fusion_save_with_head(self): + # This test is not applicable to the encoder-decoder model since it has no heads. + self.skipTest("Not applicable to the encoder-decoder model.") + + def test_forward_with_past(self): + # This test is not applicable to the encoder-decoder model since it has no heads. 
+ self.skipTest("Not applicable to the encoder-decoder model.") + + def test_output_adapter_gating_scores_unipelt(self): + # TODO currently not supported + self.skipTest("Not implemented.") + + def test_output_adapter_fusion_attentions(self): + # TODO currently not supported + self.skipTest("Not implemented.") + + +test_methods = generate_method_tests( + EncoderDecoderAdapterTestBase, + excluded_tests=["Heads", "ConfigUnion", "Embeddings", "Composition", "PromptTuning", "ClassConversion"], +) + +for test_class_name, test_class in test_methods.items(): + globals()[test_class_name] = test_class diff --git a/tests/test_methods/test_llama.py b/tests/test_methods/test_llama.py new file mode 100644 index 0000000000..ad4b1d0603 --- /dev/null +++ b/tests/test_methods/test_llama.py @@ -0,0 +1,33 @@ +from transformers.models.llama.configuration_llama import LlamaConfig + +from .utils import * + + +class LlamaAdapterTestBase(TextAdapterTestBase): + config_class = LlamaConfig + config = make_config( + LlamaConfig, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=4, + intermediate_size=37, + hidden_act="gelu", + pad_token_id=0, + ) + tokenizer_name = "openlm-research/open_llama_13b" + + +method_tests = generate_method_tests(LlamaAdapterTestBase, excluded_tests=["PromptTuning"]) + +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class + + +@require_torch +class ClassConversion( + ModelClassConversionTestMixin, + LlamaAdapterTestBase, + unittest.TestCase, +): + def test_conversion_question_answering_model(self): + raise self.skipTest("We don't support the Llama QA model.") diff --git a/tests/test_methods/test_mbart.py b/tests/test_methods/test_mbart.py new file mode 100644 index 0000000000..08c51f6ee4 --- /dev/null +++ b/tests/test_methods/test_mbart.py @@ -0,0 +1,26 @@ +from transformers import MBartConfig + +from .utils import * + + +class MBartAdapterTestBase(TextAdapterTestBase): + config_class = MBartConfig + config = make_config( + MBartConfig, + d_model=16, + encoder_layers=2, + decoder_layers=2, + encoder_attention_heads=4, + decoder_attention_heads=4, + encoder_ffn_dim=4, + decoder_ffn_dim=4, + vocab_size=250027, + ) + tokenizer_name = "facebook/mbart-large-cc25" + + +method_tests = generate_method_tests( + MBartAdapterTestBase, excluded_tests=["ConfigUnion", "Embeddings", "PromptTuning"] +) +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class diff --git a/tests/test_methods/test_mistral.py b/tests/test_methods/test_mistral.py new file mode 100644 index 0000000000..04573aaa34 --- /dev/null +++ b/tests/test_methods/test_mistral.py @@ -0,0 +1,24 @@ +from transformers.models.mistral.configuration_mistral import MistralConfig + +from .utils import * + + +class MistralAdapterTestBase(TextAdapterTestBase): + config_class = MistralConfig + config = make_config( + MistralConfig, + hidden_size=32, + num_hidden_layers=5, + num_attention_heads=8, + intermediate_size=37, + hidden_act="gelu", + hidden_dropout_prob=0.1, + pad_token_id=0, + ) + tokenizer_name = "HuggingFaceH4/zephyr-7b-beta" + + +test_methods = generate_method_tests(MistralAdapterTestBase, excluded_tests=["PromptTuning", "ConfigUnion"]) + +for test_class_name, test_class in test_methods.items(): + globals()[test_class_name] = test_class diff --git a/tests/test_methods/test_mt5.py b/tests/test_methods/test_mt5.py new file mode 100644 index 0000000000..45a96a1f10 --- /dev/null +++ b/tests/test_methods/test_mt5.py @@ -0,0 +1,26 @@ +from 
transformers import MT5Config + +from .utils import * + + +@require_torch +class MT5AdapterTestBase(TextAdapterTestBase): + config_class = MT5Config + config = make_config( + MT5Config, + d_model=16, + num_layers=2, + num_decoder_layers=2, + num_heads=4, + d_ff=4, + d_kv=16 // 4, + tie_word_embeddings=False, + decoder_start_token_id=0, + ) + tokenizer_name = "google/mt5-base" + + +method_tests = generate_method_tests(MT5AdapterTestBase, excluded_tests=["PromptTuning", "ConfigUnion"]) + +for test_name, test_class in method_tests.items(): + globals()[test_name] = test_class diff --git a/tests/test_methods/test_plbart.py b/tests/test_methods/test_plbart.py new file mode 100644 index 0000000000..c722ecaeec --- /dev/null +++ b/tests/test_methods/test_plbart.py @@ -0,0 +1,25 @@ +from transformers import PLBartConfig + +from .utils import * + + +class PLBartAdapterTestBase(TextAdapterTestBase): + config_class = PLBartConfig + config = make_config( + PLBartConfig, + d_model=16, + encoder_layers=2, + decoder_layers=2, + encoder_attention_heads=4, + decoder_attention_heads=4, + encoder_ffn_dim=4, + decoder_ffn_dim=4, + scale_embedding=False, # Required for embedding tests + ) + tokenizer_name = "uclanlp/plbart-base" + + +method_tests = generate_method_tests(PLBartAdapterTestBase, excluded_tests=["PromptTuning"]) + +for test_name, test_class in method_tests.items(): + globals()[test_name] = test_class diff --git a/tests/test_methods/test_roberta.py b/tests/test_methods/test_roberta.py new file mode 100644 index 0000000000..445636d5f9 --- /dev/null +++ b/tests/test_methods/test_roberta.py @@ -0,0 +1,22 @@ +from transformers import RobertaConfig + +from .utils import * + + +class RobertaAdapterTestBase(TextAdapterTestBase): + config_class = RobertaConfig + config = make_config( + RobertaConfig, + hidden_size=32, + num_hidden_layers=4, + num_attention_heads=4, + intermediate_size=37, + vocab_size=50265, + ) + tokenizer_name = "roberta-base" + + +method_tests = generate_method_tests(RobertaAdapterTestBase) + +for test_name, test_class in method_tests.items(): + globals()[test_name] = test_class From 5f559353235a42b38f84811985eef1cc264ece6c Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Thu, 21 Nov 2024 16:35:20 +0100 Subject: [PATCH 23/63] Add more models --- tests/test_methods/test_t5.py | 25 +++++++++++++++++++++++++ tests/test_methods/test_vit.py | 21 +++++++++++++++++++++ tests/test_methods/test_xlm_roberta.py | 21 +++++++++++++++++++++ tests/test_methods/test_xmod.py | 23 +++++++++++++++++++++++ 4 files changed, 90 insertions(+) create mode 100644 tests/test_methods/test_t5.py create mode 100644 tests/test_methods/test_vit.py create mode 100644 tests/test_methods/test_xlm_roberta.py create mode 100644 tests/test_methods/test_xmod.py diff --git a/tests/test_methods/test_t5.py b/tests/test_methods/test_t5.py new file mode 100644 index 0000000000..3d99b63439 --- /dev/null +++ b/tests/test_methods/test_t5.py @@ -0,0 +1,25 @@ +from transformers import T5Config + +from .utils import * + + +@require_torch +class T5AdapterTestBase(TextAdapterTestBase): + config_class = T5Config + config = make_config( + T5Config, + d_model=16, + num_layers=2, + num_decoder_layers=2, + num_heads=4, + d_ff=4, + d_kv=16 // 4, + tie_word_embeddings=False, + decoder_start_token_id=0, + ) + tokenizer_name = "t5-base" + + +method_tests = generate_method_tests(T5AdapterTestBase, excluded_tests=["ConfigUnion", "PromptTuning"]) +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class diff 
--git a/tests/test_methods/test_vit.py b/tests/test_methods/test_vit.py new file mode 100644 index 0000000000..69bfc44028 --- /dev/null +++ b/tests/test_methods/test_vit.py @@ -0,0 +1,21 @@ +from transformers import ViTConfig + +from .utils import * + + +class ViTAdapterTestBase(VisionAdapterTestBase): + config_class = ViTConfig + config = make_config( + ViTConfig, + image_size=224, + hidden_size=32, + num_hidden_layers=4, + num_attention_heads=4, + intermediate_size=37, + ) + feature_extractor_name = "google/vit-base-patch16-224-in21k" + + +method_tests = generate_method_tests(ViTAdapterTestBase, excluded_tests=["ConfigUnion", "Embeddings", "Composition"]) +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class diff --git a/tests/test_methods/test_xlm_roberta.py b/tests/test_methods/test_xlm_roberta.py new file mode 100644 index 0000000000..a54f716b78 --- /dev/null +++ b/tests/test_methods/test_xlm_roberta.py @@ -0,0 +1,21 @@ +from transformers import XLMRobertaConfig + +from .utils import * + + +class XLMRobertaAdapterTestBase(TextAdapterTestBase): + config_class = XLMRobertaConfig + config = make_config( + XLMRobertaConfig, + hidden_size=32, + num_hidden_layers=4, + num_attention_heads=4, + intermediate_size=37, + vocab_size=250002, + ) + tokenizer_name = "xlm-roberta-base" + + +method_tests = generate_method_tests(XLMRobertaAdapterTestBase, excluded_tests=["ConfigUnion", "Embeddings"]) +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class diff --git a/tests/test_methods/test_xmod.py b/tests/test_methods/test_xmod.py new file mode 100644 index 0000000000..388102ddb3 --- /dev/null +++ b/tests/test_methods/test_xmod.py @@ -0,0 +1,23 @@ +from transformers import XmodConfig + +from .utils import * + + +class XmodAdapterTestBase(TextAdapterTestBase): + config_class = XmodConfig + config = make_config( + XmodConfig, + hidden_size=32, + num_hidden_layers=4, + num_attention_heads=4, + intermediate_size=37, + vocab_size=250002, + max_position_embeddings=512, + default_language="en_XX", + ) + tokenizer_name = "xlm-roberta-base" + + +method_tests = generate_method_tests(XmodAdapterTestBase, excluded_tests=["ConfigUnion", "Embeddings"]) +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class From 147c8af5226eea42e9a67fc14f94c0360c673d8c Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Wed, 27 Nov 2024 17:26:56 +0100 Subject: [PATCH 24/63] Re-add whisper --- tests/test_impl/base.py | 4 +-- tests/test_impl/composition/test_parallel.py | 4 +-- tests/test_methods/base.py | 28 +++++++++++++++---- tests/test_methods/test_whisper.py | 29 ++++++++++++++++++++ tests/test_methods/utils.py | 2 +- 5 files changed, 56 insertions(+), 11 deletions(-) create mode 100644 tests/test_methods/test_whisper.py diff --git a/tests/test_impl/base.py b/tests/test_impl/base.py index 090d277fd6..629a7b1e1e 100644 --- a/tests/test_impl/base.py +++ b/tests/test_impl/base.py @@ -375,6 +375,6 @@ def run_generate_test(self, adapter_config, max_new_tokens=32): add_lm_head(self.config_class, model, "generate") model.set_active_adapters("generate") model.to(torch_device) - input_ids = self.build_rand_ids_tensor(self.input_shape).to(torch_device) - generated = model.generate(input_ids, max_new_tokens=max_new_tokens) + generate_input = self.build_generate_input(self.input_shape).to(torch_device) + generated = model.generate(generate_input, max_new_tokens=max_new_tokens) 
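+        # build_generate_input() is overridden per modality: the text test bases return random
+        # input_ids, while the audio test bases return random input_features, so this generation
+        # check stays model-agnostic.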
self.assertLessEqual(generated.shape, (self.input_shape[0], self.input_shape[1] + max_new_tokens)) diff --git a/tests/test_impl/composition/test_parallel.py b/tests/test_impl/composition/test_parallel.py index f6038b572c..11c84701ec 100644 --- a/tests/test_impl/composition/test_parallel.py +++ b/tests/test_impl/composition/test_parallel.py @@ -130,8 +130,8 @@ def test_parallel_generate(self, max_new_tokens=32): add_lm_head(self.config_class, model1, "adapter2") model1.set_active_adapters(Parallel("adapter1", "adapter2")) model1.to(torch_device) - input_ids = self.build_rand_ids_tensor(self.input_shape).to(torch_device) - generated = model1.generate(input_ids, max_new_tokens=max_new_tokens) + generate_input = self.build_generate_input(self.input_shape).to(torch_device) + generated = model1.generate(generate_input, max_new_tokens=max_new_tokens) self.assertLessEqual(generated.shape, (self.input_shape[0] * 2, self.input_shape[1] + max_new_tokens)) diff --git a/tests/test_methods/base.py b/tests/test_methods/base.py index 569e493f01..ffc9569450 100644 --- a/tests/test_methods/base.py +++ b/tests/test_methods/base.py @@ -25,7 +25,6 @@ class AbstractAdapterTestBase: tokenizer_name = "tests/fixtures/SiBERT" # path to default tokenizer config available in the test repo config = None # specified in the actual model test classes input_shape = () # (batch_size, seq_length) - input_shape_generate = () # (batch_size, seq_length) leave_out_layers = [] do_run_train_tests = True num_labels = 2 @@ -87,12 +86,15 @@ def get_conversion_model(self): """Returns the respective conversion class of the adapter model for the conversion tests.""" raise NotImplementedError("get_conversion_model() must be implemented in the subclass.") + def build_generate_input(self, shape): + """The generate() functions for inference require different inputs depending on the model type. E.g. the text models require input_ids, whereas the audio models require input_features""" + return self.build_rand_ids_tensor(self.input_shape if not shape else shape).to(torch_device) + class TextAdapterTestBase(AbstractAdapterTestBase): """Base class for adapter tests for text models. Text models test classes should inherit from this class and override the attributes and functions as needed.""" input_shape = (3, 64) - input_shape_generate = (1, 4) leave_out_layers = [0, 1] batch_size, seq_length = ( input_shape # TODO: Check in which tests this is needed and if we can simplify by using input_shape @@ -190,9 +192,9 @@ class AudioAdapterTestBase(AbstractAdapterTestBase): """Base class for adapter tests for audio models.
Audio models test classes should inherit from this class and override the attributes and functions as needed.""" input_shape = (3, 80, 3000) # (batch_size, n_mels, enc_seq_len) - generate_input_shape = (1, 80, 3000) time_window = 3000 # Time window for audio samples seq_length = 80 + batch_size = 3 _TASK_DATASET_MAPPING = { # TODO: build global mapping for all tasks and datasets @@ -218,9 +220,7 @@ def get_input_samples(self, shape=None, config=None, **kwargs): # Add decoder input ids for models with a decoder if config and config.is_encoder_decoder: - in_data["decoder_input_ids"] = self.build_rand_tensor( - (shape[:-1]), dtype=torch.long, vocab_size=config.vocab_size - ) + in_data["decoder_input_ids"] = self.build_rand_ids_tensor((shape[:-1]), vocab_size=config.vocab_size) return in_data def get_dataset(self, task_type: str = "seq2seq_lm", **kwargs): @@ -242,3 +242,19 @@ def get_conversion_model(self): ) label_dict["labels"] = label_dict["decoder_input_ids"] return model, label_dict + + def build_generate_input(self, shape): + return self.build_rand_tensor(self.input_shape if not shape else shape, dtype=torch.float) + + def attach_labels(self, inputs): + inputs["labels"] = torch.randint(0, 2, (self.batch_size, self.seq_length), device=torch_device) + return inputs + + def get_dataset_non_batched(self, config): + dataset_batched = self.get_dataset() + dataset = [{} for _ in range(len(dataset_batched))] + # For non-batched training, we need to wrap the samples by an additional dimension + for i in range(len(dataset_batched)): + for key, value in dataset_batched[i].items(): + dataset[i][key] = torch.unsqueeze(value, 0) + return dataset diff --git a/tests/test_methods/test_whisper.py b/tests/test_methods/test_whisper.py new file mode 100644 index 0000000000..0d1cbbbd40 --- /dev/null +++ b/tests/test_methods/test_whisper.py @@ -0,0 +1,29 @@ +from transformers import WhisperConfig + +from .utils import * + + +class WhisperAdapterTestBase(AudioAdapterTestBase): + config_class = WhisperConfig + config = make_config( + WhisperConfig, + d_model=16, + encoder_layers=2, + decoder_layers=2, + encoder_attention_heads=4, + decoder_attention_heads=4, + encoder_ffn_dim=4, + decoder_ffn_dim=4, + vocab_size=51865, + ) + tokenizer_name = "openai/whisper-small" + sampling_rate = 16000 + decoder_start_token_id = 50257 + + def test_parallel_training_lora(self): + self.skipTest("Not supported for Whisper") + + +method_tests = generate_method_tests(WhisperAdapterTestBase, excluded_tests=["PromptTuning"]) +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class diff --git a/tests/test_methods/utils.py b/tests/test_methods/utils.py index 6ba783dae9..4e9a933789 100644 --- a/tests/test_methods/utils.py +++ b/tests/test_methods/utils.py @@ -22,7 +22,7 @@ from tests.test_impl.peft.test_reft import ReftTestMixin from tests.test_impl.peft.test_unipelt import UniPELTTestMixin from tests.test_impl.utils import make_config -from tests.test_methods.base import TextAdapterTestBase, VisionAdapterTestBase +from tests.test_methods.base import AudioAdapterTestBase, TextAdapterTestBase, VisionAdapterTestBase from transformers import AutoTokenizer, PreTrainedModel, PreTrainedTokenizer from transformers.testing_utils import require_torch, torch_device From b2979ce7182996cc91b591e77150ab357eab051d Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Tue, 17 Dec 2024 00:35:33 +0100 Subject: [PATCH 25/63] Changes: - introduce parameter to cut down on makefile commands - draw out tests that are not 
executed on every model into a seperate directory - move the adapter method test implemenations into the method tests directory - rename files for more clarity --- Makefile | 68 +++---------------- setup.cfg | 2 +- tests/test_methods/{utils.py => generator.py} | 39 ++++++----- .../method_test_impl}/__init__.py | 0 .../method_test_impl}/base.py | 0 .../method_test_impl}/composition/__init__.py | 0 .../composition/test_parallel.py | 2 +- .../method_test_impl}/core/__init__.py | 0 .../core/test_adapter_backward_compability.py | 2 +- .../core/test_adapter_conversion.py | 0 .../core/test_adapter_fusion_common.py | 0 .../method_test_impl}/embeddings/__init__.py | 0 .../embeddings/test_adapter_embeddings.py | 0 .../method_test_impl}/heads/__init__.py | 0 .../heads/test_adapter_heads.py | 2 +- .../method_test_impl}/peft/__init__.py | 0 .../peft/test_adapter_common.py | 4 +- .../method_test_impl}/peft/test_compacter.py | 2 +- .../peft/test_config_union.py | 2 +- .../method_test_impl}/peft/test_ia3.py | 2 +- .../method_test_impl}/peft/test_lora.py | 2 +- .../peft/test_prefix_tuning.py | 2 +- .../peft/test_prompt_tuning.py | 2 +- .../method_test_impl}/peft/test_reft.py | 2 +- .../method_test_impl}/peft/test_unipelt.py | 2 +- .../method_test_impl}/utils.py | 0 tests/test_methods/test_albert.py | 2 +- tests/test_methods/test_bart.py | 2 +- tests/test_methods/test_beit.py | 2 +- tests/test_methods/test_bert.py | 2 +- tests/test_methods/test_bert_generation.py | 2 +- tests/test_methods/test_clip_text.py | 2 +- tests/test_methods/test_clip_vision.py | 2 +- tests/test_methods/test_deberta.py | 2 +- tests/test_methods/test_debertaV2.py | 2 +- tests/test_methods/test_distilbert.py | 2 +- tests/test_methods/test_electra.py | 2 +- tests/test_methods/test_encoder_decoder.py | 2 +- tests/test_methods/test_gpt2.py | 2 +- tests/test_methods/test_llama.py | 2 +- tests/test_methods/test_mbart.py | 2 +- tests/test_methods/test_mistral.py | 2 +- tests/test_methods/test_mt5.py | 2 +- tests/test_methods/test_plbart.py | 2 +- tests/test_methods/test_roberta.py | 2 +- tests/test_methods/test_t5.py | 2 +- tests/test_methods/test_vit.py | 2 +- tests/test_methods/test_whisper.py | 2 +- tests/test_methods/test_xlm_roberta.py | 2 +- tests/test_methods/test_xmod.py | 2 +- .../test_adapter_composition.py | 2 +- .../core => test_misc}/test_adapter_config.py | 0 .../test_adapter_custom_head.py | 2 +- .../test_adapter_fusion_config.py | 0 .../core => test_misc}/test_adapter_hub.py | 2 +- .../test_adapter_safetensors.py | 0 .../test_adapter_save_id2label.py | 0 .../test_adapter_trainer}/__init__.py | 0 .../test_adapter_trainer.py | 0 .../test_adapter_trainer_ext.py | 0 60 files changed, 72 insertions(+), 117 deletions(-) rename tests/test_methods/{utils.py => generator.py} (73%) rename tests/{test_impl => test_methods/method_test_impl}/__init__.py (100%) rename tests/{test_impl => test_methods/method_test_impl}/base.py (100%) rename tests/{test_impl => test_methods/method_test_impl}/composition/__init__.py (100%) rename tests/{test_impl => test_methods/method_test_impl}/composition/test_parallel.py (99%) rename tests/{test_impl => test_methods/method_test_impl}/core/__init__.py (100%) rename tests/{test_impl => test_methods/method_test_impl}/core/test_adapter_backward_compability.py (96%) rename tests/{test_impl => test_methods/method_test_impl}/core/test_adapter_conversion.py (100%) rename tests/{test_impl => test_methods/method_test_impl}/core/test_adapter_fusion_common.py (100%) rename tests/{test_impl => 
test_methods/method_test_impl}/embeddings/__init__.py (100%) rename tests/{test_impl => test_methods/method_test_impl}/embeddings/test_adapter_embeddings.py (100%) rename tests/{test_impl => test_methods/method_test_impl}/heads/__init__.py (100%) rename tests/{test_impl => test_methods/method_test_impl}/heads/test_adapter_heads.py (99%) rename tests/{test_impl => test_methods/method_test_impl}/peft/__init__.py (100%) rename tests/{test_impl => test_methods/method_test_impl}/peft/test_adapter_common.py (99%) rename tests/{test_impl => test_methods/method_test_impl}/peft/test_compacter.py (96%) rename tests/{test_impl => test_methods/method_test_impl}/peft/test_config_union.py (95%) rename tests/{test_impl => test_methods/method_test_impl}/peft/test_ia3.py (95%) rename tests/{test_impl => test_methods/method_test_impl}/peft/test_lora.py (99%) rename tests/{test_impl => test_methods/method_test_impl}/peft/test_prefix_tuning.py (97%) rename tests/{test_impl => test_methods/method_test_impl}/peft/test_prompt_tuning.py (94%) rename tests/{test_impl => test_methods/method_test_impl}/peft/test_reft.py (97%) rename tests/{test_impl => test_methods/method_test_impl}/peft/test_unipelt.py (96%) rename tests/{test_impl => test_methods/method_test_impl}/utils.py (100%) rename tests/{test_impl/composition => test_misc}/test_adapter_composition.py (99%) rename tests/{test_impl/core => test_misc}/test_adapter_config.py (100%) rename tests/{test_impl/heads => test_misc}/test_adapter_custom_head.py (98%) rename tests/{test_impl/core => test_misc}/test_adapter_fusion_config.py (100%) rename tests/{test_impl/core => test_misc}/test_adapter_hub.py (99%) rename tests/{test_impl/core => test_misc}/test_adapter_safetensors.py (100%) rename tests/{test_impl/core => test_misc}/test_adapter_save_id2label.py (100%) rename tests/{test_impl/trainer => test_misc/test_adapter_trainer}/__init__.py (100%) rename tests/{test_impl/trainer => test_misc/test_adapter_trainer}/test_adapter_trainer.py (100%) rename tests/{test_impl/trainer => test_misc/test_adapter_trainer}/test_adapter_trainer_ext.py (100%) diff --git a/Makefile b/Makefile index 862f04bf52..a7ce91e4b6 100644 --- a/Makefile +++ b/Makefile @@ -34,69 +34,21 @@ style: test: python -m pytest -n auto --dist=loadfile -s -v ./tests/ -# run tests for the adapter methods +# run all tests for the adapter methods for all adapter models test-adapter-methods: python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -# run tests for the adapter models -test-adapter-models: - python -m pytest -n auto --dist=loadfile -s -v ./tests/test_models/ - -# run the core tests for all models -test-adapter-core: - python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m core - -# run the adapter composition tests for all models -test-adapter-composition: - python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m composition - -# run the head tests for all models -test-adapter-heads: - python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m heads - -# run the embedding teasts for all models -test-adapter-embeddings: - python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m embeddings - -# run the class conversion tests for all models -test-adapter-class_conversion: - python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m class_conversion - -# run the prefix tuning tests for all models -test-adapter-prefix_tuning: - python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m prefix_tuning +# 
run a subset of the adapter method tests for all adapter models +# list of all subsets: [core, heads, embeddings, composition, prefix_tuning, prompt_tuning, reft, unipelt, compacter, bottleneck, ia3, lora, config_union] +subset ?= +test-adapter-method-subset: + @echo "Running subset $(subset)" + python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m $(subset) -# run the prompt tuning tests for all models -test-adapter-prompt_tuning: - python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m prompt_tuning -# run the reft tests for all models -test-adapter-reft: - python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m reft - -# run the unipelt tests for all models -test-adapter-unipelt: - python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m unipelt - -# run the compacter tests for all models -test-adapter-compacter: - python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m compacter - -# run the bottleneck tests for all models -test-adapter-bottleneck: - python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m bottleneck - -# run the ia3 tests for all models -test-adapter-ia3: - python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m ia3 - -# run the lora tests for all models -test-adapter-lora: - python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m lora - -# run the config union tests for all models -test-adapter-config_union: - python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m config_union +# run the hugginface test suite for all adapter models +test-adapter-models: + python -m pytest -n auto --dist=loadfile -s -v ./tests/test_models/ # Run tests for examples test-examples: diff --git a/setup.cfg b/setup.cfg index 1323b957d7..3452fa3173 100644 --- a/setup.cfg +++ b/setup.cfg @@ -50,7 +50,7 @@ use_parentheses = True ignore = E203, E501, E731, E741, W503, W605 max-line-length = 119 per-file-ignores = - tests/test_methods/utils.py: F401, F403, F405 + tests/test_methods/generator.py: F401, F403, F405 tests/test_methods/test_*.py:F403,F405 [tool:pytest] diff --git a/tests/test_methods/utils.py b/tests/test_methods/generator.py similarity index 73% rename from tests/test_methods/utils.py rename to tests/test_methods/generator.py index 4e9a933789..3af8f82b72 100644 --- a/tests/test_methods/utils.py +++ b/tests/test_methods/generator.py @@ -6,23 +6,26 @@ import pytest import torch -from tests.test_impl.composition.test_parallel import ParallelAdapterInferenceTestMixin, ParallelTrainingMixin -from tests.test_impl.core.test_adapter_backward_compability import CompabilityTestMixin -from tests.test_impl.core.test_adapter_conversion import ModelClassConversionTestMixin -from tests.test_impl.core.test_adapter_fusion_common import AdapterFusionModelTestMixin -from tests.test_impl.embeddings.test_adapter_embeddings import EmbeddingTestMixin -from tests.test_impl.heads.test_adapter_heads import PredictionHeadModelTestMixin -from tests.test_impl.peft.test_adapter_common import BottleneckAdapterTestMixin -from tests.test_impl.peft.test_compacter import CompacterTestMixin -from tests.test_impl.peft.test_config_union import ConfigUnionAdapterTest -from tests.test_impl.peft.test_ia3 import IA3TestMixin -from tests.test_impl.peft.test_lora import LoRATestMixin -from tests.test_impl.peft.test_prefix_tuning import PrefixTuningTestMixin -from tests.test_impl.peft.test_prompt_tuning import PromptTuningTestMixin -from tests.test_impl.peft.test_reft import 
ReftTestMixin -from tests.test_impl.peft.test_unipelt import UniPELTTestMixin -from tests.test_impl.utils import make_config from tests.test_methods.base import AudioAdapterTestBase, TextAdapterTestBase, VisionAdapterTestBase +from tests.test_methods.method_test_impl.composition.test_parallel import ( + ParallelAdapterInferenceTestMixin, + ParallelTrainingMixin, +) +from tests.test_methods.method_test_impl.core.test_adapter_backward_compability import CompabilityTestMixin +from tests.test_methods.method_test_impl.core.test_adapter_conversion import ModelClassConversionTestMixin +from tests.test_methods.method_test_impl.core.test_adapter_fusion_common import AdapterFusionModelTestMixin +from tests.test_methods.method_test_impl.embeddings.test_adapter_embeddings import EmbeddingTestMixin +from tests.test_methods.method_test_impl.heads.test_adapter_heads import PredictionHeadModelTestMixin +from tests.test_methods.method_test_impl.peft.test_adapter_common import BottleneckAdapterTestMixin +from tests.test_methods.method_test_impl.peft.test_compacter import CompacterTestMixin +from tests.test_methods.method_test_impl.peft.test_config_union import ConfigUnionAdapterTest +from tests.test_methods.method_test_impl.peft.test_ia3 import IA3TestMixin +from tests.test_methods.method_test_impl.peft.test_lora import LoRATestMixin +from tests.test_methods.method_test_impl.peft.test_prefix_tuning import PrefixTuningTestMixin +from tests.test_methods.method_test_impl.peft.test_prompt_tuning import PromptTuningTestMixin +from tests.test_methods.method_test_impl.peft.test_reft import ReftTestMixin +from tests.test_methods.method_test_impl.peft.test_unipelt import UniPELTTestMixin +from tests.test_methods.method_test_impl.utils import make_config from transformers import AutoTokenizer, PreTrainedModel, PreTrainedTokenizer from transformers.testing_utils import require_torch, torch_device @@ -30,9 +33,9 @@ def generate_method_tests( model_test_base, excluded_tests=[], -): +) -> dict: """ - Generates a set of test classes for a given model test base. + Generates a set of method test classes for a given model test base. Args: model_test_base (type): The base class for the model tests. 
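For orientation, every per-model file under tests/test_methods/ consumes this generator in the same way; a minimal sketch of that pattern (mirroring test_roberta.py above, with names taken from this PR) is:

    method_tests = generate_method_tests(RobertaAdapterTestBase)  # optionally pass excluded_tests=[...]
    for test_class_name, test_class in method_tests.items():
        globals()[test_class_name] = test_class

The generated classes are grouped by pytest markers (core, heads, lora, ...), which is presumably what the new make test-adapter-method-subset target selects via pytest -m.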
diff --git a/tests/test_impl/__init__.py b/tests/test_methods/method_test_impl/__init__.py similarity index 100% rename from tests/test_impl/__init__.py rename to tests/test_methods/method_test_impl/__init__.py diff --git a/tests/test_impl/base.py b/tests/test_methods/method_test_impl/base.py similarity index 100% rename from tests/test_impl/base.py rename to tests/test_methods/method_test_impl/base.py diff --git a/tests/test_impl/composition/__init__.py b/tests/test_methods/method_test_impl/composition/__init__.py similarity index 100% rename from tests/test_impl/composition/__init__.py rename to tests/test_methods/method_test_impl/composition/__init__.py diff --git a/tests/test_impl/composition/test_parallel.py b/tests/test_methods/method_test_impl/composition/test_parallel.py similarity index 99% rename from tests/test_impl/composition/test_parallel.py rename to tests/test_methods/method_test_impl/composition/test_parallel.py index 11c84701ec..98da4a5c7e 100644 --- a/tests/test_impl/composition/test_parallel.py +++ b/tests/test_methods/method_test_impl/composition/test_parallel.py @@ -12,7 +12,7 @@ T5AdapterModel, ) from adapters.composition import BatchSplit, Parallel -from tests.test_impl.utils import add_lm_head +from tests.test_methods.method_test_impl.utils import add_lm_head from transformers import MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING, Trainer, TrainingArguments from transformers.testing_utils import require_torch, torch_device diff --git a/tests/test_impl/core/__init__.py b/tests/test_methods/method_test_impl/core/__init__.py similarity index 100% rename from tests/test_impl/core/__init__.py rename to tests/test_methods/method_test_impl/core/__init__.py diff --git a/tests/test_impl/core/test_adapter_backward_compability.py b/tests/test_methods/method_test_impl/core/test_adapter_backward_compability.py similarity index 96% rename from tests/test_impl/core/test_adapter_backward_compability.py rename to tests/test_methods/method_test_impl/core/test_adapter_backward_compability.py index 722d6499bf..196380524f 100644 --- a/tests/test_impl/core/test_adapter_backward_compability.py +++ b/tests/test_methods/method_test_impl/core/test_adapter_backward_compability.py @@ -3,7 +3,7 @@ import tempfile from adapters import SeqBnConfig, __version__ -from tests.test_impl.utils import create_twin_models +from tests.test_methods.method_test_impl.utils import create_twin_models from transformers.testing_utils import require_torch diff --git a/tests/test_impl/core/test_adapter_conversion.py b/tests/test_methods/method_test_impl/core/test_adapter_conversion.py similarity index 100% rename from tests/test_impl/core/test_adapter_conversion.py rename to tests/test_methods/method_test_impl/core/test_adapter_conversion.py diff --git a/tests/test_impl/core/test_adapter_fusion_common.py b/tests/test_methods/method_test_impl/core/test_adapter_fusion_common.py similarity index 100% rename from tests/test_impl/core/test_adapter_fusion_common.py rename to tests/test_methods/method_test_impl/core/test_adapter_fusion_common.py diff --git a/tests/test_impl/embeddings/__init__.py b/tests/test_methods/method_test_impl/embeddings/__init__.py similarity index 100% rename from tests/test_impl/embeddings/__init__.py rename to tests/test_methods/method_test_impl/embeddings/__init__.py diff --git a/tests/test_impl/embeddings/test_adapter_embeddings.py b/tests/test_methods/method_test_impl/embeddings/test_adapter_embeddings.py similarity index 100% rename from tests/test_impl/embeddings/test_adapter_embeddings.py 
rename to tests/test_methods/method_test_impl/embeddings/test_adapter_embeddings.py diff --git a/tests/test_impl/heads/__init__.py b/tests/test_methods/method_test_impl/heads/__init__.py similarity index 100% rename from tests/test_impl/heads/__init__.py rename to tests/test_methods/method_test_impl/heads/__init__.py diff --git a/tests/test_impl/heads/test_adapter_heads.py b/tests/test_methods/method_test_impl/heads/test_adapter_heads.py similarity index 99% rename from tests/test_impl/heads/test_adapter_heads.py rename to tests/test_methods/method_test_impl/heads/test_adapter_heads.py index c3ceb2adfd..6179a19c28 100644 --- a/tests/test_impl/heads/test_adapter_heads.py +++ b/tests/test_methods/method_test_impl/heads/test_adapter_heads.py @@ -7,7 +7,7 @@ from adapters import ADAPTER_MODEL_MAPPING, AdapterSetup, AutoAdapterModel from adapters.composition import BatchSplit, Stack from adapters.heads import PredictionHead -from tests.test_impl.utils import create_twin_models +from tests.test_methods.method_test_impl.utils import create_twin_models from transformers import AutoModelForSequenceClassification from transformers.testing_utils import require_torch, torch_device diff --git a/tests/test_impl/peft/__init__.py b/tests/test_methods/method_test_impl/peft/__init__.py similarity index 100% rename from tests/test_impl/peft/__init__.py rename to tests/test_methods/method_test_impl/peft/__init__.py diff --git a/tests/test_impl/peft/test_adapter_common.py b/tests/test_methods/method_test_impl/peft/test_adapter_common.py similarity index 99% rename from tests/test_impl/peft/test_adapter_common.py rename to tests/test_methods/method_test_impl/peft/test_adapter_common.py index 696e8ddc09..4aeeab01ed 100644 --- a/tests/test_impl/peft/test_adapter_common.py +++ b/tests/test_methods/method_test_impl/peft/test_adapter_common.py @@ -19,8 +19,8 @@ SeqBnInvConfig, ) from adapters.heads.language_modeling import CausalLMHead -from tests.test_impl.base import AdapterMethodBaseTestMixin -from tests.test_impl.utils import create_twin_models +from tests.test_methods.method_test_impl.base import AdapterMethodBaseTestMixin +from tests.test_methods.method_test_impl.utils import create_twin_models from transformers import MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING, CLIPConfig from transformers.testing_utils import require_torch, torch_device diff --git a/tests/test_impl/peft/test_compacter.py b/tests/test_methods/method_test_impl/peft/test_compacter.py similarity index 96% rename from tests/test_impl/peft/test_compacter.py rename to tests/test_methods/method_test_impl/peft/test_compacter.py index 39b17c0815..31fab6ccee 100644 --- a/tests/test_impl/peft/test_compacter.py +++ b/tests/test_methods/method_test_impl/peft/test_compacter.py @@ -1,5 +1,5 @@ from adapters import ADAPTER_MODEL_MAPPING, CompacterPlusPlusConfig -from tests.test_impl.base import AdapterMethodBaseTestMixin +from tests.test_methods.method_test_impl.base import AdapterMethodBaseTestMixin from transformers.testing_utils import require_torch diff --git a/tests/test_impl/peft/test_config_union.py b/tests/test_methods/method_test_impl/peft/test_config_union.py similarity index 95% rename from tests/test_impl/peft/test_config_union.py rename to tests/test_methods/method_test_impl/peft/test_config_union.py index ca33533089..45c3a2d66c 100644 --- a/tests/test_impl/peft/test_config_union.py +++ b/tests/test_methods/method_test_impl/peft/test_config_union.py @@ -6,7 +6,7 @@ PrefixTuningConfig, SeqBnConfig, ) -from tests.test_impl.base import 
AdapterMethodBaseTestMixin +from tests.test_methods.method_test_impl.base import AdapterMethodBaseTestMixin from transformers.testing_utils import require_torch diff --git a/tests/test_impl/peft/test_ia3.py b/tests/test_methods/method_test_impl/peft/test_ia3.py similarity index 95% rename from tests/test_impl/peft/test_ia3.py rename to tests/test_methods/method_test_impl/peft/test_ia3.py index 8356c1edd2..60c7ae214c 100644 --- a/tests/test_impl/peft/test_ia3.py +++ b/tests/test_methods/method_test_impl/peft/test_ia3.py @@ -1,5 +1,5 @@ from adapters import IA3Config -from tests.test_impl.base import AdapterMethodBaseTestMixin +from tests.test_methods.method_test_impl.base import AdapterMethodBaseTestMixin from transformers.testing_utils import require_torch diff --git a/tests/test_impl/peft/test_lora.py b/tests/test_methods/method_test_impl/peft/test_lora.py similarity index 99% rename from tests/test_impl/peft/test_lora.py rename to tests/test_methods/method_test_impl/peft/test_lora.py index bfddec32fa..793d246a37 100644 --- a/tests/test_impl/peft/test_lora.py +++ b/tests/test_methods/method_test_impl/peft/test_lora.py @@ -4,7 +4,7 @@ from adapters import LoRAConfig from adapters.methods.lora import LoRALayer -from tests.test_impl.base import AdapterMethodBaseTestMixin +from tests.test_methods.method_test_impl.base import AdapterMethodBaseTestMixin from transformers.testing_utils import require_torch diff --git a/tests/test_impl/peft/test_prefix_tuning.py b/tests/test_methods/method_test_impl/peft/test_prefix_tuning.py similarity index 97% rename from tests/test_impl/peft/test_prefix_tuning.py rename to tests/test_methods/method_test_impl/peft/test_prefix_tuning.py index 333888f342..dc73cb8ae7 100644 --- a/tests/test_impl/peft/test_prefix_tuning.py +++ b/tests/test_methods/method_test_impl/peft/test_prefix_tuning.py @@ -1,7 +1,7 @@ import torch from adapters import ADAPTER_MODEL_MAPPING, PrefixTuningConfig -from tests.test_impl.base import AdapterMethodBaseTestMixin +from tests.test_methods.method_test_impl.base import AdapterMethodBaseTestMixin from transformers import CLIPConfig from transformers.testing_utils import require_torch, torch_device diff --git a/tests/test_impl/peft/test_prompt_tuning.py b/tests/test_methods/method_test_impl/peft/test_prompt_tuning.py similarity index 94% rename from tests/test_impl/peft/test_prompt_tuning.py rename to tests/test_methods/method_test_impl/peft/test_prompt_tuning.py index 1b26d187b8..bd0ea206b1 100644 --- a/tests/test_impl/peft/test_prompt_tuning.py +++ b/tests/test_methods/method_test_impl/peft/test_prompt_tuning.py @@ -1,5 +1,5 @@ from adapters import PromptTuningConfig -from tests.test_impl.base import AdapterMethodBaseTestMixin +from tests.test_methods.method_test_impl.base import AdapterMethodBaseTestMixin from transformers.testing_utils import require_torch diff --git a/tests/test_impl/peft/test_reft.py b/tests/test_methods/method_test_impl/peft/test_reft.py similarity index 97% rename from tests/test_impl/peft/test_reft.py rename to tests/test_methods/method_test_impl/peft/test_reft.py index a5b52519f8..5d3d1c7d12 100644 --- a/tests/test_impl/peft/test_reft.py +++ b/tests/test_methods/method_test_impl/peft/test_reft.py @@ -1,5 +1,5 @@ from adapters import DiReftConfig, LoReftConfig, NoReftConfig -from tests.test_impl.base import AdapterMethodBaseTestMixin +from tests.test_methods.method_test_impl.base import AdapterMethodBaseTestMixin from transformers.testing_utils import require_torch diff --git a/tests/test_impl/peft/test_unipelt.py 
b/tests/test_methods/method_test_impl/peft/test_unipelt.py similarity index 96% rename from tests/test_impl/peft/test_unipelt.py rename to tests/test_methods/method_test_impl/peft/test_unipelt.py index ca477a362a..7810e31e8e 100644 --- a/tests/test_impl/peft/test_unipelt.py +++ b/tests/test_methods/method_test_impl/peft/test_unipelt.py @@ -1,5 +1,5 @@ from adapters import UniPELTConfig -from tests.test_impl.base import AdapterMethodBaseTestMixin +from tests.test_methods.method_test_impl.base import AdapterMethodBaseTestMixin from transformers.testing_utils import require_torch, torch_device diff --git a/tests/test_impl/utils.py b/tests/test_methods/method_test_impl/utils.py similarity index 100% rename from tests/test_impl/utils.py rename to tests/test_methods/method_test_impl/utils.py diff --git a/tests/test_methods/test_albert.py b/tests/test_methods/test_albert.py index aea01d0170..971b8414bc 100644 --- a/tests/test_methods/test_albert.py +++ b/tests/test_methods/test_albert.py @@ -2,7 +2,7 @@ from transformers import AlbertConfig -from .utils import ( +from .generator import ( PredictionHeadModelTestMixin, TextAdapterTestBase, ceil, diff --git a/tests/test_methods/test_bart.py b/tests/test_methods/test_bart.py index b4f46e8cf9..8063a8b63f 100644 --- a/tests/test_methods/test_bart.py +++ b/tests/test_methods/test_bart.py @@ -1,6 +1,6 @@ from transformers import BartConfig -from .utils import * +from .generator import * class BartAdapterTestBase(TextAdapterTestBase): diff --git a/tests/test_methods/test_beit.py b/tests/test_methods/test_beit.py index befec4b3e0..c235fdbe8e 100644 --- a/tests/test_methods/test_beit.py +++ b/tests/test_methods/test_beit.py @@ -1,6 +1,6 @@ from transformers import BeitConfig -from .utils import VisionAdapterTestBase, generate_method_tests, make_config +from .generator import VisionAdapterTestBase, generate_method_tests, make_config class BeitAdapterTestBase(VisionAdapterTestBase): diff --git a/tests/test_methods/test_bert.py b/tests/test_methods/test_bert.py index 04b00f0c38..335204d8f8 100644 --- a/tests/test_methods/test_bert.py +++ b/tests/test_methods/test_bert.py @@ -1,6 +1,6 @@ from transformers import BertConfig -from .utils import TextAdapterTestBase, generate_method_tests, make_config +from .generator import TextAdapterTestBase, generate_method_tests, make_config class BertAdapterTestBase(TextAdapterTestBase): diff --git a/tests/test_methods/test_bert_generation.py b/tests/test_methods/test_bert_generation.py index 2a3bb4b4ff..6e436117d8 100644 --- a/tests/test_methods/test_bert_generation.py +++ b/tests/test_methods/test_bert_generation.py @@ -2,7 +2,7 @@ from transformers import AutoTokenizer, BertGenerationConfig -from .utils import TextAdapterTestBase, generate_method_tests, make_config, torch, torch_device +from .generator import TextAdapterTestBase, generate_method_tests, make_config, torch, torch_device class BertGenerationAdapterTestBase(TextAdapterTestBase): diff --git a/tests/test_methods/test_clip_text.py b/tests/test_methods/test_clip_text.py index 8a0b05c80e..82db4fd30c 100644 --- a/tests/test_methods/test_clip_text.py +++ b/tests/test_methods/test_clip_text.py @@ -2,7 +2,7 @@ from transformers import CLIPConfig, CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProjection, CLIPVisionConfig -from .utils import * +from .generator import * class CLIPTextAdapterTestBase(TextAdapterTestBase): diff --git a/tests/test_methods/test_clip_vision.py b/tests/test_methods/test_clip_vision.py index 3686c20ae6..03762809f2 100644 --- 
a/tests/test_methods/test_clip_vision.py +++ b/tests/test_methods/test_clip_vision.py @@ -1,6 +1,6 @@ from transformers import CLIPVisionConfig, CLIPVisionModel, CLIPVisionModelWithProjection -from .utils import * +from .generator import * class CLIPVisionAdapterTestBase(VisionAdapterTestBase): diff --git a/tests/test_methods/test_deberta.py b/tests/test_methods/test_deberta.py index d53ae85f21..e7647f9f1e 100644 --- a/tests/test_methods/test_deberta.py +++ b/tests/test_methods/test_deberta.py @@ -1,6 +1,6 @@ from transformers import DebertaConfig -from .utils import * +from .generator import * class DebertaAdapterTestBase(TextAdapterTestBase): diff --git a/tests/test_methods/test_debertaV2.py b/tests/test_methods/test_debertaV2.py index a6ef99b104..a179361e84 100644 --- a/tests/test_methods/test_debertaV2.py +++ b/tests/test_methods/test_debertaV2.py @@ -1,6 +1,6 @@ from transformers import DebertaV2Config -from .utils import * +from .generator import * class DebertaV2AdapterTestBase(TextAdapterTestBase): diff --git a/tests/test_methods/test_distilbert.py b/tests/test_methods/test_distilbert.py index 668d6e8b01..f9af758640 100644 --- a/tests/test_methods/test_distilbert.py +++ b/tests/test_methods/test_distilbert.py @@ -1,6 +1,6 @@ from transformers import DistilBertConfig -from .utils import * +from .generator import * class DistilBertAdapterTestBase(TextAdapterTestBase): diff --git a/tests/test_methods/test_electra.py b/tests/test_methods/test_electra.py index 19d6622b93..ad17b1c0cd 100644 --- a/tests/test_methods/test_electra.py +++ b/tests/test_methods/test_electra.py @@ -1,6 +1,6 @@ from transformers import ElectraConfig -from .utils import * +from .generator import * class ElectraAdapterTestBase(TextAdapterTestBase): diff --git a/tests/test_methods/test_encoder_decoder.py b/tests/test_methods/test_encoder_decoder.py index 5a25bb94a7..b6a33ded10 100644 --- a/tests/test_methods/test_encoder_decoder.py +++ b/tests/test_methods/test_encoder_decoder.py @@ -1,7 +1,7 @@ from adapters import init from transformers import AutoModelForSeq2SeqLM, BertConfig, EncoderDecoderConfig, EncoderDecoderModel -from .utils import * +from .generator import * class EncoderDecoderAdapterTestBase(TextAdapterTestBase): diff --git a/tests/test_methods/test_gpt2.py b/tests/test_methods/test_gpt2.py index 848ae86999..227cb63bfc 100644 --- a/tests/test_methods/test_gpt2.py +++ b/tests/test_methods/test_gpt2.py @@ -1,6 +1,6 @@ from transformers import GPT2Config -from .utils import * +from .generator import * class GPT2AdapterTestBase(TextAdapterTestBase): diff --git a/tests/test_methods/test_llama.py b/tests/test_methods/test_llama.py index ad4b1d0603..a950ad3c53 100644 --- a/tests/test_methods/test_llama.py +++ b/tests/test_methods/test_llama.py @@ -1,6 +1,6 @@ from transformers.models.llama.configuration_llama import LlamaConfig -from .utils import * +from .generator import * class LlamaAdapterTestBase(TextAdapterTestBase): diff --git a/tests/test_methods/test_mbart.py b/tests/test_methods/test_mbart.py index 08c51f6ee4..31130f0d3b 100644 --- a/tests/test_methods/test_mbart.py +++ b/tests/test_methods/test_mbart.py @@ -1,6 +1,6 @@ from transformers import MBartConfig -from .utils import * +from .generator import * class MBartAdapterTestBase(TextAdapterTestBase): diff --git a/tests/test_methods/test_mistral.py b/tests/test_methods/test_mistral.py index 04573aaa34..61615dd311 100644 --- a/tests/test_methods/test_mistral.py +++ b/tests/test_methods/test_mistral.py @@ -1,6 +1,6 @@ from 
transformers.models.mistral.configuration_mistral import MistralConfig -from .utils import * +from .generator import * class MistralAdapterTestBase(TextAdapterTestBase): diff --git a/tests/test_methods/test_mt5.py b/tests/test_methods/test_mt5.py index 45a96a1f10..744ff832cf 100644 --- a/tests/test_methods/test_mt5.py +++ b/tests/test_methods/test_mt5.py @@ -1,6 +1,6 @@ from transformers import MT5Config -from .utils import * +from .generator import * @require_torch diff --git a/tests/test_methods/test_plbart.py b/tests/test_methods/test_plbart.py index c722ecaeec..f620aba6e2 100644 --- a/tests/test_methods/test_plbart.py +++ b/tests/test_methods/test_plbart.py @@ -1,6 +1,6 @@ from transformers import PLBartConfig -from .utils import * +from .generator import * class PLBartAdapterTestBase(TextAdapterTestBase): diff --git a/tests/test_methods/test_roberta.py b/tests/test_methods/test_roberta.py index 445636d5f9..1a8f403164 100644 --- a/tests/test_methods/test_roberta.py +++ b/tests/test_methods/test_roberta.py @@ -1,6 +1,6 @@ from transformers import RobertaConfig -from .utils import * +from .generator import * class RobertaAdapterTestBase(TextAdapterTestBase): diff --git a/tests/test_methods/test_t5.py b/tests/test_methods/test_t5.py index 3d99b63439..78dfbef695 100644 --- a/tests/test_methods/test_t5.py +++ b/tests/test_methods/test_t5.py @@ -1,6 +1,6 @@ from transformers import T5Config -from .utils import * +from .generator import * @require_torch diff --git a/tests/test_methods/test_vit.py b/tests/test_methods/test_vit.py index 69bfc44028..11a2a072f7 100644 --- a/tests/test_methods/test_vit.py +++ b/tests/test_methods/test_vit.py @@ -1,6 +1,6 @@ from transformers import ViTConfig -from .utils import * +from .generator import * class ViTAdapterTestBase(VisionAdapterTestBase): diff --git a/tests/test_methods/test_whisper.py b/tests/test_methods/test_whisper.py index 0d1cbbbd40..62d22f644f 100644 --- a/tests/test_methods/test_whisper.py +++ b/tests/test_methods/test_whisper.py @@ -1,6 +1,6 @@ from transformers import WhisperConfig -from .utils import * +from .generator import * class WhisperAdapterTestBase(AudioAdapterTestBase): diff --git a/tests/test_methods/test_xlm_roberta.py b/tests/test_methods/test_xlm_roberta.py index a54f716b78..45f33b2ff1 100644 --- a/tests/test_methods/test_xlm_roberta.py +++ b/tests/test_methods/test_xlm_roberta.py @@ -1,6 +1,6 @@ from transformers import XLMRobertaConfig -from .utils import * +from .generator import * class XLMRobertaAdapterTestBase(TextAdapterTestBase): diff --git a/tests/test_methods/test_xmod.py b/tests/test_methods/test_xmod.py index 388102ddb3..a84b168d6f 100644 --- a/tests/test_methods/test_xmod.py +++ b/tests/test_methods/test_xmod.py @@ -1,6 +1,6 @@ from transformers import XmodConfig -from .utils import * +from .generator import * class XmodAdapterTestBase(TextAdapterTestBase): diff --git a/tests/test_impl/composition/test_adapter_composition.py b/tests/test_misc/test_adapter_composition.py similarity index 99% rename from tests/test_impl/composition/test_adapter_composition.py rename to tests/test_misc/test_adapter_composition.py index 410aea7ec8..29dade33a8 100644 --- a/tests/test_impl/composition/test_adapter_composition.py +++ b/tests/test_misc/test_adapter_composition.py @@ -5,7 +5,7 @@ import adapters from adapters import IA3Config, LoRAConfig, PrefixTuningConfig, SeqBnConfig from adapters.composition import Average, BatchSplit, Fuse, Parallel, Split, Stack, parse_composition -from tests.test_impl.utils import ids_tensor +from 
tests.test_methods.method_test_impl.utils import ids_tensor from transformers import BertConfig, BertForSequenceClassification from transformers.testing_utils import require_torch, torch_device diff --git a/tests/test_impl/core/test_adapter_config.py b/tests/test_misc/test_adapter_config.py similarity index 100% rename from tests/test_impl/core/test_adapter_config.py rename to tests/test_misc/test_adapter_config.py diff --git a/tests/test_impl/heads/test_adapter_custom_head.py b/tests/test_misc/test_adapter_custom_head.py similarity index 98% rename from tests/test_impl/heads/test_adapter_custom_head.py rename to tests/test_misc/test_adapter_custom_head.py index 6de8a45d2c..68a6dd1946 100644 --- a/tests/test_impl/heads/test_adapter_custom_head.py +++ b/tests/test_misc/test_adapter_custom_head.py @@ -5,7 +5,7 @@ from adapters import AutoAdapterModel from adapters.heads import ClassificationHead, PredictionHead -from tests.test_impl.utils import ids_tensor +from tests.test_methods.method_test_impl.utils import ids_tensor from transformers import AutoConfig from transformers.testing_utils import require_torch, torch_device diff --git a/tests/test_impl/core/test_adapter_fusion_config.py b/tests/test_misc/test_adapter_fusion_config.py similarity index 100% rename from tests/test_impl/core/test_adapter_fusion_config.py rename to tests/test_misc/test_adapter_fusion_config.py diff --git a/tests/test_impl/core/test_adapter_hub.py b/tests/test_misc/test_adapter_hub.py similarity index 99% rename from tests/test_impl/core/test_adapter_hub.py rename to tests/test_misc/test_adapter_hub.py index 7aa22e22c8..f25ca97a74 100644 --- a/tests/test_impl/core/test_adapter_hub.py +++ b/tests/test_misc/test_adapter_hub.py @@ -8,7 +8,7 @@ from adapters import ADAPTER_CONFIG_MAP, AdapterConfig, BertAdapterModel, get_adapter_config_hash from adapters.trainer import AdapterTrainer as Trainer from adapters.utils import find_in_index -from tests.test_impl.utils import ids_tensor +from tests.test_methods.method_test_impl.utils import ids_tensor from transformers import ( AutoModel, AutoTokenizer, diff --git a/tests/test_impl/core/test_adapter_safetensors.py b/tests/test_misc/test_adapter_safetensors.py similarity index 100% rename from tests/test_impl/core/test_adapter_safetensors.py rename to tests/test_misc/test_adapter_safetensors.py diff --git a/tests/test_impl/core/test_adapter_save_id2label.py b/tests/test_misc/test_adapter_save_id2label.py similarity index 100% rename from tests/test_impl/core/test_adapter_save_id2label.py rename to tests/test_misc/test_adapter_save_id2label.py diff --git a/tests/test_impl/trainer/__init__.py b/tests/test_misc/test_adapter_trainer/__init__.py similarity index 100% rename from tests/test_impl/trainer/__init__.py rename to tests/test_misc/test_adapter_trainer/__init__.py diff --git a/tests/test_impl/trainer/test_adapter_trainer.py b/tests/test_misc/test_adapter_trainer/test_adapter_trainer.py similarity index 100% rename from tests/test_impl/trainer/test_adapter_trainer.py rename to tests/test_misc/test_adapter_trainer/test_adapter_trainer.py diff --git a/tests/test_impl/trainer/test_adapter_trainer_ext.py b/tests/test_misc/test_adapter_trainer/test_adapter_trainer_ext.py similarity index 100% rename from tests/test_impl/trainer/test_adapter_trainer_ext.py rename to tests/test_misc/test_adapter_trainer/test_adapter_trainer_ext.py From ffd21a9b1afa04cbd9f6e1adfef8826c0bb40c6c Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Wed, 18 Dec 2024 15:54:43 +0100 Subject: [PATCH 26/63] Add 
debug statements and only execute failing test --- Makefile | 3 ++- src/adapters/models/mbart/modeling_mbart.py | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a7ce91e4b6..382642bd39 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,8 @@ test: # run all tests for the adapter methods for all adapter models test-adapter-methods: - python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ + # python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ + pytest -s tests/test_methods/test_mbart.py::Composition::test_parallel_training_lora # run a subset of the adapter method tests for all adapter models # list of all subsets: [core, heads, embeddings, composition, prefix_tuning, prompt_tuning, reft, unipelt, compacter, bottleneck, ia3, lora, config_union] diff --git a/src/adapters/models/mbart/modeling_mbart.py b/src/adapters/models/mbart/modeling_mbart.py index 88dacef9a0..4eef5b5838 100644 --- a/src/adapters/models/mbart/modeling_mbart.py +++ b/src/adapters/models/mbart/modeling_mbart.py @@ -96,7 +96,9 @@ def forward( key_states, value_states, attention_mask = self.prefix_tuning( key_states, value_states, hidden_states, attention_mask ) + print(f"query_states before: {query_states.shape}") (query_states,) = adjust_tensors_for_parallel(key_states, query_states) + print(f"query_states after: {query_states.shape}") bsz = query_states.size(0) proj_shape = (bsz * self.num_heads, -1, self.head_dim) From 0dba87c787a8f5175e49ce0a2a59eb50cee1291a Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Wed, 18 Dec 2024 16:10:27 +0100 Subject: [PATCH 27/63] Add verbose information --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 382642bd39..533e0514fa 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ test: # run all tests for the adapter methods for all adapter models test-adapter-methods: # python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ - pytest -s tests/test_methods/test_mbart.py::Composition::test_parallel_training_lora + pytest -s tests/test_methods/test_mbart.py::Composition::test_parallel_training_lora --tb=long # run a subset of the adapter method tests for all adapter models # list of all subsets: [core, heads, embeddings, composition, prefix_tuning, prompt_tuning, reft, unipelt, compacter, bottleneck, ia3, lora, config_union] From c3334672cfd4df85fb378600b0e1e0e7e6c07f0e Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Wed, 18 Dec 2024 16:32:08 +0100 Subject: [PATCH 28/63] check package versions --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 533e0514fa..60c2fd0c4c 100644 --- a/Makefile +++ b/Makefile @@ -33,6 +33,7 @@ style: # run all tests in the library test: python -m pytest -n auto --dist=loadfile -s -v ./tests/ + python -c "import transformers; print(transformers.__version__)" # run all tests for the adapter methods for all adapter models test-adapter-methods: @@ -45,6 +46,7 @@ subset ?= test-adapter-method-subset: @echo "Running subset $(subset)" python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m $(subset) + pip list # run the hugginface test suite for all adapter models From aac4038a6ccadd24b74b7cf78b6bb4ce292bb395 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Wed, 18 Dec 2024 17:06:13 +0100 Subject: [PATCH 29/63] More debugging statements --- Makefile | 5 +++-- src/adapters/models/mbart/modeling_mbart.py | 7 ++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git 
a/Makefile b/Makefile index 60c2fd0c4c..7f521d4e1a 100644 --- a/Makefile +++ b/Makefile @@ -38,7 +38,8 @@ test: # run all tests for the adapter methods for all adapter models test-adapter-methods: # python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ - pytest -s tests/test_methods/test_mbart.py::Composition::test_parallel_training_lora --tb=long + pytest tests/test_methods/test_mbart.py::Composition::test_parallel_training_lora -s -vvv + pip list # run a subset of the adapter method tests for all adapter models # list of all subsets: [core, heads, embeddings, composition, prefix_tuning, prompt_tuning, reft, unipelt, compacter, bottleneck, ia3, lora, config_union] @@ -46,7 +47,7 @@ subset ?= test-adapter-method-subset: @echo "Running subset $(subset)" python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m $(subset) - pip list + # run the hugginface test suite for all adapter models diff --git a/src/adapters/models/mbart/modeling_mbart.py b/src/adapters/models/mbart/modeling_mbart.py index 4eef5b5838..f437d89648 100644 --- a/src/adapters/models/mbart/modeling_mbart.py +++ b/src/adapters/models/mbart/modeling_mbart.py @@ -186,8 +186,13 @@ def forward( Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned tensors for more detail. """ + print(f"Before parallel adjustment") + print(f"EncoderLayer - hidden_states.shape = {hidden_states.shape}") + print(f"EncoderLayer - attention_mask.shape = {attention_mask.shape}") adjust_tensors_for_parallel_(hidden_states, attention_mask) - + print(f"After parallel adjustment") + print(f"EncoderLayer - hidden_states.shape = {hidden_states.shape}") + print(f"EncoderLayer - attention_mask.shape = {attention_mask.shape}") residual = hidden_states hidden_states = self.self_attn_layer_norm(hidden_states) hidden_states, attn_weights, _ = self.self_attn( From 4af10df4dfdf5e1f30e1fbea92c3434bc98c9b34 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Mon, 23 Dec 2024 23:32:12 +0100 Subject: [PATCH 30/63] Fix failing test: - test was previously not included in testbase -> exclude it also now - revert debugging changes --- Makefile | 5 +---- src/adapters/models/mbart/modeling_mbart.py | 9 +-------- tests/test_methods/test_mbart.py | 3 +++ 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 7f521d4e1a..ed978553ef 100644 --- a/Makefile +++ b/Makefile @@ -37,9 +37,7 @@ test: # run all tests for the adapter methods for all adapter models test-adapter-methods: - # python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ - pytest tests/test_methods/test_mbart.py::Composition::test_parallel_training_lora -s -vvv - pip list + python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ # run a subset of the adapter method tests for all adapter models # list of all subsets: [core, heads, embeddings, composition, prefix_tuning, prompt_tuning, reft, unipelt, compacter, bottleneck, ia3, lora, config_union] @@ -49,7 +47,6 @@ test-adapter-method-subset: python -m pytest -n auto --dist=loadfile -s -v ./tests/test_methods/ -m $(subset) - # run the hugginface test suite for all adapter models test-adapter-models: python -m pytest -n auto --dist=loadfile -s -v ./tests/test_models/ diff --git a/src/adapters/models/mbart/modeling_mbart.py b/src/adapters/models/mbart/modeling_mbart.py index 7f8ee2cbf4..d7c5e1f868 100644 --- a/src/adapters/models/mbart/modeling_mbart.py +++ b/src/adapters/models/mbart/modeling_mbart.py @@ -113,9 +113,7 @@ def forward( 
key_states, value_states, attention_mask = self.prefix_tuning( key_states, value_states, hidden_states, attention_mask ) - print(f"query_states before: {query_states.shape}") (query_states,) = adjust_tensors_for_parallel(key_states, query_states) - print(f"query_states after: {query_states.shape}") bsz = query_states.size(0) # >>> END AH Changes <<< @@ -454,13 +452,8 @@ def forward( Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned tensors for more detail. """ - print(f"Before parallel adjustment") - print(f"EncoderLayer - hidden_states.shape = {hidden_states.shape}") - print(f"EncoderLayer - attention_mask.shape = {attention_mask.shape}") adjust_tensors_for_parallel_(hidden_states, attention_mask) - print(f"After parallel adjustment") - print(f"EncoderLayer - hidden_states.shape = {hidden_states.shape}") - print(f"EncoderLayer - attention_mask.shape = {attention_mask.shape}") + residual = hidden_states hidden_states = self.self_attn_layer_norm(hidden_states) hidden_states, attn_weights, _ = self.self_attn( diff --git a/tests/test_methods/test_mbart.py b/tests/test_methods/test_mbart.py index 31130f0d3b..9d0c861e86 100644 --- a/tests/test_methods/test_mbart.py +++ b/tests/test_methods/test_mbart.py @@ -18,6 +18,9 @@ class MBartAdapterTestBase(TextAdapterTestBase): ) tokenizer_name = "facebook/mbart-large-cc25" + def test_parallel_training_lora(self): + self.skipTest("Not supported for MBart") + method_tests = generate_method_tests( MBartAdapterTestBase, excluded_tests=["ConfigUnion", "Embeddings", "PromptTuning"] From dbd49650dd67b55c5cb387e651f7a9d3d5ef4a00 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Tue, 24 Dec 2024 11:26:12 +0100 Subject: [PATCH 31/63] Update README --- tests/README.md | 74 +++++++++++++++++++++++++++--------------------- tests/image.png | Bin 36003 -> 0 bytes 2 files changed, 41 insertions(+), 33 deletions(-) delete mode 100644 tests/image.png diff --git a/tests/README.md b/tests/README.md index 832ac82c8f..ebd1ac4123 100644 --- a/tests/README.md +++ b/tests/README.md @@ -1,45 +1,53 @@ # Testing the adapters library -This README gives an overview of how the test directory is organized and the possibilities to group and execute different kinds of tests. -## Test directory structure +This README gives an overview of how the test directory is organized and the possibilities of grouping and executing different kinds of tests. +## Overview test directory structure ``` tests/ ├── __init__.py -├── fixtures/ # Datasets, samples, ... +├── fixtures/ # Datasets, test samples, ... | └── ... -├── test_impl/ # Test Implementations +├── test_methods/ # Dynamic adapter method tests (all models) │ ├── __init__.py -│ ├── composition/ +│ ├── method_test_impl/ # Implementation of tests │ │ ├── __init__.py -│ │ ├── test_adapter_composition.py -│ │ └── test_parallel.py -│ ├── core/ -│ │ ├── __init__.py -│ │ ├── test_adapter_config.py -│ │ ├── test_adapter_conversion.py +│ │ ├── core/ +│ │ ├── composition/ │ │ └── ... -│ ├── embeddings/ +│ ├── base.py # Base from which model test bases inherit from +│ ├── generator.py # Testcase generation and registration +│ ├── test_albert.py # Example model test base testing adapter methods on the model +│ ├── test_beit.py │ └── ... -├── test_methods/ # Test entry points -│ └── __init__.py -├── test_models/ # Test entry points +├── test_misc/ # Miscellaneous adapter method tests (single model) +│ ├── test_adapter_config.py +│ └── ... 
+├── test_models/ # Adapter model tests with Hugging Face test suite │ └── __init__.py +│ │ ├── base.py +│ │ ├── test_albert.py +│ │ └── ... ``` -## Test Types +We differentiate between three kinds of tests: -1. Adapter method tests: test the **implementation of the adapter methods**, such as the different kind of adapters or costum heads. - - These tests are exectued for each model, hence there is a testfile for each model, e.g. `test_albert.py` - - Each model test file is organized in various test classes to group similar tests - - While this results in a little bit more boilerplate code, it allows for an organized view in the test viewer, which in return also allows to conviniently execute subgroups of test, e.g. like this: - ![alt text](image.png) -2. Adapter model tests: test the **implementation of the adapter models** on which the adapter methods can be used. - - We resort to the thorough test suite of Hugging Face and test our models on it. +1. Dynamic adapter method tests: These tests cover most functionalities of the adapters library, e.g. the individual adapter methods (LoRA, prompt tuning) or head functionalities and **are executed on every model** +2. Miscellaneous adapter method tests: These are the remaining tests not covered by the dynamic tests and are **only executed on a single model** to spare ressources as repeated execution on every model would not provide additional value +3. Adapter model tests: These tests **check the implementation of the adapter models** themselves, by applying the Hugging Face model test suite -## Utilizing pytest markers +## Test Generator $ Pytest Markers + +This chapter zooms in on the test_methods directory. The main actor here is the file `generator.py` which is used by every model test base to generate the appropriate set of adapter method tests. Those tests are then registered in the respective model test file, like this: -Each class in each model test file in `tests/test_methods` is decorated with a marker of a certain type, e.g.: +``` python +method_tests = generate_method_tests(AlbertAdapterTestBase) + +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class +``` + +Each generatable class in `tests/test_methods` is decorated with a marker of a certain type, e.g.: ``` python @require_torch @pytest.mark.lora @@ -51,15 +59,15 @@ class LoRA( pass ``` -These markers can be used to execute a certain type of test **for every model**: -- e.g.: for executing the compacter tests for every model we can write: +These markers can be used to execute a certain type of test **for every model**. To use them you have two options: +1. Use `make` command: + ```bash + make test-adapter-method-subset subset=lora + ``` + +2. Navigate to directory and directly execute: ```bash cd tests/test_methods pytest -m lora ``` - This command will execute all lora tests for every model in the adapters libray - -Alternatively to navigating to `tests/test_methods` in the terminal you can select a command from the `Makefile` in the root directory and launch such a subset of test via e.g.: -```bash -make test-adapter-lora -``` \ No newline at end of file +Both versions will execute all LoRA tests for every model in the adapters library. 
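To make the generator mechanism described above concrete, here is a minimal, hypothetical sketch of how such per-method test classes could be assembled and registered. It is not the actual `generator.py` implementation: the mixin classes and the `unittest`-based composition are illustrative assumptions; only the `generate_method_tests(base, excluded_tests=...)` signature is taken from the patches above.

```python
# Illustrative sketch only: a simplified stand-in for tests/test_methods/generator.py.
# LoRATestMixin and CompacterTestMixin are hypothetical placeholders, not the real mixins.
import unittest


class LoRATestMixin:
    def test_add_lora(self):
        self.assertTrue(True)  # placeholder check


class CompacterTestMixin:
    def test_add_compacter(self):
        self.assertTrue(True)  # placeholder check


METHOD_MIXINS = {"LoRA": LoRATestMixin, "Compacter": CompacterTestMixin}


def generate_method_tests(model_test_base, excluded_tests=None):
    """Build one dynamically composed test class per adapter method for a model test base."""
    excluded_tests = excluded_tests or []
    test_classes = {}
    for method_name, mixin in METHOD_MIXINS.items():
        if method_name in excluded_tests:
            continue
        # type() creates the class on the fly; assigning it to the calling module's
        # globals() (as shown in the README snippet above) makes pytest collect it.
        test_classes[method_name] = type(
            method_name, (mixin, model_test_base, unittest.TestCase), {}
        )
    return test_classes
```

Composing the classes dynamically keeps each per-model test file down to a few lines while still giving every adapter method its own named class in the test report.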
diff --git a/tests/image.png b/tests/image.png deleted file mode 100644 index 34e881b9436d0675a2d3435fb9a137b38dfbc26c..0000000000000000000000000000000000000000 GIT binary patch [binary payload of the deleted tests/image.png (36003 bytes) omitted]
z8Tcu~MaFD^bA8id*AS-AbkaU1a#4wQSqkILXhFtF7QO;WO_|$C23soVMWLUwmv`zz zi({{)c_5v!AiDqlgZknJO@ra6YdYfIh z{9t{)#w(ol*yH@`oef2tr^?D9S-MCaZfyPPJ6#gork(K-1lH%Z&KiBCyW&$l%e+}f zA{**i_ze~FuA8aN0iB9-+E;+zMlRcKBjL8Zru7Z$Glt`nPZw{}go#R?Go0Hb?}da$ z>XkdT9nfvIN;-NxoYHj=yY*zS5|lRdFQ9lK&#T8XO=j}&CDKT!Ntw$J*CL893PzkY z+G=+frNLcSYG(#dqUdlzr^R;bM_2sKL}=R9Y>%HMBJsn0dak-naw?9QDGElW!|Y$& z8bWVhPAd+tXkhk_JgpCyxm!^nQKFU>f9McQ`Ti9>9wtKFbG)J&7f1 zby`l1u87)izty$$^1#Ll6b0^sF3EdieK+b#z@Ysm7uN?xdgm_pLdpK@ae72-0 z-48z9V1j=QM+r#t*&UyJUxIe)d_eYrI{$9@K}bpP{fOcp)n_}@K!)cA?5|kvFdQZC zBfWoU6$6oe)<1o#{h@N8jo>GR<%y^CWY+m#u2He^qBeCgeS9F4^;UfIEDQq!1G3;$ zxsfw&%>$6Y_c`0+X&<6}%k@c5lcWA84_i++t9iv8Dpr-hV_LO>XzFr*3lo4Vvx0jO z7$9&lJ$yr#qg(p;&b93Rn7?zVHdS)zpWT|kX14})24$Ox0H<#|6z&7^-XJ*ZQ;-ch z%wCOvh)>>V&;8Z*-!rvB@<%V7oIXa)XAnRE-7a)khZKn55}XI3q>}nG5X=1Z3Rfb1#9~&u^q3LTb~-A-=P8=NFjpfSb0hy9W2o= z3C3U@yS|kKx!%@Q&be1^z)@tnq4IfWXf_GytDTuE#a_watULL41!6VK%J=TUI{2w0 zP!Q9gR#L<0?%lhU##$CwT1J<{pkoFPH<+#ZnIb{g%Xg7cD`O_?K_enGIi#21qnPSr9kqKqfYAZ?coW6eHAu=lNfFg zYdnI%-a;i6R+aW2t)M@ETL*8EP;Nhngm77KeN8-iUeC_oB)x9 z=PB84RAXw;fzf%-Rj7#`Vf_!csv470_ftaWxlj?PON=`Bly(k)D>!IXFn3+`%HH${ zfcT0Aa7`xjJV3F)&sHwsP?6Mni@r5{#kcuOeZuPnO8&2iQkgy}#45;j3+3FD@e~0* ziMmkRTrslD#{?fWh4$sU^Oy(Y#=!3|+mSMSo4SEZVD<`VlYd1WZTXa=)UAdUPm@Ep z5G+iwSlo>kC?)1?yi^wy!^%+;LCY^ro-DMi6F>i#e*S;&cMJu;Q;@T50V>d2 zDfbi6C)q?>@W-xRj_PWlAUlFQm}QoJ(ynV_Cxrp21}fgZ^D`Fo zT6>g_k`1G=oKiKb=pq$*J=IR5~U{uZY8*cgi*k~IsvI5kih?4 zB@R3jHjpyh9w$=VGCTZP9y>6c0sMJQ@?T1Ag3JC&s+a#vkl!#9finavTYN1^AtwUVBBi5uLe@=T z>jL1x4|fKRcym4@@X9r%EjdCQ#D3QUmMCsD`Qx0)$`&%kGCl_a2q-cGfpF$A(-$x9 z{lSNhldVfhO6p0YCeKrVC-@rNvkOGN>=|}50jfMS5f7+#w)@>_b;FS4_A4S_0PPgH zX%GoA-=?bdAEKj`Y;5xOHYRj{VT%9QKLIF2hIZx(+$G$k$V?&BwY z;D+^&U4Eipg*@-cajnmmI&F8UcLTY8-8Pl^D2I+E5$lv+_bcpkvaUqcY?VM15bOr- z|Fa?z7#==!d2X{nLFMG+EcDHB*ytu4&uG4t-4D`gNDN@oCZ(cUCY5k=iIq>)0T07j zuyEkCG{BLRLSAX)$xtkJ(05ewE{XmA_dDmOo8=2=s0(Bu5-~7=YxL6@@WkH%k4UV6 zo*uWBD0*gpqSXfG;C0VCuf{V$SQOBIxFaKD!*}CwSn9aApY7m5MrmmO2ETDJU|fVu<1*@q-NkC$ zU#qI}$2fi0K*9h_+%}uR6-Y20W+z*c2GpR|;;MaaDnR4iTchIi^Scvz<#9+l74Z@w z-Kd2)J|a60j8{Q?H3?8ux?;53K*<{`Fp)t<^1HZz;}QwlI!t-MGhvY+oJYy4U%?Im zJ3;4jX0zasZAe&H)XgV1lwh#jE7ydf%Ek_ponI{+gCKOmk%)z}uszo##Ln-1)o0|# zYSXLBQmNjTb|H6Z0Z@qr7}qWHMIFG^{(EO{ufRyzKHc%zuE&zU`xlTd7bL@B!syWo zVsG*s4grY?dj<__(0sfB1hZ1OSzm()v7*A3Vga($j$wy#W|GFs%Zrqhlo^oTJ^-qW z={K+)031_n5J??6wp7t*B=;r$)G@@9`7uu+^Q zu(Y&TFVlW;{RMSl3`v0pb$zTj8l1MYz+=w(*D~W*lw8@h5{957iItJDCWfO7EJ{Kz zQN!qP7=637yof6S6_8U>KfHEjAk#i<+f>?Bb))K(UM~b3r+%G${ Date: Fri, 27 Dec 2024 17:33:13 +0100 Subject: [PATCH 32/63] Fix hf version and clip tests --- hf_transformers | 2 +- tests/test_methods/test_clip_text.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hf_transformers b/hf_transformers index 53fad641cf..052e652d6d 160000 --- a/hf_transformers +++ b/hf_transformers @@ -1 +1 @@ -Subproject commit 53fad641cfdb5105e2470bcf3ef17ea8e25cc300 +Subproject commit 052e652d6d53c2b26ffde87e039b723949a53493 diff --git a/tests/test_methods/test_clip_text.py b/tests/test_methods/test_clip_text.py index 82db4fd30c..f93e714cd5 100644 --- a/tests/test_methods/test_clip_text.py +++ b/tests/test_methods/test_clip_text.py @@ -76,7 +76,7 @@ class CLIPAdapterTestBase(TextAdapterTestBase): intermediate_size=37, ), CLIPVisionConfig( - image_size=30, + image_size=224, hidden_size=32, num_hidden_layers=4, num_attention_heads=4, From d338105ad72d3f9e0a809123f843ab8c67b25fe2 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Mon, 6 Jan 2025 22:00:26 +0100 Subject: [PATCH 33/63] Draft import structure and adapter model class --- 
src/adapters/models/mllama/_init_.py | 39 +++++ src/adapters/models/mllama/adapter_model.py | 153 ++++++++++++++++++++ 2 files changed, 192 insertions(+) create mode 100644 src/adapters/models/mllama/_init_.py create mode 100644 src/adapters/models/mllama/adapter_model.py diff --git a/src/adapters/models/mllama/_init_.py b/src/adapters/models/mllama/_init_.py new file mode 100644 index 0000000000..12ff0ddd99 --- /dev/null +++ b/src/adapters/models/mllama/_init_.py @@ -0,0 +1,39 @@ +# flake8: noqa +# There's no way to ignore "F401 '...' imported but unused" warnings in this +# module, but to preserve other warnings. So, don't check this module at all. + +# Copyright 2020 The Adapter-Hub Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import TYPE_CHECKING + +from transformers.utils import _LazyModule + + +_import_structure = { + "adapter_model": ["MllamaAdapterModel"], +} + + +if TYPE_CHECKING: + from .adapter_model import MllamaAdapterModel + +else: + import sys + + sys.modules[__name__] = _LazyModule( + __name__, + globals()["__file__"], + _import_structure, + ) diff --git a/src/adapters/models/mllama/adapter_model.py b/src/adapters/models/mllama/adapter_model.py new file mode 100644 index 0000000000..8812a2ecd4 --- /dev/null +++ b/src/adapters/models/mllama/adapter_model.py @@ -0,0 +1,153 @@ +import logging +from typing import List, Optional, Tuple, Union + +import torch + +from hf_transformers.build.lib.transformers.cache_utils import Cache +from hf_transformers.build.lib.transformers.modeling_outputs import BaseModelOutputWithPast +from transformers.models.mllama import MLLAMA_START_DOCSTRING, MllamaPreTrainedModel, MllamaTextModel +from transformers.utils import add_start_docstrings + +from ...composition import adjust_tensors_for_parallel +from ...heads import ModelWithFlexibleHeadsAdaptersMixin +from ...model_mixin import EmbeddingAdaptersWrapperMixin +from ...wrappers import init + + +logger = logging.getLogger(__name__) + + +@add_start_docstrings( + """ + TODO + """, + MLLAMA_START_DOCSTRING, +) +class MllamaTextAdapterModel( + EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MllamaPreTrainedModel +): + head_types = [ + "causal_lm", + ] # TODO: "conditional_generation" + + def __init__(self, config): + super().__init__(config) + self.model = MllamaTextModel + init(self.model) + + self._init_head_modules() + + self.init_weights() + + # Model parallel + self.model_parallel = False + self.device_map = None + self.post_init() + + def forward( + self, + input_ids: Optional[torch.LongTensor] = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + cross_attention_states: Optional[torch.FloatTensor] = None, + cross_attention_mask: Optional[torch.Tensor] = None, + full_text_row_masked_out_mask: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + 
use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + cache_position: Optional[torch.LongTensor] = None, + head=None, + output_adapter_gating_scores=False, + output_adapter_fusion_attentions=False, + **kwargs, + ) -> Union[Tuple, BaseModelOutputWithPast]: + + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + use_cache = use_cache if use_cache is not None else self.config.use_cache + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + outputs, context = self.model( + input_ids=input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + cross_attention_states=cross_attention_states, + cross_attention_mask=cross_attention_mask, + full_text_row_masked_out_mask=full_text_row_masked_out_mask, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + cache_position=cache_position, + output_adapter_gating_scores=output_adapter_gating_scores, + output_adapter_fusion_attentions=output_adapter_fusion_attentions, + adapter_input_parallelized=kwargs.pop("adapter_input_parallelized", False), + output_context=True, + ) + kwargs["context"] = context + batch_size = outputs[0].shape[0] + + if self.config.pad_token_id is None: + # TODO-AH: this may result in unexpected behavior for classification. Find a better way to do this? + sequence_lengths = -1 + else: + if input_ids is not None: + sequence_lengths = torch.ne(input_ids, self.config.pad_token_id).sum(-1) - 1 + (sequence_lengths,) = adjust_tensors_for_parallel(outputs[0], sequence_lengths) + else: + sequence_lengths = -1 + logger.warning( + f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. 
Results may be " + "unexpected if using padding tokens in conjunction with `inputs_embeds.`" + ) + + cls_logits = outputs[0][range(batch_size), sequence_lengths] + + outputs = self.forward_head( + outputs, + head_name=head, + cls_output=cls_logits, + attention_mask=attention_mask, + return_dict=return_dict, + **kwargs, + ) + + return outputs + + def prepare_inputs_for_generation( + self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs + ): + if past_key_values: + input_ids = input_ids[:, -1:] + + position_ids = kwargs.get("position_ids", None) + if attention_mask is not None and position_ids is None: + # create position_ids on the fly for batch generation + position_ids = attention_mask.long().cumsum(-1) - 1 + position_ids.masked_fill_(attention_mask == 0, 1) + if past_key_values: + position_ids = position_ids[:, -1].unsqueeze(-1) + + # if `inputs_embeds` are passed, we only want to use them in the 1st generation step + if inputs_embeds is not None and past_key_values is None: + model_inputs = {"inputs_embeds": inputs_embeds} + else: + model_inputs = {"input_ids": input_ids} + + model_inputs.update( + { + "position_ids": position_ids, + "past_key_values": past_key_values, + "use_cache": kwargs.get("use_cache"), + "attention_mask": attention_mask, + "adapter_input_parallelized": kwargs.pop("adapter_input_parallelized", False), + } + ) + return model_inputs From dc5dc6d9f01b79586ef90fec354caafde51d6712 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Tue, 7 Jan 2025 18:30:34 +0100 Subject: [PATCH 34/63] Update gitignore for development --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 8b1478d39b..a6d5b296c5 100644 --- a/.gitignore +++ b/.gitignore @@ -176,4 +176,7 @@ scripts/git-strip-merge tests/backwards_compatibility/Ref_Out # backwards compatibility -model_outputs \ No newline at end of file +model_outputs + +# TODO: remove after mllama dev +explore_mllama \ No newline at end of file From 7b46d587d6ab6c5cd514dbab4e9df07a836cff47 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Wed, 8 Jan 2025 09:24:23 +0100 Subject: [PATCH 35/63] More thorough draft of adapter model --- src/adapters/models/mllama/adapter_model.py | 253 ++++++++++++++------ 1 file changed, 183 insertions(+), 70 deletions(-) diff --git a/src/adapters/models/mllama/adapter_model.py b/src/adapters/models/mllama/adapter_model.py index 8812a2ecd4..5dd58f19cc 100644 --- a/src/adapters/models/mllama/adapter_model.py +++ b/src/adapters/models/mllama/adapter_model.py @@ -2,10 +2,18 @@ from typing import List, Optional, Tuple, Union import torch +from torch import nn +from adapters.heads.language_modeling import CausalLMOutputWithPast from hf_transformers.build.lib.transformers.cache_utils import Cache from hf_transformers.build.lib.transformers.modeling_outputs import BaseModelOutputWithPast -from transformers.models.mllama import MLLAMA_START_DOCSTRING, MllamaPreTrainedModel, MllamaTextModel +from transformers.models.mllama import ( + MLLAMA_START_DOCSTRING, + MllamaPreTrainedModel, + MllamaVisionModel, + MllamaTextModel, +) +from transformers.models.mllama.modeling_mllama import _prepare_cross_attention_mask from transformers.utils import add_start_docstrings from ...composition import adjust_tensors_for_parallel @@ -17,62 +25,126 @@ logger = logging.getLogger(__name__) -@add_start_docstrings( - """ - TODO - """, - MLLAMA_START_DOCSTRING, -) -class MllamaTextAdapterModel( - EmbeddingAdaptersWrapperMixin, 
ModelWithFlexibleHeadsAdaptersMixin, MllamaPreTrainedModel -): - head_types = [ - "causal_lm", - ] # TODO: "conditional_generation" +@add_start_docstrings(MLLAMA_START_DOCSTRING) +class MllamaAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MllamaPreTrainedModel): def __init__(self, config): super().__init__(config) - self.model = MllamaTextModel - init(self.model) + self.vocab_size = config.text_config.vocab_size + self.hidden_size = config.text_config.hidden_size + self.max_num_tiles = config.vision_config.max_num_tiles + self.vision_output_dim = config.vision_config.vision_output_dim + self.pad_token_id = self.config.pad_token_id if self.config.pad_token_id is not None else -1 + + self.vision_model = MllamaVisionModel._from_config(config.vision_config) + self.language_model = MllamaTextModel._from_config(config.text_config) + self.multi_modal_projector = nn.Linear( + config.vision_config.vision_output_dim, + config.text_config.hidden_size, + bias=True, + ) + init(self.vision_model) + init(self.language_model) self._init_head_modules() + self.post_init() - self.init_weights() + def get_input_embeddings(self): + return self.language_model.get_input_embeddings() - # Model parallel - self.model_parallel = False - self.device_map = None - self.post_init() + def set_input_embeddings(self, value): + self.language_model.set_input_embeddings(value) + + def get_output_embeddings(self): + return self.language_model.get_output_embeddings() + + def set_output_embeddings(self, new_embeddings): + self.language_model.set_output_embeddings(new_embeddings) + + def set_decoder(self, decoder): + self.language_model.set_decoder(decoder) + + def get_decoder(self): + return self.language_model.get_decoder() + + def tie_weights(self): + return self.language_model.tie_weights() def forward( self, input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + aspect_ratio_mask: Optional[torch.Tensor] = None, + aspect_ratio_ids: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, - position_ids: Optional[torch.LongTensor] = None, - cross_attention_states: Optional[torch.FloatTensor] = None, cross_attention_mask: Optional[torch.Tensor] = None, - full_text_row_masked_out_mask: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, - past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, + cross_attention_states: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, + num_logits_to_keep: int = 0, head=None, output_adapter_gating_scores=False, output_adapter_fusion_attentions=False, **kwargs, - ) -> Union[Tuple, BaseModelOutputWithPast]: + ): # TODO -> output format + + # TODO: incorporate adapter logic with Forwardcontext and heads + # Establish parameter values output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states ) - use_cache = use_cache if use_cache is not None else self.config.use_cache return_dict = return_dict if return_dict is not None else 
self.config.use_return_dict - outputs, context = self.model( + # Check invalid argument combinations + if (input_ids is None) ^ (inputs_embeds is not None): + raise ValueError("You must specify exactly one of input_ids or inputs_embeds") + if pixel_values is not None and inputs_embeds is not None: + raise ValueError( + "You cannot specify both pixel_values and inputs_embeds at the same time, and must specify either one" + ) + if pixel_values is not None and cross_attention_states is not None: + raise ValueError("`pixel_values` and `cross_attention_states` cannot be provided simultaneously") + + # If image is provided compute cross_attention_states + if pixel_values is not None: + if aspect_ratio_ids is None: + raise ValueError("`aspect_ratio_ids` must be provided if `pixel_values` is provided") + vision_outputs = self.vision_model( + pixel_values=pixel_values, + aspect_ratio_ids=aspect_ratio_ids, + aspect_ratio_mask=aspect_ratio_mask, + output_hidden_states=output_hidden_states, + output_attentions=output_attentions, + return_dict=return_dict, + ) + cross_attention_states = vision_outputs[0] + cross_attention_states = self.multi_modal_projector(cross_attention_states).reshape( + -1, cross_attention_states.shape[-2], self.hidden_size + ) + + # Compute cross_attention_mask + if cross_attention_mask is not None: + cross_attention_mask, full_text_row_masked_out_mask = _prepare_cross_attention_mask( + cross_attention_mask, + num_vision_tokens=self.vision_model.num_patches, + dtype=self.dtype, + ) + else: + full_text_row_masked_out_mask = None + if cross_attention_mask is not None and cache_position is not None: + cross_attention_mask = cross_attention_mask[:, :, cache_position] + full_text_row_masked_out_mask = full_text_row_masked_out_mask[:, :, cache_position] + + outputs = self.language_model( input_ids=input_ids, attention_mask=attention_mask, position_ids=position_ids, @@ -80,74 +152,115 @@ def forward( cross_attention_mask=cross_attention_mask, full_text_row_masked_out_mask=full_text_row_masked_out_mask, past_key_values=past_key_values, - inputs_embeds=inputs_embeds, use_cache=use_cache, - output_attentions=output_attentions, + inputs_embeds=inputs_embeds, + labels=labels, output_hidden_states=output_hidden_states, + output_attentions=output_attentions, return_dict=return_dict, cache_position=cache_position, - output_adapter_gating_scores=output_adapter_gating_scores, - output_adapter_fusion_attentions=output_adapter_fusion_attentions, - adapter_input_parallelized=kwargs.pop("adapter_input_parallelized", False), - output_context=True, ) - kwargs["context"] = context - batch_size = outputs[0].shape[0] - if self.config.pad_token_id is None: - # TODO-AH: this may result in unexpected behavior for classification. Find a better way to do this? - sequence_lengths = -1 - else: - if input_ids is not None: - sequence_lengths = torch.ne(input_ids, self.config.pad_token_id).sum(-1) - 1 - (sequence_lengths,) = adjust_tensors_for_parallel(outputs[0], sequence_lengths) - else: - sequence_lengths = -1 - logger.warning( - f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be " - "unexpected if using padding tokens in conjunction with `inputs_embeds.`" - ) - - cls_logits = outputs[0][range(batch_size), sequence_lengths] - - outputs = self.forward_head( - outputs, - head_name=head, - cls_output=cls_logits, - attention_mask=attention_mask, - return_dict=return_dict, - **kwargs, - ) + # TODO: head logic, until now just copied! 
+ hidden_states = outputs[0] + logits = self.lm_head(hidden_states[:, -num_logits_to_keep:, :]).float() + + loss = None + if labels is not None: + loss = self.loss_function(logits, labels, self.vocab_size, **kwargs) - return outputs + if not return_dict: + output = (logits,) + outputs[1:] + return (loss,) + output if loss is not None else output + + return CausalLMOutputWithPast( + loss=loss, + logits=logits, + past_key_values=outputs.past_key_values, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) def prepare_inputs_for_generation( - self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs + self, + input_ids=None, + inputs_embeds=None, + attention_mask=None, + position_ids=None, + pixel_values=None, + aspect_ratio_ids=None, + aspect_ratio_mask=None, + cross_attention_mask=None, + past_key_values=None, + use_cache=False, + cache_position=None, + num_logits_to_keep=None, + **kwargs, ): - if past_key_values: - input_ids = input_ids[:, -1:] + # Overwritten -- in specific circumstances we don't want to forward image inputs to the model + + # If we have cache: let's slice `input_ids` through `cache_position`, to keep only the unprocessed tokens + # Exception 1: when passing input_embeds, input_ids may be missing entries + # Exception 2: some generation methods do special slicing of input_ids, so we don't need to do it here + if past_key_values is not None: + if inputs_embeds is not None: # Exception 1 + input_ids = input_ids[:, -cache_position.shape[0] :] + elif input_ids.shape[1] != cache_position.shape[0]: # Default case (the "else", a no op, is Exception 2) + input_ids = input_ids[:, cache_position] - position_ids = kwargs.get("position_ids", None) + # TODO: we have no attention_mask so this won't work, check if we really won't need attention mask and find another way if attention_mask is not None and position_ids is None: # create position_ids on the fly for batch generation position_ids = attention_mask.long().cumsum(-1) - 1 position_ids.masked_fill_(attention_mask == 0, 1) if past_key_values: - position_ids = position_ids[:, -1].unsqueeze(-1) + position_ids = position_ids[:, -input_ids.shape[1] :] + + # This `clone` call is needed to avoid recapturing cuda graphs with `torch.compile`'s `mode="reduce-overhead`, as otherwise the input `position_ids` would have various stride during the decoding. Here, simply using `.contiguous()` is not sufficient as in the batch size = 1 case, `position_ids` is already contiguous but with varying stride which retriggers a capture. + position_ids = position_ids.clone(memory_format=torch.contiguous_format) # if `inputs_embeds` are passed, we only want to use them in the 1st generation step - if inputs_embeds is not None and past_key_values is None: - model_inputs = {"inputs_embeds": inputs_embeds} + if inputs_embeds is not None and cache_position[0] == 0: + model_inputs = {"inputs_embeds": inputs_embeds, "input_ids": None} else: - model_inputs = {"input_ids": input_ids} + # The clone here is for the same reason as for `position_ids`. 
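+            # Default branch: only the token ids are forwarded (as a contiguous clone); `inputs_embeds`
+            # is explicitly dropped, since embeddings are only consumed in the first generation step.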
+ model_inputs = {"input_ids": input_ids.clone(memory_format=torch.contiguous_format), "inputs_embeds": None} + + if num_logits_to_keep is not None: + model_inputs["num_logits_to_keep"] = num_logits_to_keep model_inputs.update( { "position_ids": position_ids, + "cache_position": cache_position, "past_key_values": past_key_values, - "use_cache": kwargs.get("use_cache"), + "use_cache": use_cache, "attention_mask": attention_mask, - "adapter_input_parallelized": kwargs.pop("adapter_input_parallelized", False), + "cross_attention_mask": cross_attention_mask, } ) + + # If we're in pre-fill or cacheless decoding step, then we need pixel_values and aspect ratios + # to compute image hidden states, otherwise they are cached within each cross attn layer + if cache_position[0] == 0: + model_inputs["pixel_values"] = pixel_values + model_inputs["aspect_ratio_ids"] = aspect_ratio_ids + model_inputs["aspect_ratio_mask"] = aspect_ratio_mask + return model_inputs + + def _update_model_kwargs_for_generation(self, outputs, model_kwargs, is_encoder_decoder, **kwargs): + cross_attention_mask_prev = model_kwargs.get("cross_attention_mask", None) + model_kwargs = super()._update_model_kwargs_for_generation( + outputs=outputs, + model_kwargs=model_kwargs, + is_encoder_decoder=is_encoder_decoder, + **kwargs, + ) + + # add cross-attn mask for new token + if cross_attention_mask_prev is not None: + model_kwargs["cross_attention_mask"] = torch.cat( + [cross_attention_mask_prev, cross_attention_mask_prev[:, -1:, ...]], dim=1 + ) + return model_kwargs From 32609c3b96636391afa4005f4b413a35c5cdab52 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Wed, 8 Jan 2025 11:04:16 +0100 Subject: [PATCH 36/63] Draft mllama adapter mixins --- src/adapters/models/mllama/mixin_mllama.py | 144 +++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 src/adapters/models/mllama/mixin_mllama.py diff --git a/src/adapters/models/mllama/mixin_mllama.py b/src/adapters/models/mllama/mixin_mllama.py new file mode 100644 index 0000000000..a40f384756 --- /dev/null +++ b/src/adapters/models/mllama/mixin_mllama.py @@ -0,0 +1,144 @@ +from typing import Iterable, Tuple + +import torch.nn as nn + +from ...composition import adjust_tensors_for_parallel_ +from ...methods.bottleneck import BottleneckLayer +from ...methods.lora import LoRALinear +from ...methods.prefix_tuning import PrefixTuningLayer +from ...methods.reft import ReftLayer, hook_fn +from ...model_mixin import ( + EmbeddingAdaptersMixin, + InvertibleAdaptersMixin, + ModelBaseAdaptersMixin, +) +from ...utils import patch_forward + + +class MllamaBaseAttentionAdaptersMixin: + """Base mixin class for adding adapter support to attention modules in MLLaMA. + + Implements common adapter functionality for all attention variants including: + - LoRA adapters for query, key, and value projections + - Additional Prefix tuning layer + + This base implementation ensures consistent adapter behavior across different + attention mechanisms in the model. 
+ """ + + def init_adapters(self, model_config, adapters_config): + # Wrap layers for LoRA + self.q_proj = LoRALinear.wrap(self.q_proj, "selfattn", model_config, adapters_config, attn_key="q") + self.k_proj = LoRALinear.wrap(self.k_proj, "selfattn", model_config, adapters_config, attn_key="k") + self.v_proj = LoRALinear.wrap(self.v_proj, "selfattn", model_config, adapters_config, attn_key="v") + + self.prefix_tuning = PrefixTuningLayer( + "self_prefix", model_config, adapters_config, add_model_type_to_key=True + ) + patch_forward(self) + + +class MllamaVisionAttentionAdaptersMixin(MllamaBaseAttentionAdaptersMixin): + """Mixin for adding adapter support to MLLaMA's vision attention module.""" + + +class MllamaTextCrossAttentionAdaptersMixin(MllamaBaseAttentionAdaptersMixin): + """Mixin for adding adapter support to MLLaMA's cross-attention module.""" + + +class MllamaTextSelfAttentionAdaptersMixin(MllamaBaseAttentionAdaptersMixin): + """Mixin for adding adapter support to MLLaMA's self-attention module.""" + + +class MllamaBaseLayerAdaptersMixin: + """Base mixin class for adding adapter support to MLLaMA layer modules. + + Implements common layer-level adapter functionality including: + - LoRA adapters for MLP layers (fc1/fc2) + - Bottleneck adapters for attention and output + - Forward pass patching for adapter integration + """ + + def init_adapters(self, model_config, adapters_config): + # Wrap layers for LoRA + self.mlp.fc1 = LoRALinear.wrap(self.mlp.fc1, "intermediate", model_config, adapters_config) + self.mlp.fc2 = LoRALinear.wrap(self.mlp.fc2, "output", model_config, adapters_config) + + self.attention_adapters = BottleneckLayer("mh_adapter") + self.output_adapters = BottleneckLayer("output_adapter") + + patch_forward(self) + + +class MllamaVisionEncoderLayerAdaptersMixin(MllamaBaseLayerAdaptersMixin): + """Mixin for adding adapter support to MLLaMA's vision encoder layers.""" + + +class MllamaSelfAttentionDecoderLayerAdaptersMixin(MllamaBaseLayerAdaptersMixin): + """Mixin for adding adapter support to MLLaMA's self-attention decoder layers.""" + + +class MllamaCrossAttentionDecoderLayerAdaptersMixin(MllamaBaseLayerAdaptersMixin): + """Mixin for adding adapter support to MLLaMA's cross-attention decoder layers.""" + + +class MllamaVisionEncoderAdaptersMixin: + """Mixin for adding adapter support to MLLaMA's vision encoder module. 
+ + Implements parallel composition support for vision encoder layers by: + - Setting up hooks to adjust tensors during forward pass for parallel adapter processing + """ + + def init_adapters(self, model_config, adapters_config): + # Set hook for parallel composition + for layer in self.layers: + self._set_layer_hook_for_parallel(layer) + + def _set_layer_hook_for_parallel(self, layer: nn.Module): + def hook(module, input): + adjust_tensors_for_parallel_(input[0], input[1]) + return input + + layer.register_forward_pre_hook(hook) + + +class MllamaVisionModelAdaptersMixin(ModelBaseAdaptersMixin): + """Adds adapters to the a MllamaVisionModel class.""" + + support_prompt_tuning = False + + def init_adapters(self, model_config, adapters_config): + super().init_adapters(model_config, adapters_config) + + # Register hook for post embedding forward + self.embed_tokens.register_forward_hook(self.post_embedding_forward) + + def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]: + for i, layer in enumerate(self.layers): + yield i, layer + + def post_embedding_forward(self, module, args, embedding_output): + embedding_output = self.invertible_adapters_forward(embedding_output) + # Prompt tuning not yet supported + return embedding_output + + +class MllamaTextModelAdaptersMixin(EmbeddingAdaptersMixin, InvertibleAdaptersMixin, ModelBaseAdaptersMixin): + """Adds adapters to the a MllamaTextModel class.""" + + support_prompt_tuning = False + + def init_adapters(self, model_config, adapters_config): + super().init_adapters(model_config, adapters_config) + + # Register hook for post embedding forward + self.embed_tokens.register_forward_hook(self.post_embedding_forward) + + def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]: + for i, layer in enumerate(self.layers): + yield i, layer + + def post_embedding_forward(self, module, args, embedding_output): + embedding_output = self.invertible_adapters_forward(embedding_output) + # Prompt tuning not yet supported + return embedding_output From 2c80a5c9b5a96c0b775e185da931c247b1725f70 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Wed, 8 Jan 2025 16:29:31 +0100 Subject: [PATCH 37/63] Polish: - Add new test adding section to README and correct typos - Decide on more expressive file names - Save naming script for now, can be removed during Review --- tests/README.md | 125 ++++++++++++++---- .../{test_albert.py => test_on_albert.py} | 0 .../{test_bart.py => test_on_bart.py} | 0 .../{test_beit.py => test_on_beit.py} | 0 .../{test_bert.py => test_on_bert.py} | 0 ...neration.py => test_on_bert_generation.py} | 0 ...test_clip_text.py => test_on_clip_text.py} | 0 ..._clip_vision.py => test_on_clip_vision.py} | 0 .../{test_deberta.py => test_on_deberta.py} | 0 ...test_debertaV2.py => test_on_debertaV2.py} | 0 ...st_distilbert.py => test_on_distilbert.py} | 0 .../{test_electra.py => test_on_electra.py} | 0 ..._decoder.py => test_on_encoder_decoder.py} | 0 .../{test_gpt2.py => test_on_gpt2.py} | 0 .../{test_llama.py => test_on_llama.py} | 0 .../{test_mbart.py => test_on_mbart.py} | 0 .../{test_mistral.py => test_on_mistral.py} | 0 .../{test_mt5.py => test_on_mt5.py} | 0 .../{test_plbart.py => test_on_plbart.py} | 0 .../{test_roberta.py => test_on_roberta.py} | 0 .../{test_t5.py => test_on_t5.py} | 0 .../{test_vit.py => test_on_vit.py} | 0 .../{test_whisper.py => test_on_whisper.py} | 0 ..._xlm_roberta.py => test_on_xlm_roberta.py} | 0 .../{test_xmod.py => test_on_xmod.py} | 0 .../{test_albert.py => test_albert_model.py} | 0 .../{test_bart.py => 
test_bart_model.py} | 0 .../{test_beit.py => test_beit_model.py} | 0 ...ation.py => test_bert_generation_model.py} | 0 .../{test_bert.py => test_bert_model.py} | 0 .../{test_clip.py => test_clip_model.py} | 0 ...t_debertaV2.py => test_debertaV2_model.py} | 0 ...{test_deberta.py => test_deberta_model.py} | 0 ...distilbert.py => test_distilbert_model.py} | 0 ...{test_electra.py => test_electra_model.py} | 0 ...coder.py => test_encoder_decoder_model.py} | 0 .../{test_gpt2.py => test_gpt2_model.py} | 0 .../{test_gptj.py => test_gptj_model.py} | 0 .../{test_llama.py => test_llama_model.py} | 0 .../{test_mbart.py => test_mbart_model.py} | 0 ...{test_mistral.py => test_mistral_model.py} | 0 .../{test_mt5.py => test_mt5_model.py} | 0 .../{test_plbart.py => test_plbart_model.py} | 0 ...{test_roberta.py => test_roberta_model.py} | 0 .../{test_t5.py => test_t5_model.py} | 0 .../{test_vit.py => test_vit_model.py} | 0 ...{test_whisper.py => test_whisper_model.py} | 0 ...m_roberta.py => test_xlm_roberta_model.py} | 0 .../{test_xmod.py => test_xmod_model.py} | 0 utils/rename_script.py | 63 +++++++++ 50 files changed, 164 insertions(+), 24 deletions(-) rename tests/test_methods/{test_albert.py => test_on_albert.py} (100%) rename tests/test_methods/{test_bart.py => test_on_bart.py} (100%) rename tests/test_methods/{test_beit.py => test_on_beit.py} (100%) rename tests/test_methods/{test_bert.py => test_on_bert.py} (100%) rename tests/test_methods/{test_bert_generation.py => test_on_bert_generation.py} (100%) rename tests/test_methods/{test_clip_text.py => test_on_clip_text.py} (100%) rename tests/test_methods/{test_clip_vision.py => test_on_clip_vision.py} (100%) rename tests/test_methods/{test_deberta.py => test_on_deberta.py} (100%) rename tests/test_methods/{test_debertaV2.py => test_on_debertaV2.py} (100%) rename tests/test_methods/{test_distilbert.py => test_on_distilbert.py} (100%) rename tests/test_methods/{test_electra.py => test_on_electra.py} (100%) rename tests/test_methods/{test_encoder_decoder.py => test_on_encoder_decoder.py} (100%) rename tests/test_methods/{test_gpt2.py => test_on_gpt2.py} (100%) rename tests/test_methods/{test_llama.py => test_on_llama.py} (100%) rename tests/test_methods/{test_mbart.py => test_on_mbart.py} (100%) rename tests/test_methods/{test_mistral.py => test_on_mistral.py} (100%) rename tests/test_methods/{test_mt5.py => test_on_mt5.py} (100%) rename tests/test_methods/{test_plbart.py => test_on_plbart.py} (100%) rename tests/test_methods/{test_roberta.py => test_on_roberta.py} (100%) rename tests/test_methods/{test_t5.py => test_on_t5.py} (100%) rename tests/test_methods/{test_vit.py => test_on_vit.py} (100%) rename tests/test_methods/{test_whisper.py => test_on_whisper.py} (100%) rename tests/test_methods/{test_xlm_roberta.py => test_on_xlm_roberta.py} (100%) rename tests/test_methods/{test_xmod.py => test_on_xmod.py} (100%) rename tests/test_models/{test_albert.py => test_albert_model.py} (100%) rename tests/test_models/{test_bart.py => test_bart_model.py} (100%) rename tests/test_models/{test_beit.py => test_beit_model.py} (100%) rename tests/test_models/{test_bert_generation.py => test_bert_generation_model.py} (100%) rename tests/test_models/{test_bert.py => test_bert_model.py} (100%) rename tests/test_models/{test_clip.py => test_clip_model.py} (100%) rename tests/test_models/{test_debertaV2.py => test_debertaV2_model.py} (100%) rename tests/test_models/{test_deberta.py => test_deberta_model.py} (100%) rename tests/test_models/{test_distilbert.py => 
test_distilbert_model.py} (100%) rename tests/test_models/{test_electra.py => test_electra_model.py} (100%) rename tests/test_models/{test_encoder_decoder.py => test_encoder_decoder_model.py} (100%) rename tests/test_models/{test_gpt2.py => test_gpt2_model.py} (100%) rename tests/test_models/{test_gptj.py => test_gptj_model.py} (100%) rename tests/test_models/{test_llama.py => test_llama_model.py} (100%) rename tests/test_models/{test_mbart.py => test_mbart_model.py} (100%) rename tests/test_models/{test_mistral.py => test_mistral_model.py} (100%) rename tests/test_models/{test_mt5.py => test_mt5_model.py} (100%) rename tests/test_models/{test_plbart.py => test_plbart_model.py} (100%) rename tests/test_models/{test_roberta.py => test_roberta_model.py} (100%) rename tests/test_models/{test_t5.py => test_t5_model.py} (100%) rename tests/test_models/{test_vit.py => test_vit_model.py} (100%) rename tests/test_models/{test_whisper.py => test_whisper_model.py} (100%) rename tests/test_models/{test_xlm_roberta.py => test_xlm_roberta_model.py} (100%) rename tests/test_models/{test_xmod.py => test_xmod_model.py} (100%) create mode 100644 utils/rename_script.py diff --git a/tests/README.md b/tests/README.md index ebd1ac4123..71b5f34fc4 100644 --- a/tests/README.md +++ b/tests/README.md @@ -1,7 +1,8 @@ -# Testing the adapters library +# Testing the Adapters Library -This README gives an overview of how the test directory is organized and the possibilities of grouping and executing different kinds of tests. -## Overview test directory structure +This README provides a comprehensive overview of the test directory organization and explains how to execute different types of tests within the adapters library. + +## Test Directory Structure Overview ``` tests/ @@ -15,10 +16,10 @@ tests/ │ │ ├── core/ │ │ ├── composition/ │ │ └── ... -│ ├── base.py # Base from which model test bases inherit from +│ ├── base.py # Base from which model test bases inherit │ ├── generator.py # Testcase generation and registration -│ ├── test_albert.py # Example model test base testing adapter methods on the model -│ ├── test_beit.py +│ ├── test_on_albert.py # Example model test base for testing adapter methods on albert adapter model +│ ├── test_on_beit.py │ └── ... ├── test_misc/ # Miscellaneous adapter method tests (single model) │ ├── test_adapter_config.py @@ -26,29 +27,34 @@ tests/ ├── test_models/ # Adapter model tests with Hugging Face test suite │ └── __init__.py │ │ ├── base.py -│ │ ├── test_albert.py +│ │ ├── test_albert_model.py │ │ └── ... ``` -We differentiate between three kinds of tests: +## Test Categories + +The testing framework encompasses three distinct categories of tests: + +1. Dynamic Adapter Method Tests: These tests cover core functionalities of the adapters library, including individual adapter methods (such as LoRA and prompt tuning) and head functionalities. These tests are executed across all supported models. + +2. Miscellaneous Adapter Method Tests: These supplementary tests cover scenarios not included in the dynamic tests. To optimize resources, they are executed on a single model, as repeated execution across multiple models would not provide additional value. -1. Dynamic adapter method tests: These tests cover most functionalities of the adapters library, e.g. the individual adapter methods (LoRA, prompt tuning) or head functionalities and **are executed on every model** -2. 
Miscellaneous adapter method tests: These are the remaining tests not covered by the dynamic tests and are **only executed on a single model** to spare ressources as repeated execution on every model would not provide additional value -3. Adapter model tests: These tests **check the implementation of the adapter models** themselves, by applying the Hugging Face model test suite +3. Adapter Model Tests: These tests verify the implementation of the adapter models themselves using the Hugging Face model test suite. -## Test Generator $ Pytest Markers +## Test Generator and Pytest Markers -This chapter zooms in on the test_methods directory. The main actor here is the file `generator.py` which is used by every model test base to generate the appropriate set of adapter method tests. Those tests are then registered in the respective model test file, like this: +The test_methods directory contains the central component `generator.py`, which generates appropriate sets of adapter method tests. Each model test base registers these tests using the following pattern: -``` python +```python method_tests = generate_method_tests(AlbertAdapterTestBase) for test_class_name, test_class in method_tests.items(): globals()[test_class_name] = test_class ``` -Each generatable class in `tests/test_methods` is decorated with a marker of a certain type, e.g.: -``` python +Each generated test class is decorated with a specific marker type. For example: + +```python @require_torch @pytest.mark.lora class LoRA( @@ -59,15 +65,86 @@ class LoRA( pass ``` -These markers can be used to execute a certain type of test **for every model**. To use them you have two options: -1. Use `make` command: +These markers enable the execution of specific test types across all models. You can run these tests using either of these methods: + +1. Using the make command: +```bash +make test-adapter-method-subset subset=lora +``` + +2. Directly executing from the test directory: +```bash +cd tests/test_methods +pytest -m lora +``` + +Both approaches will execute all LoRA tests across every model in the adapters library. + +## Adding a New Adapter Method to the Test Suite + +The modular design of the test base simplifies the process of adding tests for new adapter methods. To add tests for a new adapter method "X", follow these steps: + +1. Create the Test Implementation: + Create a new file `tests/test_methods/method_test_impl/peft/test_X.py` and implement the test mixin class: + + ```python + @require_torch + class XTestMixin(AdapterMethodBaseTestMixin): + + default_config = XConfig() + + def test_add_X(self): + model = self.get_model() + self.run_add_test(model, self.default_config, ["adapters.{name}."]) + + def ... + ``` + +2. 
Register the Test Mixin: + Add the new test mixin class to `tests/test_methods/generator.py`: + + ```python + from tests.test_methods.method_test_impl.peft.test_X import XTestMixin + + def generate_method_tests(model_test_base, ...): + """ Generate method tests for the given model test base """ + test_classes = {} + + @require_torch + @pytest.mark.core + class Core( + model_test_base, + CompabilityTestMixin, + AdapterFusionModelTestMixin, + unittest.TestCase, + ): + pass + + if "Core" not in excluded_tests: + test_classes["Core"] = Core + + @require_torch + @pytest.mark.X + class X( + model_test_base, + XTestMixin, + unittest.TestCase, + ): + pass + + if "X" not in excluded_tests: + test_classes["X"] = X + ``` + + The pytest marker enables execution of the new method's tests across all adapter models using: ```bash - make test-adapter-method-subset subset=lora + make test-adapter-method-subset subset=X ``` -2. Navigate to directory and directly execute: - ```bash - cd tests/test_methods - pytest -m lora + If the new method is incompatible with specific adapter models, you can exclude the tests in the respective `test_on_xyz.py` file: + + ```python + method_tests = generate_method_tests(BartAdapterTestBase, excluded_tests=["PromptTuning", "X"]) ``` -Both versions will execute all LoRA tests for every model in the adapters library. + + + Note: It is recommended to design new methods to work with the complete library whenever possible. Only exclude tests when there are unavoidable compatibility issues and make them clear in the documentation. \ No newline at end of file diff --git a/tests/test_methods/test_albert.py b/tests/test_methods/test_on_albert.py similarity index 100% rename from tests/test_methods/test_albert.py rename to tests/test_methods/test_on_albert.py diff --git a/tests/test_methods/test_bart.py b/tests/test_methods/test_on_bart.py similarity index 100% rename from tests/test_methods/test_bart.py rename to tests/test_methods/test_on_bart.py diff --git a/tests/test_methods/test_beit.py b/tests/test_methods/test_on_beit.py similarity index 100% rename from tests/test_methods/test_beit.py rename to tests/test_methods/test_on_beit.py diff --git a/tests/test_methods/test_bert.py b/tests/test_methods/test_on_bert.py similarity index 100% rename from tests/test_methods/test_bert.py rename to tests/test_methods/test_on_bert.py diff --git a/tests/test_methods/test_bert_generation.py b/tests/test_methods/test_on_bert_generation.py similarity index 100% rename from tests/test_methods/test_bert_generation.py rename to tests/test_methods/test_on_bert_generation.py diff --git a/tests/test_methods/test_clip_text.py b/tests/test_methods/test_on_clip_text.py similarity index 100% rename from tests/test_methods/test_clip_text.py rename to tests/test_methods/test_on_clip_text.py diff --git a/tests/test_methods/test_clip_vision.py b/tests/test_methods/test_on_clip_vision.py similarity index 100% rename from tests/test_methods/test_clip_vision.py rename to tests/test_methods/test_on_clip_vision.py diff --git a/tests/test_methods/test_deberta.py b/tests/test_methods/test_on_deberta.py similarity index 100% rename from tests/test_methods/test_deberta.py rename to tests/test_methods/test_on_deberta.py diff --git a/tests/test_methods/test_debertaV2.py b/tests/test_methods/test_on_debertaV2.py similarity index 100% rename from tests/test_methods/test_debertaV2.py rename to tests/test_methods/test_on_debertaV2.py diff --git a/tests/test_methods/test_distilbert.py b/tests/test_methods/test_on_distilbert.py
similarity index 100% rename from tests/test_methods/test_distilbert.py rename to tests/test_methods/test_on_distilbert.py diff --git a/tests/test_methods/test_electra.py b/tests/test_methods/test_on_electra.py similarity index 100% rename from tests/test_methods/test_electra.py rename to tests/test_methods/test_on_electra.py diff --git a/tests/test_methods/test_encoder_decoder.py b/tests/test_methods/test_on_encoder_decoder.py similarity index 100% rename from tests/test_methods/test_encoder_decoder.py rename to tests/test_methods/test_on_encoder_decoder.py diff --git a/tests/test_methods/test_gpt2.py b/tests/test_methods/test_on_gpt2.py similarity index 100% rename from tests/test_methods/test_gpt2.py rename to tests/test_methods/test_on_gpt2.py diff --git a/tests/test_methods/test_llama.py b/tests/test_methods/test_on_llama.py similarity index 100% rename from tests/test_methods/test_llama.py rename to tests/test_methods/test_on_llama.py diff --git a/tests/test_methods/test_mbart.py b/tests/test_methods/test_on_mbart.py similarity index 100% rename from tests/test_methods/test_mbart.py rename to tests/test_methods/test_on_mbart.py diff --git a/tests/test_methods/test_mistral.py b/tests/test_methods/test_on_mistral.py similarity index 100% rename from tests/test_methods/test_mistral.py rename to tests/test_methods/test_on_mistral.py diff --git a/tests/test_methods/test_mt5.py b/tests/test_methods/test_on_mt5.py similarity index 100% rename from tests/test_methods/test_mt5.py rename to tests/test_methods/test_on_mt5.py diff --git a/tests/test_methods/test_plbart.py b/tests/test_methods/test_on_plbart.py similarity index 100% rename from tests/test_methods/test_plbart.py rename to tests/test_methods/test_on_plbart.py diff --git a/tests/test_methods/test_roberta.py b/tests/test_methods/test_on_roberta.py similarity index 100% rename from tests/test_methods/test_roberta.py rename to tests/test_methods/test_on_roberta.py diff --git a/tests/test_methods/test_t5.py b/tests/test_methods/test_on_t5.py similarity index 100% rename from tests/test_methods/test_t5.py rename to tests/test_methods/test_on_t5.py diff --git a/tests/test_methods/test_vit.py b/tests/test_methods/test_on_vit.py similarity index 100% rename from tests/test_methods/test_vit.py rename to tests/test_methods/test_on_vit.py diff --git a/tests/test_methods/test_whisper.py b/tests/test_methods/test_on_whisper.py similarity index 100% rename from tests/test_methods/test_whisper.py rename to tests/test_methods/test_on_whisper.py diff --git a/tests/test_methods/test_xlm_roberta.py b/tests/test_methods/test_on_xlm_roberta.py similarity index 100% rename from tests/test_methods/test_xlm_roberta.py rename to tests/test_methods/test_on_xlm_roberta.py diff --git a/tests/test_methods/test_xmod.py b/tests/test_methods/test_on_xmod.py similarity index 100% rename from tests/test_methods/test_xmod.py rename to tests/test_methods/test_on_xmod.py diff --git a/tests/test_models/test_albert.py b/tests/test_models/test_albert_model.py similarity index 100% rename from tests/test_models/test_albert.py rename to tests/test_models/test_albert_model.py diff --git a/tests/test_models/test_bart.py b/tests/test_models/test_bart_model.py similarity index 100% rename from tests/test_models/test_bart.py rename to tests/test_models/test_bart_model.py diff --git a/tests/test_models/test_beit.py b/tests/test_models/test_beit_model.py similarity index 100% rename from tests/test_models/test_beit.py rename to tests/test_models/test_beit_model.py diff --git 
a/tests/test_models/test_bert_generation.py b/tests/test_models/test_bert_generation_model.py similarity index 100% rename from tests/test_models/test_bert_generation.py rename to tests/test_models/test_bert_generation_model.py diff --git a/tests/test_models/test_bert.py b/tests/test_models/test_bert_model.py similarity index 100% rename from tests/test_models/test_bert.py rename to tests/test_models/test_bert_model.py diff --git a/tests/test_models/test_clip.py b/tests/test_models/test_clip_model.py similarity index 100% rename from tests/test_models/test_clip.py rename to tests/test_models/test_clip_model.py diff --git a/tests/test_models/test_debertaV2.py b/tests/test_models/test_debertaV2_model.py similarity index 100% rename from tests/test_models/test_debertaV2.py rename to tests/test_models/test_debertaV2_model.py diff --git a/tests/test_models/test_deberta.py b/tests/test_models/test_deberta_model.py similarity index 100% rename from tests/test_models/test_deberta.py rename to tests/test_models/test_deberta_model.py diff --git a/tests/test_models/test_distilbert.py b/tests/test_models/test_distilbert_model.py similarity index 100% rename from tests/test_models/test_distilbert.py rename to tests/test_models/test_distilbert_model.py diff --git a/tests/test_models/test_electra.py b/tests/test_models/test_electra_model.py similarity index 100% rename from tests/test_models/test_electra.py rename to tests/test_models/test_electra_model.py diff --git a/tests/test_models/test_encoder_decoder.py b/tests/test_models/test_encoder_decoder_model.py similarity index 100% rename from tests/test_models/test_encoder_decoder.py rename to tests/test_models/test_encoder_decoder_model.py diff --git a/tests/test_models/test_gpt2.py b/tests/test_models/test_gpt2_model.py similarity index 100% rename from tests/test_models/test_gpt2.py rename to tests/test_models/test_gpt2_model.py diff --git a/tests/test_models/test_gptj.py b/tests/test_models/test_gptj_model.py similarity index 100% rename from tests/test_models/test_gptj.py rename to tests/test_models/test_gptj_model.py diff --git a/tests/test_models/test_llama.py b/tests/test_models/test_llama_model.py similarity index 100% rename from tests/test_models/test_llama.py rename to tests/test_models/test_llama_model.py diff --git a/tests/test_models/test_mbart.py b/tests/test_models/test_mbart_model.py similarity index 100% rename from tests/test_models/test_mbart.py rename to tests/test_models/test_mbart_model.py diff --git a/tests/test_models/test_mistral.py b/tests/test_models/test_mistral_model.py similarity index 100% rename from tests/test_models/test_mistral.py rename to tests/test_models/test_mistral_model.py diff --git a/tests/test_models/test_mt5.py b/tests/test_models/test_mt5_model.py similarity index 100% rename from tests/test_models/test_mt5.py rename to tests/test_models/test_mt5_model.py diff --git a/tests/test_models/test_plbart.py b/tests/test_models/test_plbart_model.py similarity index 100% rename from tests/test_models/test_plbart.py rename to tests/test_models/test_plbart_model.py diff --git a/tests/test_models/test_roberta.py b/tests/test_models/test_roberta_model.py similarity index 100% rename from tests/test_models/test_roberta.py rename to tests/test_models/test_roberta_model.py diff --git a/tests/test_models/test_t5.py b/tests/test_models/test_t5_model.py similarity index 100% rename from tests/test_models/test_t5.py rename to tests/test_models/test_t5_model.py diff --git a/tests/test_models/test_vit.py 
b/tests/test_models/test_vit_model.py similarity index 100% rename from tests/test_models/test_vit.py rename to tests/test_models/test_vit_model.py diff --git a/tests/test_models/test_whisper.py b/tests/test_models/test_whisper_model.py similarity index 100% rename from tests/test_models/test_whisper.py rename to tests/test_models/test_whisper_model.py diff --git a/tests/test_models/test_xlm_roberta.py b/tests/test_models/test_xlm_roberta_model.py similarity index 100% rename from tests/test_models/test_xlm_roberta.py rename to tests/test_models/test_xlm_roberta_model.py diff --git a/tests/test_models/test_xmod.py b/tests/test_models/test_xmod_model.py similarity index 100% rename from tests/test_models/test_xmod.py rename to tests/test_models/test_xmod_model.py diff --git a/utils/rename_script.py b/utils/rename_script.py new file mode 100644 index 0000000000..467c5767ac --- /dev/null +++ b/utils/rename_script.py @@ -0,0 +1,63 @@ +import os +import re + + +def rename_test_files(directory): + """ + Renames test files in the given directory from pattern 'test_name.py' to 'test_on_name.py' + + Args: + directory (str): The directory containing the test files to rename + + Returns: + dict: A mapping of old filenames to new filenames for successfully renamed files + """ + # Store the mapping of old to new names + renamed_files = {} + + # Regular expression to match test files + pattern = r"^test_([^on_].+)\.py$" + + # List all files in the directory + for filename in os.listdir(directory): + match = re.match(pattern, filename) + + # Check if the file matches our pattern and doesn't already have 'on' in it + if match and "test_on_" not in filename: + base_name = match.group(1) + new_filename = f"test_{base_name}_model.py" + + # Construct full file paths + old_path = os.path.join(directory, filename) + new_path = os.path.join(directory, new_filename) + + # Check if the new filename already exists + if os.path.exists(new_path): + print(f"Warning: {new_filename} already exists, skipping {filename}") + continue + + try: + # Rename the file + os.rename(old_path, new_path) + renamed_files[filename] = new_filename + print(f"Renamed: {filename} -> {new_filename}") + except OSError as e: + print(f"Error renaming {filename}: {e}") + + return renamed_files + + +# Example usage +if __name__ == "__main__": + # Get the current directory or specify your test directory path + current_dir = os.path.dirname(os.path.abspath(__file__)) + + print("Starting file rename operation...") + renamed = rename_test_files(current_dir) + + print("\nSummary of renamed files:") + if renamed: + for old_name, new_name in renamed.items(): + print(f"- {old_name} → {new_name}") + else: + print("No files were renamed.") From b6800127414d1284b7ed8fc93428a4f0b7e2f7fe Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Mon, 6 Jan 2025 22:00:26 +0100 Subject: [PATCH 38/63] Draft import structure and adapter model class --- src/adapters/models/mllama/_init_.py | 39 +++++ src/adapters/models/mllama/adapter_model.py | 153 ++++++++++++++++++++ 2 files changed, 192 insertions(+) create mode 100644 src/adapters/models/mllama/_init_.py create mode 100644 src/adapters/models/mllama/adapter_model.py diff --git a/src/adapters/models/mllama/_init_.py b/src/adapters/models/mllama/_init_.py new file mode 100644 index 0000000000..12ff0ddd99 --- /dev/null +++ b/src/adapters/models/mllama/_init_.py @@ -0,0 +1,39 @@ +# flake8: noqa +# There's no way to ignore "F401 '...' imported but unused" warnings in this +# module, but to preserve other warnings. 
So, don't check this module at all. + +# Copyright 2020 The Adapter-Hub Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import TYPE_CHECKING + +from transformers.utils import _LazyModule + + +_import_structure = { + "adapter_model": ["MllamaAdapterModel"], +} + + +if TYPE_CHECKING: + from .adapter_model import MllamaAdapterModel + +else: + import sys + + sys.modules[__name__] = _LazyModule( + __name__, + globals()["__file__"], + _import_structure, + ) diff --git a/src/adapters/models/mllama/adapter_model.py b/src/adapters/models/mllama/adapter_model.py new file mode 100644 index 0000000000..8812a2ecd4 --- /dev/null +++ b/src/adapters/models/mllama/adapter_model.py @@ -0,0 +1,153 @@ +import logging +from typing import List, Optional, Tuple, Union + +import torch + +from hf_transformers.build.lib.transformers.cache_utils import Cache +from hf_transformers.build.lib.transformers.modeling_outputs import BaseModelOutputWithPast +from transformers.models.mllama import MLLAMA_START_DOCSTRING, MllamaPreTrainedModel, MllamaTextModel +from transformers.utils import add_start_docstrings + +from ...composition import adjust_tensors_for_parallel +from ...heads import ModelWithFlexibleHeadsAdaptersMixin +from ...model_mixin import EmbeddingAdaptersWrapperMixin +from ...wrappers import init + + +logger = logging.getLogger(__name__) + + +@add_start_docstrings( + """ + TODO + """, + MLLAMA_START_DOCSTRING, +) +class MllamaTextAdapterModel( + EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MllamaPreTrainedModel +): + head_types = [ + "causal_lm", + ] # TODO: "conditional_generation" + + def __init__(self, config): + super().__init__(config) + self.model = MllamaTextModel + init(self.model) + + self._init_head_modules() + + self.init_weights() + + # Model parallel + self.model_parallel = False + self.device_map = None + self.post_init() + + def forward( + self, + input_ids: Optional[torch.LongTensor] = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + cross_attention_states: Optional[torch.FloatTensor] = None, + cross_attention_mask: Optional[torch.Tensor] = None, + full_text_row_masked_out_mask: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + cache_position: Optional[torch.LongTensor] = None, + head=None, + output_adapter_gating_scores=False, + output_adapter_fusion_attentions=False, + **kwargs, + ) -> Union[Tuple, BaseModelOutputWithPast]: + + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + use_cache = use_cache if 
use_cache is not None else self.config.use_cache + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + outputs, context = self.model( + input_ids=input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + cross_attention_states=cross_attention_states, + cross_attention_mask=cross_attention_mask, + full_text_row_masked_out_mask=full_text_row_masked_out_mask, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + cache_position=cache_position, + output_adapter_gating_scores=output_adapter_gating_scores, + output_adapter_fusion_attentions=output_adapter_fusion_attentions, + adapter_input_parallelized=kwargs.pop("adapter_input_parallelized", False), + output_context=True, + ) + kwargs["context"] = context + batch_size = outputs[0].shape[0] + + if self.config.pad_token_id is None: + # TODO-AH: this may result in unexpected behavior for classification. Find a better way to do this? + sequence_lengths = -1 + else: + if input_ids is not None: + sequence_lengths = torch.ne(input_ids, self.config.pad_token_id).sum(-1) - 1 + (sequence_lengths,) = adjust_tensors_for_parallel(outputs[0], sequence_lengths) + else: + sequence_lengths = -1 + logger.warning( + f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be " + "unexpected if using padding tokens in conjunction with `inputs_embeds.`" + ) + + cls_logits = outputs[0][range(batch_size), sequence_lengths] + + outputs = self.forward_head( + outputs, + head_name=head, + cls_output=cls_logits, + attention_mask=attention_mask, + return_dict=return_dict, + **kwargs, + ) + + return outputs + + def prepare_inputs_for_generation( + self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs + ): + if past_key_values: + input_ids = input_ids[:, -1:] + + position_ids = kwargs.get("position_ids", None) + if attention_mask is not None and position_ids is None: + # create position_ids on the fly for batch generation + position_ids = attention_mask.long().cumsum(-1) - 1 + position_ids.masked_fill_(attention_mask == 0, 1) + if past_key_values: + position_ids = position_ids[:, -1].unsqueeze(-1) + + # if `inputs_embeds` are passed, we only want to use them in the 1st generation step + if inputs_embeds is not None and past_key_values is None: + model_inputs = {"inputs_embeds": inputs_embeds} + else: + model_inputs = {"input_ids": input_ids} + + model_inputs.update( + { + "position_ids": position_ids, + "past_key_values": past_key_values, + "use_cache": kwargs.get("use_cache"), + "attention_mask": attention_mask, + "adapter_input_parallelized": kwargs.pop("adapter_input_parallelized", False), + } + ) + return model_inputs From c32b08ae4e8611344e8af071d7414c2ab88da141 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Tue, 7 Jan 2025 18:30:34 +0100 Subject: [PATCH 39/63] Update gitignore for development --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 8b1478d39b..a6d5b296c5 100644 --- a/.gitignore +++ b/.gitignore @@ -176,4 +176,7 @@ scripts/git-strip-merge tests/backwards_compatibility/Ref_Out # backwards compatibility -model_outputs \ No newline at end of file +model_outputs + +# TODO: remove after mllama dev +explore_mllama \ No newline at end of file From 4b38180b3ec5372e20f0ddd99c3954a7b53d75a0 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: 
Wed, 8 Jan 2025 09:24:23 +0100 Subject: [PATCH 40/63] More thorough draft of adapter model --- src/adapters/models/mllama/adapter_model.py | 253 ++++++++++++++------ 1 file changed, 183 insertions(+), 70 deletions(-) diff --git a/src/adapters/models/mllama/adapter_model.py b/src/adapters/models/mllama/adapter_model.py index 8812a2ecd4..5dd58f19cc 100644 --- a/src/adapters/models/mllama/adapter_model.py +++ b/src/adapters/models/mllama/adapter_model.py @@ -2,10 +2,18 @@ from typing import List, Optional, Tuple, Union import torch +from torch import nn +from adapters.heads.language_modeling import CausalLMOutputWithPast from hf_transformers.build.lib.transformers.cache_utils import Cache from hf_transformers.build.lib.transformers.modeling_outputs import BaseModelOutputWithPast -from transformers.models.mllama import MLLAMA_START_DOCSTRING, MllamaPreTrainedModel, MllamaTextModel +from transformers.models.mllama import ( + MLLAMA_START_DOCSTRING, + MllamaPreTrainedModel, + MllamaVisionModel, + MllamaTextModel, +) +from transformers.models.mllama.modeling_mllama import _prepare_cross_attention_mask from transformers.utils import add_start_docstrings from ...composition import adjust_tensors_for_parallel @@ -17,62 +25,126 @@ logger = logging.getLogger(__name__) -@add_start_docstrings( - """ - TODO - """, - MLLAMA_START_DOCSTRING, -) -class MllamaTextAdapterModel( - EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MllamaPreTrainedModel -): - head_types = [ - "causal_lm", - ] # TODO: "conditional_generation" +@add_start_docstrings(MLLAMA_START_DOCSTRING) +class MllamaAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MllamaPreTrainedModel): def __init__(self, config): super().__init__(config) - self.model = MllamaTextModel - init(self.model) + self.vocab_size = config.text_config.vocab_size + self.hidden_size = config.text_config.hidden_size + self.max_num_tiles = config.vision_config.max_num_tiles + self.vision_output_dim = config.vision_config.vision_output_dim + self.pad_token_id = self.config.pad_token_id if self.config.pad_token_id is not None else -1 + + self.vision_model = MllamaVisionModel._from_config(config.vision_config) + self.language_model = MllamaTextModel._from_config(config.text_config) + self.multi_modal_projector = nn.Linear( + config.vision_config.vision_output_dim, + config.text_config.hidden_size, + bias=True, + ) + init(self.vision_model) + init(self.language_model) self._init_head_modules() + self.post_init() - self.init_weights() + def get_input_embeddings(self): + return self.language_model.get_input_embeddings() - # Model parallel - self.model_parallel = False - self.device_map = None - self.post_init() + def set_input_embeddings(self, value): + self.language_model.set_input_embeddings(value) + + def get_output_embeddings(self): + return self.language_model.get_output_embeddings() + + def set_output_embeddings(self, new_embeddings): + self.language_model.set_output_embeddings(new_embeddings) + + def set_decoder(self, decoder): + self.language_model.set_decoder(decoder) + + def get_decoder(self): + return self.language_model.get_decoder() + + def tie_weights(self): + return self.language_model.tie_weights() def forward( self, input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + aspect_ratio_mask: Optional[torch.Tensor] = None, + aspect_ratio_ids: Optional[torch.Tensor] = None, attention_mask: Optional[torch.Tensor] = None, - position_ids: Optional[torch.LongTensor] = None, - 
cross_attention_states: Optional[torch.FloatTensor] = None, cross_attention_mask: Optional[torch.Tensor] = None, - full_text_row_masked_out_mask: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, - past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, + cross_attention_states: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, + num_logits_to_keep: int = 0, head=None, output_adapter_gating_scores=False, output_adapter_fusion_attentions=False, **kwargs, - ) -> Union[Tuple, BaseModelOutputWithPast]: + ): # TODO -> output format + + # TODO: incorporate adapter logic with Forwardcontext and heads + # Establish parameter values output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states ) - use_cache = use_cache if use_cache is not None else self.config.use_cache return_dict = return_dict if return_dict is not None else self.config.use_return_dict - outputs, context = self.model( + # Check invalid argument combinations + if (input_ids is None) ^ (inputs_embeds is not None): + raise ValueError("You must specify exactly one of input_ids or inputs_embeds") + if pixel_values is not None and inputs_embeds is not None: + raise ValueError( + "You cannot specify both pixel_values and inputs_embeds at the same time, and must specify either one" + ) + if pixel_values is not None and cross_attention_states is not None: + raise ValueError("`pixel_values` and `cross_attention_states` cannot be provided simultaneously") + + # If image is provided compute cross_attention_states + if pixel_values is not None: + if aspect_ratio_ids is None: + raise ValueError("`aspect_ratio_ids` must be provided if `pixel_values` is provided") + vision_outputs = self.vision_model( + pixel_values=pixel_values, + aspect_ratio_ids=aspect_ratio_ids, + aspect_ratio_mask=aspect_ratio_mask, + output_hidden_states=output_hidden_states, + output_attentions=output_attentions, + return_dict=return_dict, + ) + cross_attention_states = vision_outputs[0] + cross_attention_states = self.multi_modal_projector(cross_attention_states).reshape( + -1, cross_attention_states.shape[-2], self.hidden_size + ) + + # Compute cross_attention_mask + if cross_attention_mask is not None: + cross_attention_mask, full_text_row_masked_out_mask = _prepare_cross_attention_mask( + cross_attention_mask, + num_vision_tokens=self.vision_model.num_patches, + dtype=self.dtype, + ) + else: + full_text_row_masked_out_mask = None + if cross_attention_mask is not None and cache_position is not None: + cross_attention_mask = cross_attention_mask[:, :, cache_position] + full_text_row_masked_out_mask = full_text_row_masked_out_mask[:, :, cache_position] + + outputs = self.language_model( input_ids=input_ids, attention_mask=attention_mask, position_ids=position_ids, @@ -80,74 +152,115 @@ def forward( cross_attention_mask=cross_attention_mask, full_text_row_masked_out_mask=full_text_row_masked_out_mask, past_key_values=past_key_values, - inputs_embeds=inputs_embeds, use_cache=use_cache, - 
output_attentions=output_attentions, + inputs_embeds=inputs_embeds, + labels=labels, output_hidden_states=output_hidden_states, + output_attentions=output_attentions, return_dict=return_dict, cache_position=cache_position, - output_adapter_gating_scores=output_adapter_gating_scores, - output_adapter_fusion_attentions=output_adapter_fusion_attentions, - adapter_input_parallelized=kwargs.pop("adapter_input_parallelized", False), - output_context=True, ) - kwargs["context"] = context - batch_size = outputs[0].shape[0] - if self.config.pad_token_id is None: - # TODO-AH: this may result in unexpected behavior for classification. Find a better way to do this? - sequence_lengths = -1 - else: - if input_ids is not None: - sequence_lengths = torch.ne(input_ids, self.config.pad_token_id).sum(-1) - 1 - (sequence_lengths,) = adjust_tensors_for_parallel(outputs[0], sequence_lengths) - else: - sequence_lengths = -1 - logger.warning( - f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be " - "unexpected if using padding tokens in conjunction with `inputs_embeds.`" - ) - - cls_logits = outputs[0][range(batch_size), sequence_lengths] - - outputs = self.forward_head( - outputs, - head_name=head, - cls_output=cls_logits, - attention_mask=attention_mask, - return_dict=return_dict, - **kwargs, - ) + # TODO: head logic, until now just copied! + hidden_states = outputs[0] + logits = self.lm_head(hidden_states[:, -num_logits_to_keep:, :]).float() + + loss = None + if labels is not None: + loss = self.loss_function(logits, labels, self.vocab_size, **kwargs) - return outputs + if not return_dict: + output = (logits,) + outputs[1:] + return (loss,) + output if loss is not None else output + + return CausalLMOutputWithPast( + loss=loss, + logits=logits, + past_key_values=outputs.past_key_values, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) def prepare_inputs_for_generation( - self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs + self, + input_ids=None, + inputs_embeds=None, + attention_mask=None, + position_ids=None, + pixel_values=None, + aspect_ratio_ids=None, + aspect_ratio_mask=None, + cross_attention_mask=None, + past_key_values=None, + use_cache=False, + cache_position=None, + num_logits_to_keep=None, + **kwargs, ): - if past_key_values: - input_ids = input_ids[:, -1:] + # Overwritten -- in specific circumstances we don't want to forward image inputs to the model + + # If we have cache: let's slice `input_ids` through `cache_position`, to keep only the unprocessed tokens + # Exception 1: when passing input_embeds, input_ids may be missing entries + # Exception 2: some generation methods do special slicing of input_ids, so we don't need to do it here + if past_key_values is not None: + if inputs_embeds is not None: # Exception 1 + input_ids = input_ids[:, -cache_position.shape[0] :] + elif input_ids.shape[1] != cache_position.shape[0]: # Default case (the "else", a no op, is Exception 2) + input_ids = input_ids[:, cache_position] - position_ids = kwargs.get("position_ids", None) + # TODO: we have no attention_mask so this won't work, check if we really won't need attention mask and find another way if attention_mask is not None and position_ids is None: # create position_ids on the fly for batch generation position_ids = attention_mask.long().cumsum(-1) - 1 position_ids.masked_fill_(attention_mask == 0, 1) if past_key_values: - position_ids = position_ids[:, -1].unsqueeze(-1) + position_ids = 
position_ids[:, -input_ids.shape[1] :] + + # This `clone` call is needed to avoid recapturing cuda graphs with `torch.compile`'s `mode="reduce-overhead`, as otherwise the input `position_ids` would have various stride during the decoding. Here, simply using `.contiguous()` is not sufficient as in the batch size = 1 case, `position_ids` is already contiguous but with varying stride which retriggers a capture. + position_ids = position_ids.clone(memory_format=torch.contiguous_format) # if `inputs_embeds` are passed, we only want to use them in the 1st generation step - if inputs_embeds is not None and past_key_values is None: - model_inputs = {"inputs_embeds": inputs_embeds} + if inputs_embeds is not None and cache_position[0] == 0: + model_inputs = {"inputs_embeds": inputs_embeds, "input_ids": None} else: - model_inputs = {"input_ids": input_ids} + # The clone here is for the same reason as for `position_ids`. + model_inputs = {"input_ids": input_ids.clone(memory_format=torch.contiguous_format), "inputs_embeds": None} + + if num_logits_to_keep is not None: + model_inputs["num_logits_to_keep"] = num_logits_to_keep model_inputs.update( { "position_ids": position_ids, + "cache_position": cache_position, "past_key_values": past_key_values, - "use_cache": kwargs.get("use_cache"), + "use_cache": use_cache, "attention_mask": attention_mask, - "adapter_input_parallelized": kwargs.pop("adapter_input_parallelized", False), + "cross_attention_mask": cross_attention_mask, } ) + + # If we're in pre-fill or cacheless decoding step, then we need pixel_values and aspect ratios + # to compute image hidden states, otherwise they are cached within each cross attn layer + if cache_position[0] == 0: + model_inputs["pixel_values"] = pixel_values + model_inputs["aspect_ratio_ids"] = aspect_ratio_ids + model_inputs["aspect_ratio_mask"] = aspect_ratio_mask + return model_inputs + + def _update_model_kwargs_for_generation(self, outputs, model_kwargs, is_encoder_decoder, **kwargs): + cross_attention_mask_prev = model_kwargs.get("cross_attention_mask", None) + model_kwargs = super()._update_model_kwargs_for_generation( + outputs=outputs, + model_kwargs=model_kwargs, + is_encoder_decoder=is_encoder_decoder, + **kwargs, + ) + + # add cross-attn mask for new token + if cross_attention_mask_prev is not None: + model_kwargs["cross_attention_mask"] = torch.cat( + [cross_attention_mask_prev, cross_attention_mask_prev[:, -1:, ...]], dim=1 + ) + return model_kwargs From 8f9298a278138dd0c15ec47b2b30114af143dc7c Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Wed, 8 Jan 2025 11:04:16 +0100 Subject: [PATCH 41/63] Draft mllama adapter mixins --- src/adapters/models/mllama/mixin_mllama.py | 144 +++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 src/adapters/models/mllama/mixin_mllama.py diff --git a/src/adapters/models/mllama/mixin_mllama.py b/src/adapters/models/mllama/mixin_mllama.py new file mode 100644 index 0000000000..a40f384756 --- /dev/null +++ b/src/adapters/models/mllama/mixin_mllama.py @@ -0,0 +1,144 @@ +from typing import Iterable, Tuple + +import torch.nn as nn + +from ...composition import adjust_tensors_for_parallel_ +from ...methods.bottleneck import BottleneckLayer +from ...methods.lora import LoRALinear +from ...methods.prefix_tuning import PrefixTuningLayer +from ...methods.reft import ReftLayer, hook_fn +from ...model_mixin import ( + EmbeddingAdaptersMixin, + InvertibleAdaptersMixin, + ModelBaseAdaptersMixin, +) +from ...utils import patch_forward + + +class 
MllamaBaseAttentionAdaptersMixin: + """Base mixin class for adding adapter support to attention modules in MLLaMA. + + Implements common adapter functionality for all attention variants including: + - LoRA adapters for query, key, and value projections + - Additional Prefix tuning layer + + This base implementation ensures consistent adapter behavior across different + attention mechanisms in the model. + """ + + def init_adapters(self, model_config, adapters_config): + # Wrap layers for LoRA + self.q_proj = LoRALinear.wrap(self.q_proj, "selfattn", model_config, adapters_config, attn_key="q") + self.k_proj = LoRALinear.wrap(self.k_proj, "selfattn", model_config, adapters_config, attn_key="k") + self.v_proj = LoRALinear.wrap(self.v_proj, "selfattn", model_config, adapters_config, attn_key="v") + + self.prefix_tuning = PrefixTuningLayer( + "self_prefix", model_config, adapters_config, add_model_type_to_key=True + ) + patch_forward(self) + + +class MllamaVisionAttentionAdaptersMixin(MllamaBaseAttentionAdaptersMixin): + """Mixin for adding adapter support to MLLaMA's vision attention module.""" + + +class MllamaTextCrossAttentionAdaptersMixin(MllamaBaseAttentionAdaptersMixin): + """Mixin for adding adapter support to MLLaMA's cross-attention module.""" + + +class MllamaTextSelfAttentionAdaptersMixin(MllamaBaseAttentionAdaptersMixin): + """Mixin for adding adapter support to MLLaMA's self-attention module.""" + + +class MllamaBaseLayerAdaptersMixin: + """Base mixin class for adding adapter support to MLLaMA layer modules. + + Implements common layer-level adapter functionality including: + - LoRA adapters for MLP layers (fc1/fc2) + - Bottleneck adapters for attention and output + - Forward pass patching for adapter integration + """ + + def init_adapters(self, model_config, adapters_config): + # Wrap layers for LoRA + self.mlp.fc1 = LoRALinear.wrap(self.mlp.fc1, "intermediate", model_config, adapters_config) + self.mlp.fc2 = LoRALinear.wrap(self.mlp.fc2, "output", model_config, adapters_config) + + self.attention_adapters = BottleneckLayer("mh_adapter") + self.output_adapters = BottleneckLayer("output_adapter") + + patch_forward(self) + + +class MllamaVisionEncoderLayerAdaptersMixin(MllamaBaseLayerAdaptersMixin): + """Mixin for adding adapter support to MLLaMA's vision encoder layers.""" + + +class MllamaSelfAttentionDecoderLayerAdaptersMixin(MllamaBaseLayerAdaptersMixin): + """Mixin for adding adapter support to MLLaMA's self-attention decoder layers.""" + + +class MllamaCrossAttentionDecoderLayerAdaptersMixin(MllamaBaseLayerAdaptersMixin): + """Mixin for adding adapter support to MLLaMA's cross-attention decoder layers.""" + + +class MllamaVisionEncoderAdaptersMixin: + """Mixin for adding adapter support to MLLaMA's vision encoder module. 
+ + Implements parallel composition support for vision encoder layers by: + - Setting up hooks to adjust tensors during forward pass for parallel adapter processing + """ + + def init_adapters(self, model_config, adapters_config): + # Set hook for parallel composition + for layer in self.layers: + self._set_layer_hook_for_parallel(layer) + + def _set_layer_hook_for_parallel(self, layer: nn.Module): + def hook(module, input): + adjust_tensors_for_parallel_(input[0], input[1]) + return input + + layer.register_forward_pre_hook(hook) + + +class MllamaVisionModelAdaptersMixin(ModelBaseAdaptersMixin): + """Adds adapters to the a MllamaVisionModel class.""" + + support_prompt_tuning = False + + def init_adapters(self, model_config, adapters_config): + super().init_adapters(model_config, adapters_config) + + # Register hook for post embedding forward + self.embed_tokens.register_forward_hook(self.post_embedding_forward) + + def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]: + for i, layer in enumerate(self.layers): + yield i, layer + + def post_embedding_forward(self, module, args, embedding_output): + embedding_output = self.invertible_adapters_forward(embedding_output) + # Prompt tuning not yet supported + return embedding_output + + +class MllamaTextModelAdaptersMixin(EmbeddingAdaptersMixin, InvertibleAdaptersMixin, ModelBaseAdaptersMixin): + """Adds adapters to the a MllamaTextModel class.""" + + support_prompt_tuning = False + + def init_adapters(self, model_config, adapters_config): + super().init_adapters(model_config, adapters_config) + + # Register hook for post embedding forward + self.embed_tokens.register_forward_hook(self.post_embedding_forward) + + def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]: + for i, layer in enumerate(self.layers): + yield i, layer + + def post_embedding_forward(self, module, args, embedding_output): + embedding_output = self.invertible_adapters_forward(embedding_output) + # Prompt tuning not yet supported + return embedding_output From d67692daba321539a9e880daa0f0c960daea648f Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Fri, 10 Jan 2025 10:37:29 +0100 Subject: [PATCH 42/63] Fix import structure --- src/adapters/__init__.py | 2 ++ src/adapters/models/__init__.py | 21 +++++++++++++++++++ src/adapters/models/auto/adapter_model.py | 1 + .../models/mllama/{_init_.py => __init__.py} | 0 src/adapters/wrappers/configuration.py | 1 + 5 files changed, 25 insertions(+) rename src/adapters/models/mllama/{_init_.py => __init__.py} (100%) diff --git a/src/adapters/__init__.py b/src/adapters/__init__.py index 88549c6969..ab24352678 100644 --- a/src/adapters/__init__.py +++ b/src/adapters/__init__.py @@ -111,6 +111,7 @@ "models.llama": ["LlamaAdapterModel"], "models.mbart": ["MBartAdapterModel"], "models.mistral": ["MistralAdapterModel"], + "models.mllama": ["MllamaAdapterModel"], "models.mt5": ["MT5AdapterModel"], "models.plbart": ["PLBartAdapterModel"], "models.roberta": ["RobertaAdapterModel"], @@ -222,6 +223,7 @@ from .models.llama import LlamaAdapterModel from .models.mbart import MBartAdapterModel from .models.mistral import MistralAdapterModel + from .models.mllama import MllamaAdapterModel from .models.mt5 import MT5AdapterModel from .models.plbart import PLBartAdapterModel from .models.roberta import RobertaAdapterModel diff --git a/src/adapters/models/__init__.py b/src/adapters/models/__init__.py index 77f569835d..52adf5030d 100644 --- a/src/adapters/models/__init__.py +++ b/src/adapters/models/__init__.py @@ -42,6 +42,17 @@ ) from 
.xmod.mixin_xmod import XmodModelAdaptersMixin +from .mllama.mixin_mllama import ( + MllamaCrossAttentionDecoderLayerAdaptersMixin, + MllamaSelfAttentionDecoderLayerAdaptersMixin, + MllamaTextCrossAttentionAdaptersMixin, + MllamaTextModelAdaptersMixin, + MllamaTextSelfAttentionAdaptersMixin, + MllamaVisionAttentionAdaptersMixin, + MllamaVisionEncoderAdaptersMixin, + MllamaVisionEncoderLayerAdaptersMixin, + MllamaVisionModelAdaptersMixin, +) # IMPORTANT: Only add classes to this mapping that are not copied into the adapters package MODEL_MIXIN_MAPPING = { @@ -109,4 +120,14 @@ "WhisperForAudioClassification": WhisperForAudioClassificationWithHeadsMixin, "LlamaForQuestionAnswering": LlamaForQuestionAnsweringAdapterMixin, "MistralModel": MistralModelAdapterMixin, + # Mulitmodal Llama + "MllamaVisionModel": MllamaVisionModelAdaptersMixin, + "MllamaTextModel": MllamaTextModelAdaptersMixin, + "MllamaVisionEncoder": MllamaVisionEncoderAdaptersMixin, + "MllamaVisionAttention": MllamaVisionAttentionAdaptersMixin, + "MllamaTextSelfAttention": MllamaTextSelfAttentionAdaptersMixin, + "MllamaTextCrossAttention": MllamaTextCrossAttentionAdaptersMixin, + "MllamaVisionEncoderLayer": MllamaVisionEncoderLayerAdaptersMixin, + "MllamaSelfAttentionDecoderLayer": MllamaSelfAttentionDecoderLayerAdaptersMixin, + "MllamaCrossAttentionDecoderLayer": MllamaCrossAttentionDecoderLayerAdaptersMixin, } diff --git a/src/adapters/models/auto/adapter_model.py b/src/adapters/models/auto/adapter_model.py index 6711752054..5f2497ff8b 100644 --- a/src/adapters/models/auto/adapter_model.py +++ b/src/adapters/models/auto/adapter_model.py @@ -24,6 +24,7 @@ ("llama", "LlamaAdapterModel"), ("mbart", "MBartAdapterModel"), ("mistral", "MistralAdapterModel"), + ("mllama", "MllamaAdapterModel") ("mt5", "MT5AdapterModel"), ("plbart", "PLBartAdapterModel"), ("roberta", "RobertaAdapterModel"), diff --git a/src/adapters/models/mllama/_init_.py b/src/adapters/models/mllama/__init__.py similarity index 100% rename from src/adapters/models/mllama/_init_.py rename to src/adapters/models/mllama/__init__.py diff --git a/src/adapters/wrappers/configuration.py b/src/adapters/wrappers/configuration.py index 40dc421787..f4513042fd 100644 --- a/src/adapters/wrappers/configuration.py +++ b/src/adapters/wrappers/configuration.py @@ -68,6 +68,7 @@ "attention_probs_dropout_prob": "attention_dropout", }, "xlm_roberta": {}, + # TODO: add mllama } SUBMODEL_NAMES = {"clip": ["vision_config", "text_config"], "encoder-decoder": ["encoder", "decoder"]} From 958b2c6bbc5b2bd58fe7336ec555b4a08aac92e1 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Fri, 10 Jan 2025 16:54:27 +0100 Subject: [PATCH 43/63] Reuse mixin implementations --- src/adapters/models/mllama/mixin_mllama.py | 90 +++++----------------- 1 file changed, 18 insertions(+), 72 deletions(-) diff --git a/src/adapters/models/mllama/mixin_mllama.py b/src/adapters/models/mllama/mixin_mllama.py index a40f384756..cfa55d547f 100644 --- a/src/adapters/models/mllama/mixin_mllama.py +++ b/src/adapters/models/mllama/mixin_mllama.py @@ -12,94 +12,36 @@ InvertibleAdaptersMixin, ModelBaseAdaptersMixin, ) -from ...utils import patch_forward +from ..llama.mixin_llama import LlamaAttentionMixin, LlamaDecoderLayerMixin +from ..clip.mixin_clip import CLIPEncoderLayerAdaptersMixin, CLIPEncoderAdaptersMixin -class MllamaBaseAttentionAdaptersMixin: - """Base mixin class for adding adapter support to attention modules in MLLaMA. 
- - Implements common adapter functionality for all attention variants including: - - LoRA adapters for query, key, and value projections - - Additional Prefix tuning layer - - This base implementation ensures consistent adapter behavior across different - attention mechanisms in the model. - """ - - def init_adapters(self, model_config, adapters_config): - # Wrap layers for LoRA - self.q_proj = LoRALinear.wrap(self.q_proj, "selfattn", model_config, adapters_config, attn_key="q") - self.k_proj = LoRALinear.wrap(self.k_proj, "selfattn", model_config, adapters_config, attn_key="k") - self.v_proj = LoRALinear.wrap(self.v_proj, "selfattn", model_config, adapters_config, attn_key="v") - - self.prefix_tuning = PrefixTuningLayer( - "self_prefix", model_config, adapters_config, add_model_type_to_key=True - ) - patch_forward(self) - - -class MllamaVisionAttentionAdaptersMixin(MllamaBaseAttentionAdaptersMixin): +class MllamaVisionAttentionAdaptersMixin(LlamaAttentionMixin): """Mixin for adding adapter support to MLLaMA's vision attention module.""" -class MllamaTextCrossAttentionAdaptersMixin(MllamaBaseAttentionAdaptersMixin): +class MllamaTextCrossAttentionAdaptersMixin(LlamaAttentionMixin): """Mixin for adding adapter support to MLLaMA's cross-attention module.""" -class MllamaTextSelfAttentionAdaptersMixin(MllamaBaseAttentionAdaptersMixin): +class MllamaTextSelfAttentionAdaptersMixin(LlamaAttentionMixin): """Mixin for adding adapter support to MLLaMA's self-attention module.""" -class MllamaBaseLayerAdaptersMixin: - """Base mixin class for adding adapter support to MLLaMA layer modules. - - Implements common layer-level adapter functionality including: - - LoRA adapters for MLP layers (fc1/fc2) - - Bottleneck adapters for attention and output - - Forward pass patching for adapter integration - """ - - def init_adapters(self, model_config, adapters_config): - # Wrap layers for LoRA - self.mlp.fc1 = LoRALinear.wrap(self.mlp.fc1, "intermediate", model_config, adapters_config) - self.mlp.fc2 = LoRALinear.wrap(self.mlp.fc2, "output", model_config, adapters_config) - - self.attention_adapters = BottleneckLayer("mh_adapter") - self.output_adapters = BottleneckLayer("output_adapter") - - patch_forward(self) - - -class MllamaVisionEncoderLayerAdaptersMixin(MllamaBaseLayerAdaptersMixin): +class MllamaVisionEncoderLayerAdaptersMixin(CLIPEncoderLayerAdaptersMixin): """Mixin for adding adapter support to MLLaMA's vision encoder layers.""" -class MllamaSelfAttentionDecoderLayerAdaptersMixin(MllamaBaseLayerAdaptersMixin): +class MllamaSelfAttentionDecoderLayerAdaptersMixin(LlamaDecoderLayerMixin): """Mixin for adding adapter support to MLLaMA's self-attention decoder layers.""" -class MllamaCrossAttentionDecoderLayerAdaptersMixin(MllamaBaseLayerAdaptersMixin): +class MllamaCrossAttentionDecoderLayerAdaptersMixin(LlamaDecoderLayerMixin): """Mixin for adding adapter support to MLLaMA's cross-attention decoder layers.""" -class MllamaVisionEncoderAdaptersMixin: - """Mixin for adding adapter support to MLLaMA's vision encoder module. 
- - Implements parallel composition support for vision encoder layers by: - - Setting up hooks to adjust tensors during forward pass for parallel adapter processing - """ - - def init_adapters(self, model_config, adapters_config): - # Set hook for parallel composition - for layer in self.layers: - self._set_layer_hook_for_parallel(layer) - - def _set_layer_hook_for_parallel(self, layer: nn.Module): - def hook(module, input): - adjust_tensors_for_parallel_(input[0], input[1]) - return input - - layer.register_forward_pre_hook(hook) +class MllamaVisionEncoderAdaptersMixin(CLIPEncoderAdaptersMixin): + """Mixin for adding adapter support to MLLaMA's vision encoder module. """ class MllamaVisionModelAdaptersMixin(ModelBaseAdaptersMixin): @@ -110,11 +52,15 @@ class MllamaVisionModelAdaptersMixin(ModelBaseAdaptersMixin): def init_adapters(self, model_config, adapters_config): super().init_adapters(model_config, adapters_config) - # Register hook for post embedding forward - self.embed_tokens.register_forward_hook(self.post_embedding_forward) + # no embeddings therefore no post embedding forward def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]: - for i, layer in enumerate(self.layers): + # Vision model has two encoders: + # 1. local transformer focusing on fine-grained, tile-level features + for i, layer in enumerate(self.transformer.layers): + yield i, layer + # 2. global transformer operating on output of the local transformer, integrating information across all tiles + for i, layer in enumerate(self.global_transformer.layers, start=len(self.transformer.layers)): yield i, layer def post_embedding_forward(self, module, args, embedding_output): @@ -141,4 +87,4 @@ def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]: def post_embedding_forward(self, module, args, embedding_output): embedding_output = self.invertible_adapters_forward(embedding_output) # Prompt tuning not yet supported - return embedding_output + return embedding_output \ No newline at end of file From 7507e1e659903b558e587d0d242fdf05a5304ec3 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Mon, 13 Jan 2025 16:36:02 +0100 Subject: [PATCH 44/63] Create MllamaModel class and adjust mixins accordingly --- src/adapters/models/mllama/adapter_model.py | 159 ++++++-------------- src/adapters/models/mllama/mixin_mllama.py | 79 +++++++--- 2 files changed, 102 insertions(+), 136 deletions(-) diff --git a/src/adapters/models/mllama/adapter_model.py b/src/adapters/models/mllama/adapter_model.py index 5dd58f19cc..73f845105b 100644 --- a/src/adapters/models/mllama/adapter_model.py +++ b/src/adapters/models/mllama/adapter_model.py @@ -5,18 +5,17 @@ from torch import nn from adapters.heads.language_modeling import CausalLMOutputWithPast -from hf_transformers.build.lib.transformers.cache_utils import Cache -from hf_transformers.build.lib.transformers.modeling_outputs import BaseModelOutputWithPast -from transformers.models.mllama import ( +from transformers.cache_utils import Cache +from transformers.modeling_outputs import BaseModelOutputWithPast +from transformers.models.mllama.modeling_mllama import ( MLLAMA_START_DOCSTRING, MllamaPreTrainedModel, - MllamaVisionModel, MllamaTextModel, + MllamaVisionModel, + _prepare_cross_attention_mask, ) -from transformers.models.mllama.modeling_mllama import _prepare_cross_attention_mask from transformers.utils import add_start_docstrings -from ...composition import adjust_tensors_for_parallel from ...heads import ModelWithFlexibleHeadsAdaptersMixin from ...model_mixin import 
EmbeddingAdaptersWrapperMixin from ...wrappers import init @@ -25,8 +24,11 @@ logger = logging.getLogger(__name__) -@add_start_docstrings(MLLAMA_START_DOCSTRING) -class MllamaAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MllamaPreTrainedModel): +class MllamaModel(MllamaPreTrainedModel): + """ + Base MLLaMA model that provides the fundamental architecture combining vision and text. + This serves as the foundation for the specialized adapter model version. + """ def __init__(self, config): super().__init__(config) @@ -43,10 +45,6 @@ def __init__(self, config): config.text_config.hidden_size, bias=True, ) - - init(self.vision_model) - init(self.language_model) - self._init_head_modules() self.post_init() def get_input_embeddings(self): @@ -89,13 +87,7 @@ def forward( return_dict: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, num_logits_to_keep: int = 0, - head=None, - output_adapter_gating_scores=False, - output_adapter_fusion_attentions=False, - **kwargs, - ): # TODO -> output format - - # TODO: incorporate adapter logic with Forwardcontext and heads + ) -> Union[Tuple, BaseModelOutputWithPast]: # Establish parameter values output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions @@ -161,106 +153,43 @@ def forward( cache_position=cache_position, ) - # TODO: head logic, until now just copied! - hidden_states = outputs[0] - logits = self.lm_head(hidden_states[:, -num_logits_to_keep:, :]).float() + return outputs - loss = None - if labels is not None: - loss = self.loss_function(logits, labels, self.vocab_size, **kwargs) - if not return_dict: - output = (logits,) + outputs[1:] - return (loss,) + output if loss is not None else output +@add_start_docstrings(MLLAMA_START_DOCSTRING) +class MllamaAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MllamaPreTrainedModel): - return CausalLMOutputWithPast( - loss=loss, - logits=logits, - past_key_values=outputs.past_key_values, - hidden_states=outputs.hidden_states, - attentions=outputs.attentions, - ) + def __init__(self, config): + super().__init__(config) + + self.model = MllamaModel(config) + init(self.model) - def prepare_inputs_for_generation( + self._init_head_modules() + self.post_init() + + def forward( self, - input_ids=None, - inputs_embeds=None, - attention_mask=None, - position_ids=None, - pixel_values=None, - aspect_ratio_ids=None, - aspect_ratio_mask=None, - cross_attention_mask=None, - past_key_values=None, - use_cache=False, - cache_position=None, - num_logits_to_keep=None, + input_ids: Optional[torch.LongTensor] = None, + pixel_values: Optional[torch.FloatTensor] = None, + aspect_ratio_mask: Optional[torch.Tensor] = None, + aspect_ratio_ids: Optional[torch.Tensor] = None, + attention_mask: Optional[torch.Tensor] = None, + cross_attention_mask: Optional[torch.Tensor] = None, + cross_attention_states: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + cache_position: Optional[torch.LongTensor] = None, + num_logits_to_keep: int = 0, + head=None, + output_adapter_gating_scores=False, + output_adapter_fusion_attentions=False, **kwargs, ): - # 
Overwritten -- in specific circumstances we don't want to forward image inputs to the model - - # If we have cache: let's slice `input_ids` through `cache_position`, to keep only the unprocessed tokens - # Exception 1: when passing input_embeds, input_ids may be missing entries - # Exception 2: some generation methods do special slicing of input_ids, so we don't need to do it here - if past_key_values is not None: - if inputs_embeds is not None: # Exception 1 - input_ids = input_ids[:, -cache_position.shape[0] :] - elif input_ids.shape[1] != cache_position.shape[0]: # Default case (the "else", a no op, is Exception 2) - input_ids = input_ids[:, cache_position] - - # TODO: we have no attention_mask so this won't work, check if we really won't need attention mask and find another way - if attention_mask is not None and position_ids is None: - # create position_ids on the fly for batch generation - position_ids = attention_mask.long().cumsum(-1) - 1 - position_ids.masked_fill_(attention_mask == 0, 1) - if past_key_values: - position_ids = position_ids[:, -input_ids.shape[1] :] - - # This `clone` call is needed to avoid recapturing cuda graphs with `torch.compile`'s `mode="reduce-overhead`, as otherwise the input `position_ids` would have various stride during the decoding. Here, simply using `.contiguous()` is not sufficient as in the batch size = 1 case, `position_ids` is already contiguous but with varying stride which retriggers a capture. - position_ids = position_ids.clone(memory_format=torch.contiguous_format) - - # if `inputs_embeds` are passed, we only want to use them in the 1st generation step - if inputs_embeds is not None and cache_position[0] == 0: - model_inputs = {"inputs_embeds": inputs_embeds, "input_ids": None} - else: - # The clone here is for the same reason as for `position_ids`. 
- model_inputs = {"input_ids": input_ids.clone(memory_format=torch.contiguous_format), "inputs_embeds": None} - - if num_logits_to_keep is not None: - model_inputs["num_logits_to_keep"] = num_logits_to_keep - - model_inputs.update( - { - "position_ids": position_ids, - "cache_position": cache_position, - "past_key_values": past_key_values, - "use_cache": use_cache, - "attention_mask": attention_mask, - "cross_attention_mask": cross_attention_mask, - } - ) - - # If we're in pre-fill or cacheless decoding step, then we need pixel_values and aspect ratios - # to compute image hidden states, otherwise they are cached within each cross attn layer - if cache_position[0] == 0: - model_inputs["pixel_values"] = pixel_values - model_inputs["aspect_ratio_ids"] = aspect_ratio_ids - model_inputs["aspect_ratio_mask"] = aspect_ratio_mask - - return model_inputs - - def _update_model_kwargs_for_generation(self, outputs, model_kwargs, is_encoder_decoder, **kwargs): - cross_attention_mask_prev = model_kwargs.get("cross_attention_mask", None) - model_kwargs = super()._update_model_kwargs_for_generation( - outputs=outputs, - model_kwargs=model_kwargs, - is_encoder_decoder=is_encoder_decoder, - **kwargs, - ) - - # add cross-attn mask for new token - if cross_attention_mask_prev is not None: - model_kwargs["cross_attention_mask"] = torch.cat( - [cross_attention_mask_prev, cross_attention_mask_prev[:, -1:, ...]], dim=1 - ) - return model_kwargs + pass diff --git a/src/adapters/models/mllama/mixin_mllama.py b/src/adapters/models/mllama/mixin_mllama.py index cfa55d547f..0f629ddc30 100644 --- a/src/adapters/models/mllama/mixin_mllama.py +++ b/src/adapters/models/mllama/mixin_mllama.py @@ -2,18 +2,15 @@ import torch.nn as nn -from ...composition import adjust_tensors_for_parallel_ -from ...methods.bottleneck import BottleneckLayer -from ...methods.lora import LoRALinear -from ...methods.prefix_tuning import PrefixTuningLayer from ...methods.reft import ReftLayer, hook_fn from ...model_mixin import ( EmbeddingAdaptersMixin, InvertibleAdaptersMixin, + InvertibleAdaptersWrapperMixin, ModelBaseAdaptersMixin, ) +from ..clip.mixin_clip import CLIPEncoderAdaptersMixin, CLIPEncoderLayerAdaptersMixin from ..llama.mixin_llama import LlamaAttentionMixin, LlamaDecoderLayerMixin -from ..clip.mixin_clip import CLIPEncoderLayerAdaptersMixin, CLIPEncoderAdaptersMixin class MllamaVisionAttentionAdaptersMixin(LlamaAttentionMixin): @@ -41,7 +38,7 @@ class MllamaCrossAttentionDecoderLayerAdaptersMixin(LlamaDecoderLayerMixin): class MllamaVisionEncoderAdaptersMixin(CLIPEncoderAdaptersMixin): - """Mixin for adding adapter support to MLLaMA's vision encoder module. """ + """Mixin for adding adapter support to MLLaMA's vision encoder module.""" class MllamaVisionModelAdaptersMixin(ModelBaseAdaptersMixin): @@ -49,11 +46,6 @@ class MllamaVisionModelAdaptersMixin(ModelBaseAdaptersMixin): support_prompt_tuning = False - def init_adapters(self, model_config, adapters_config): - super().init_adapters(model_config, adapters_config) - - # no embeddings therefore no post embedding forward - def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]: # Vision model has two encoders: # 1. 
local transformer focusing on fine-grained, tile-level features @@ -63,11 +55,6 @@ def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]: for i, layer in enumerate(self.global_transformer.layers, start=len(self.transformer.layers)): yield i, layer - def post_embedding_forward(self, module, args, embedding_output): - embedding_output = self.invertible_adapters_forward(embedding_output) - # Prompt tuning not yet supported - return embedding_output - class MllamaTextModelAdaptersMixin(EmbeddingAdaptersMixin, InvertibleAdaptersMixin, ModelBaseAdaptersMixin): """Adds adapters to the a MllamaTextModel class.""" @@ -78,13 +65,63 @@ def init_adapters(self, model_config, adapters_config): super().init_adapters(model_config, adapters_config) # Register hook for post embedding forward - self.embed_tokens.register_forward_hook(self.post_embedding_forward) + self.embeddings.register_forward_hook(self.post_embedding_forward) + + def post_embedding_forward(self, module, args, embedding_output): + embedding_output = self.invertible_adapters_forward(embedding_output) + # Prompt tuning not yet supported + return embedding_output def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]: for i, layer in enumerate(self.layers): yield i, layer - def post_embedding_forward(self, module, args, embedding_output): - embedding_output = self.invertible_adapters_forward(embedding_output) - # Prompt tuning not yet supported - return embedding_output \ No newline at end of file + +class MllamaAdaptersMixin(EmbeddingAdaptersMixin, InvertibleAdaptersWrapperMixin, ModelBaseAdaptersMixin): + """ + Adds adapters to the MLLaMA model, handling both vision and text components. + """ + + invertible_adapters_base_name = "language_model" # Changed from text_model to match MLLaMA's naming + support_prompt_tuning = False + + def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]: + layer_idx = 0 + + # First iterate through vision model's local transformer layers + for _, layer in enumerate(self.vision_model.iter_layers()): + yield layer_idx, layer + layer_idx += 1 + + for _, layer in enumerate(self.language_model.layers): + yield layer_idx, layer + layer_idx += 1 + + def _init_adapters_submodules(self, model_config, adapters_config): + """Initialize adapters in vision and language models separately.""" + # Initialize vision model adapters + for module in self.vision_model.modules(): + if hasattr(module, "init_adapters"): + module.init_adapters(model_config.vision_config, adapters_config) + + # Initialize language model adapters + for module in self.language_model.modules(): + if hasattr(module, "init_adapters"): + module.init_adapters(model_config.text_config, adapters_config) + + # Initialize ReFT for all layers if needed + self._init_reft_layers(model_config, adapters_config) + + def _init_reft_layers(self, model_config, adapters_config): + """Initialize ReFT layers for both vision and language components.""" + # Vision local transformer + for _, layer in self.vision_model.iter_layers(): + if not hasattr(layer, "reft_layer"): + layer.reft_layer = ReftLayer("output", model_config.vision_config, adapters_config) + layer.register_forward_hook(hook_fn) + + # Language model layers + for _, layer in self.language_model.iter_layers(): + if not hasattr(layer, "reft_layer"): + layer.reft_layer = ReftLayer("output", model_config.text_config, adapters_config) + layer.register_forward_hook(hook_fn) From d2a28d8114c6a4cd1157dde86fe0df5c817cb341 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Mon, 13 Jan 2025 16:43:08 +0100 Subject: [PATCH 
45/63] Re-implement MllamaAdapterModel --- src/adapters/models/mllama/adapter_model.py | 36 ++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/src/adapters/models/mllama/adapter_model.py b/src/adapters/models/mllama/adapter_model.py index 73f845105b..eb475554c1 100644 --- a/src/adapters/models/mllama/adapter_model.py +++ b/src/adapters/models/mllama/adapter_model.py @@ -16,6 +16,7 @@ ) from transformers.utils import add_start_docstrings +from ...context import AdapterSetup from ...heads import ModelWithFlexibleHeadsAdaptersMixin from ...model_mixin import EmbeddingAdaptersWrapperMixin from ...wrappers import init @@ -192,4 +193,37 @@ def forward( output_adapter_fusion_attentions=False, **kwargs, ): - pass + + outputs, context = self.model( + input_ids=input_ids, + pixel_values=pixel_values, + aspect_ratio_mask=aspect_ratio_mask, + aspect_ratio_ids=aspect_ratio_ids, + attention_mask=attention_mask, + cross_attention_mask=cross_attention_mask, + cross_attention_states=cross_attention_states, + position_ids=position_ids, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + labels=labels, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + cache_position=cache_position, + num_logits_to_keep=num_logits_to_keep, + adapter_input_parallelized=kwargs.pop("adapter_input_parallelized", False), + output_context=True, + ) + kwargs["context"] = context + + if head or AdapterSetup.get_context_head_setup() or self.active_head: + head_outputs = self.forward_head( + outputs, + head_name=head, + attention_mask=attention_mask, + return_dict=return_dict, + **kwargs, + ) + return head_outputs + return outputs From 6d39941f826f1cad061cb414fd3fea9d2f5be572 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Mon, 13 Jan 2025 17:47:42 +0100 Subject: [PATCH 46/63] Fix typos --- src/adapters/models/__init__.py | 24 ++++++++++++---------- src/adapters/models/auto/adapter_model.py | 2 +- src/adapters/models/mllama/mixin_mllama.py | 11 +++++----- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/adapters/models/__init__.py b/src/adapters/models/__init__.py index 52adf5030d..2b7c55fc34 100644 --- a/src/adapters/models/__init__.py +++ b/src/adapters/models/__init__.py @@ -20,6 +20,18 @@ from .gptj.mixin_gptj import GPTJMLPAdaptersMixin, GPTJModelAdapterMixin from .llama.mixin_llama import LlamaForQuestionAnsweringAdapterMixin, LlamaModelAdapterMixin from .mistral.mixin_mistral import MistralModelAdapterMixin +from .mllama.mixin_mllama import ( + MllamaAdaptersMixin, + MllamaCrossAttentionDecoderLayerAdaptersMixin, + MllamaSelfAttentionDecoderLayerAdaptersMixin, + MllamaTextCrossAttentionAdaptersMixin, + MllamaTextModelAdaptersMixin, + MllamaTextSelfAttentionAdaptersMixin, + MllamaVisionAttentionAdaptersMixin, + MllamaVisionEncoderAdaptersMixin, + MllamaVisionEncoderLayerAdaptersMixin, + MllamaVisionModelAdaptersMixin, +) from .plbart.mixin_plbart import ( PLBartDecoderAdaptersMixin, PLBartDecoderWrapperAdaptersMixin, @@ -42,17 +54,6 @@ ) from .xmod.mixin_xmod import XmodModelAdaptersMixin -from .mllama.mixin_mllama import ( - MllamaCrossAttentionDecoderLayerAdaptersMixin, - MllamaSelfAttentionDecoderLayerAdaptersMixin, - MllamaTextCrossAttentionAdaptersMixin, - MllamaTextModelAdaptersMixin, - MllamaTextSelfAttentionAdaptersMixin, - MllamaVisionAttentionAdaptersMixin, - MllamaVisionEncoderAdaptersMixin, - MllamaVisionEncoderLayerAdaptersMixin, - MllamaVisionModelAdaptersMixin, -) 
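For illustration, a minimal usage sketch of the drafted flexible-head model — assuming the MllamaAdapterModel registration and its "causal_lm" head support land as drafted in the patches above. The checkpoint name, adapter name, and prompt below are placeholders, not values taken from the patches.

    import torch
    from PIL import Image
    from transformers import AutoProcessor
    from adapters import LoRAConfig, MllamaAdapterModel

    checkpoint = "meta-llama/Llama-3.2-11B-Vision"  # placeholder checkpoint name

    processor = AutoProcessor.from_pretrained(checkpoint)
    model = MllamaAdapterModel.from_pretrained(checkpoint)

    # Attach a LoRA adapter and a causal LM head, then put only the adapter into training mode
    model.add_adapter("mllama_lora", config=LoRAConfig())
    model.add_causal_lm_head("mllama_lora")
    model.train_adapter("mllama_lora")

    # Build a toy multimodal input: one blank image plus a short prompt containing the image token
    image = Image.new("RGB", (224, 224))
    inputs = processor(images=image, text="<|image|>Describe the image.", return_tensors="pt")

    # The forward pass delegates to MllamaModel and the active head turns the hidden states into LM logits
    outputs = model(**inputs)
    print(outputs.logits.shape)
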
# IMPORTANT: Only add classes to this mapping that are not copied into the adapters package MODEL_MIXIN_MAPPING = { @@ -121,6 +122,7 @@ "LlamaForQuestionAnswering": LlamaForQuestionAnsweringAdapterMixin, "MistralModel": MistralModelAdapterMixin, # Mulitmodal Llama + "MllamaModel": MllamaAdaptersMixin, "MllamaVisionModel": MllamaVisionModelAdaptersMixin, "MllamaTextModel": MllamaTextModelAdaptersMixin, "MllamaVisionEncoder": MllamaVisionEncoderAdaptersMixin, diff --git a/src/adapters/models/auto/adapter_model.py b/src/adapters/models/auto/adapter_model.py index 5f2497ff8b..9921b1f87e 100644 --- a/src/adapters/models/auto/adapter_model.py +++ b/src/adapters/models/auto/adapter_model.py @@ -24,7 +24,7 @@ ("llama", "LlamaAdapterModel"), ("mbart", "MBartAdapterModel"), ("mistral", "MistralAdapterModel"), - ("mllama", "MllamaAdapterModel") + ("mllama", "MllamaAdapterModel"), ("mt5", "MT5AdapterModel"), ("plbart", "PLBartAdapterModel"), ("roberta", "RobertaAdapterModel"), diff --git a/src/adapters/models/mllama/mixin_mllama.py b/src/adapters/models/mllama/mixin_mllama.py index 0f629ddc30..27b4aa7cbb 100644 --- a/src/adapters/models/mllama/mixin_mllama.py +++ b/src/adapters/models/mllama/mixin_mllama.py @@ -5,6 +5,7 @@ from ...methods.reft import ReftLayer, hook_fn from ...model_mixin import ( EmbeddingAdaptersMixin, + EmbeddingAdaptersWrapperMixin, InvertibleAdaptersMixin, InvertibleAdaptersWrapperMixin, ModelBaseAdaptersMixin, @@ -65,7 +66,7 @@ def init_adapters(self, model_config, adapters_config): super().init_adapters(model_config, adapters_config) # Register hook for post embedding forward - self.embeddings.register_forward_hook(self.post_embedding_forward) + self.embed_tokens.register_forward_hook(self.post_embedding_forward) def post_embedding_forward(self, module, args, embedding_output): embedding_output = self.invertible_adapters_forward(embedding_output) @@ -77,23 +78,23 @@ def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]: yield i, layer -class MllamaAdaptersMixin(EmbeddingAdaptersMixin, InvertibleAdaptersWrapperMixin, ModelBaseAdaptersMixin): +class MllamaAdaptersMixin(EmbeddingAdaptersWrapperMixin, InvertibleAdaptersWrapperMixin, ModelBaseAdaptersMixin): """ Adds adapters to the MLLaMA model, handling both vision and text components. 
""" - invertible_adapters_base_name = "language_model" # Changed from text_model to match MLLaMA's naming + invertible_adapters_base_name = "language_model" support_prompt_tuning = False def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]: layer_idx = 0 # First iterate through vision model's local transformer layers - for _, layer in enumerate(self.vision_model.iter_layers()): + for _, layer in self.vision_model.iter_layers(): yield layer_idx, layer layer_idx += 1 - for _, layer in enumerate(self.language_model.layers): + for _, layer in self.language_model.iter_layers(): yield layer_idx, layer layer_idx += 1 From 4c153b4c00cbad6c2cc8bdedcfbfdadf63bdfd61 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Tue, 14 Jan 2025 11:57:51 +0100 Subject: [PATCH 47/63] Draft adapter attention classes --- src/adapters/models/mllama/adapter_model.py | 13 +- src/adapters/models/mllama/modeling_mllama.py | 476 ++++++++++++++++++ 2 files changed, 486 insertions(+), 3 deletions(-) create mode 100644 src/adapters/models/mllama/modeling_mllama.py diff --git a/src/adapters/models/mllama/adapter_model.py b/src/adapters/models/mllama/adapter_model.py index eb475554c1..b3e7b4fe52 100644 --- a/src/adapters/models/mllama/adapter_model.py +++ b/src/adapters/models/mllama/adapter_model.py @@ -87,7 +87,6 @@ def forward( output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, cache_position: Optional[torch.LongTensor] = None, - num_logits_to_keep: int = 0, ) -> Union[Tuple, BaseModelOutputWithPast]: # Establish parameter values @@ -160,6 +159,10 @@ def forward( @add_start_docstrings(MLLAMA_START_DOCSTRING) class MllamaAdapterModel(EmbeddingAdaptersWrapperMixin, ModelWithFlexibleHeadsAdaptersMixin, MllamaPreTrainedModel): + head_types = [ + "causal_lm", + ] + def __init__(self, config): super().__init__(config) @@ -211,15 +214,19 @@ def forward( output_hidden_states=output_hidden_states, return_dict=return_dict, cache_position=cache_position, - num_logits_to_keep=num_logits_to_keep, + output_adapter_gating_scores=output_adapter_gating_scores, + output_adapter_fusion_attentions=output_adapter_fusion_attentions, adapter_input_parallelized=kwargs.pop("adapter_input_parallelized", False), output_context=True, ) kwargs["context"] = context + hidden_states = outputs[0] + head_input_states = hidden_states[:, -num_logits_to_keep:, :] + if head or AdapterSetup.get_context_head_setup() or self.active_head: head_outputs = self.forward_head( - outputs, + head_input_states, head_name=head, attention_mask=attention_mask, return_dict=return_dict, diff --git a/src/adapters/models/mllama/modeling_mllama.py b/src/adapters/models/mllama/modeling_mllama.py new file mode 100644 index 0000000000..036e39895f --- /dev/null +++ b/src/adapters/models/mllama/modeling_mllama.py @@ -0,0 +1,476 @@ +# coding=utf-8 +# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. +# +# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX +# and OPT implementations in this library. It has been modified from its +# original forms to accommodate minor architectural differences compared +# to GPT-NeoX and OPT used by the Meta AI team that trained the model. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import warnings +from typing import Optional, Tuple + +import torch +import torch.nn.functional as F +import torch.utils.checkpoint +from torch import nn + +from adapters.composition import adjust_tensors_for_parallel, match_attn_matrices_for_parallel +from transformers.cache_utils import Cache +from transformers.models.mllama.modeling_mllama import ( + repeat_kv, + apply_rotary_pos_emb, + MllamaVisionAttention, + MllamaVisionSdpaAttention, + MllamaTextCrossAttention, + MllamaTextCrossSdpaAttention, + MllamaTextSelfAttention, + MllamaTextSelfSdpaAttention, +) +from transformers.utils import logging + +from .mixin_mllama import ( + MllamaVisionAttentionAdaptersMixin, + MllamaTextCrossAttentionAdaptersMixin, + MllamaTextSelfAttentionAdaptersMixin, +) + + +logger = logging.get_logger(__name__) + + +class MllamaVisionAttentionWithAdapters(MllamaVisionAttentionAdaptersMixin, MllamaVisionAttention): + + def forward( + self, + hidden_state: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + output_attentions: bool = None, + ) -> torch.Tensor: + query = self.q_proj(hidden_state) + key = self.k_proj(hidden_state) + value = self.v_proj(hidden_state) + + batch_size, q_seq_len, _ = query.shape + _, kv_seq_len, _ = key.shape + + query = query.view(batch_size, q_seq_len, self.num_heads, self.head_dim).transpose(1, 2) + key = key.view(batch_size, kv_seq_len, self.num_heads, self.head_dim).transpose(1, 2) + value = value.view(batch_size, kv_seq_len, self.num_heads, self.head_dim).transpose(1, 2) + + # >>> START AH Changes <<< + query, key, value = match_attn_matrices_for_parallel(query, key, value) + (attention_mask,) = adjust_tensors_for_parallel(query, attention_mask) + # >>> END AH Changes <<< + + attn_weights = torch.matmul(query, key.transpose(2, 3)) / math.sqrt(self.head_dim) + + if attention_mask is not None: # no matter the length, we just slice it + causal_mask = attention_mask[:, :, :, : key.shape[-2]] + attn_weights = attn_weights + causal_mask + + # upcast attention to fp32 + attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query.dtype) + attn_output = torch.matmul(attn_weights, value) + + attn_output = attn_output.transpose(1, 2).contiguous() + attn_output = attn_output.reshape(batch_size, q_seq_len, -1) + + output = self.o_proj(attn_output) + + if not output_attentions: + attn_weights = None + + return output, attn_weights + + +class MllamaVisionSdpaAttentionWithAdapters(MllamaVisionAttentionAdaptersMixin, MllamaVisionSdpaAttention): + + def forward( + self, + hidden_state: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + output_attentions: bool = None, + ) -> torch.Tensor: + if output_attentions: + logger.warning_once( + "MllamaModel is using MllamaVisionSdpaAttention, but `torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`. Falling back to the manual attention implementation, " + 'but specifying the manual implementation will be required from Transformers version v5.0.0 onwards. 
This warning can be removed using the argument `attn_implementation="eager"` when loading the model.' + ) + return super().forward( + hidden_state=hidden_state, + attention_mask=attention_mask, + output_attentions=output_attentions, + ) + + query = self.q_proj(hidden_state) + key = self.k_proj(hidden_state) + value = self.v_proj(hidden_state) + + batch_size, q_seq_len, _ = query.shape + _, kv_seq_len, _ = key.shape + + query = query.view(batch_size, q_seq_len, self.num_heads, self.head_dim) + key = key.view(batch_size, kv_seq_len, self.num_heads, self.head_dim) + value = value.view(batch_size, kv_seq_len, self.num_heads, self.head_dim) + + query = query.transpose(1, 2) + key = key.transpose(1, 2) + value = value.transpose(1, 2) + + # >>> START AH Changes <<< + query, key, value = match_attn_matrices_for_parallel(query, key, value) + (attention_mask,) = adjust_tensors_for_parallel(query, attention_mask) + # >>> END AH Changes <<< + + attn_output = F.scaled_dot_product_attention(query, key, value, attn_mask=attention_mask) + + attn_output = attn_output.transpose(1, 2).contiguous() + attn_output = attn_output.reshape(batch_size, q_seq_len, -1) + + output = self.o_proj(attn_output) + + return output, None + + +class MllamaTextCrossAttentionWithAdapters(MllamaTextCrossAttentionAdaptersMixin, MllamaTextCrossAttention): + def forward( + self, + hidden_states: torch.Tensor, + cross_attention_states: Optional[torch.Tensor] = None, + past_key_value: Optional[Cache] = None, + attention_mask: Optional[torch.Tensor] = None, + output_attentions: bool = False, + use_cache: bool = None, + cache_position: Optional[torch.LongTensor] = None, + ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + """Input shape: Batch x Time x Channel""" + bsz, q_len, _ = hidden_states.size() + query_states = self.q_proj(hidden_states) + query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2) + query_states = self.q_norm(query_states) + + if cross_attention_states is not None: + key_states = self.k_proj(cross_attention_states) + value_states = self.v_proj(cross_attention_states) + key_states = key_states.view(bsz, -1, self.num_key_value_heads, self.head_dim).transpose(1, 2) + value_states = value_states.view(bsz, -1, self.num_key_value_heads, self.head_dim).transpose(1, 2) + key_states = repeat_kv(key_states, self.num_key_value_groups) + value_states = repeat_kv(value_states, self.num_key_value_groups) + + key_states = self.k_norm(key_states) + if past_key_value is not None: + # if we have a new image + new tokens, we only computed key_states on that new image + # we still update the cross key states, past_image, new_image. And use it! + key_states, value_states = past_key_value.update( + key_states, value_states, self.layer_idx, {"cache_position": cache_position} + ) + elif cache_position[0] != 0: + key_states, value_states = ( + past_key_value.key_cache[self.layer_idx], + past_key_value.value_cache[self.layer_idx], + ) + else: + raise ValueError( + "Cross attention layer can't find neither `cross_attn_states` nor cached values for key/values!" 
+ ) + + # >>> START AH Changes <<< + query_states, key_states, value_states = match_attn_matrices_for_parallel( + query_states, key_states, value_states + ) + (attention_mask,) = adjust_tensors_for_parallel(query_states, attention_mask) + # >>> END AH Changes <<< + + attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim) + + if attention_mask is not None: # no matter the length, we just slice it + causal_mask = attention_mask[:, :, :, : key_states.shape[-2]] + attn_weights = attn_weights + causal_mask + + attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) + attn_weights = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training) + attn_output = torch.matmul(attn_weights, value_states) + attn_output = attn_output.transpose(1, 2).contiguous() + attn_output = attn_output.reshape(bsz, q_len, -1) + attn_output = self.o_proj(attn_output) + + if not output_attentions: + attn_weights = None + + return attn_output, attn_weights, past_key_value + + +class MllamaTextCrossSdpaAttentionWithAdapters(MllamaTextCrossAttentionAdaptersMixin, MllamaTextCrossSdpaAttention): + + def forward( + self, + hidden_states: torch.Tensor, + cross_attention_states: Optional[torch.Tensor] = None, + past_key_value: Optional[Cache] = None, + attention_mask: Optional[torch.Tensor] = None, + output_attentions: bool = False, + use_cache: bool = None, + cache_position: Optional[torch.LongTensor] = None, + ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + """Input shape: Batch x Time x Channel""" + if output_attentions: + # TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented. + logger.warning_once( + "MllamaModel is using MllamaTextCrossSdpaAttention, but `torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`. Falling back to the manual attention implementation, " + 'but specifying the manual implementation will be required from Transformers version v5.0.0 onwards. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.' + ) + return super().forward( + hidden_states=hidden_states, + cross_attention_states=cross_attention_states, + attention_mask=attention_mask, + past_key_value=past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + cache_position=cache_position, + ) + + bsz, q_len, _ = hidden_states.size() + query_states = self.q_proj(hidden_states) + query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2) + query_states = self.q_norm(query_states) + + if cross_attention_states is not None: + key_states = self.k_proj(cross_attention_states) + value_states = self.v_proj(cross_attention_states) + key_states = key_states.view(bsz, -1, self.num_key_value_heads, self.head_dim).transpose(1, 2) + value_states = value_states.view(bsz, -1, self.num_key_value_heads, self.head_dim).transpose(1, 2) + + if past_key_value is not None: + # if we have a new image + new tokens, we only computed key_states on that new image + # we still update the cross key states, past_image, new_image. And use it! 
+ key_states, value_states = past_key_value.update( + key_states, value_states, self.layer_idx, {"cache_position": cache_position} + ) + elif cache_position[0] != 0: + key_states, value_states = ( + past_key_value.key_cache[self.layer_idx], + past_key_value.value_cache[self.layer_idx], + ) + else: + raise ValueError( + "Cross attention layer can't find neither `cross_attn_states` nor cached values for key/values!" + ) + + # >>> START AH Changes <<< + query_states, key_states, value_states = match_attn_matrices_for_parallel( + query_states, key_states, value_states + ) + (attention_mask,) = adjust_tensors_for_parallel(query_states, attention_mask) + # >>> END AH Changes <<< + + key_states = repeat_kv(key_states, self.num_key_value_groups) + value_states = repeat_kv(value_states, self.num_key_value_groups) + + key_states = self.k_norm(key_states) + + # SDPA with memory-efficient backend is currently (torch==2.1.2) bugged with non-contiguous inputs with custom attn_mask, + # Reference: https://github.com/pytorch/pytorch/issues/112577. + if query_states.device.type == "cuda" and attention_mask is not None: + query_states = query_states.contiguous() + key_states = key_states.contiguous() + value_states = value_states.contiguous() + + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. + is_causal = True if attention_mask is None and q_len > 1 else False + + attn_output = torch.nn.functional.scaled_dot_product_attention( + query_states, + key_states, + value_states, + attn_mask=attention_mask, + dropout_p=self.dropout if self.training else 0.0, + is_causal=is_causal, + ) + + attn_output = attn_output.transpose(1, 2).contiguous() + attn_output = attn_output.reshape(bsz, q_len, -1) + attn_output = self.o_proj(attn_output) + + return attn_output, None, past_key_value + + +class MllamaTextSelfAttentionWithAdapters(MllamaTextSelfAttentionAdaptersMixin, MllamaTextSelfAttention): + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: torch.Tensor, + position_embeddings: torch.Tensor, + output_attentions: bool = False, + use_cache: bool = False, + past_key_value=None, + cache_position=None, + **kwargs, + ): + bsz, q_len, _ = hidden_states.size() + + query_states = self.q_proj(hidden_states) + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + + query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2) + key_states = key_states.view(sbsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) + value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) + + # >>> START AH Changes <<< + query_states, key_states, value_states = match_attn_matrices_for_parallel( + query_states, key_states, value_states + ) + (attention_mask,) = adjust_tensors_for_parallel(query_states, attention_mask) + # >>> END AH Changes <<< + + cos, sin = position_embeddings + query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin) + + if past_key_value is not None: + # sin and cos are specific to RoPE models; cache_position needed for the static cache + cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position} + key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs) + + 
key_states = repeat_kv(key_states, self.num_key_value_groups) + value_states = repeat_kv(value_states, self.num_key_value_groups) + + # >>> START AH Changes <<< + key_states, value_states, attention_mask = self.prefix_tuning( + key_states, value_states, hidden_states, attention_mask + ) + (query_states,) = adjust_tensors_for_parallel(key_states, query_states) + bsz = key_states.shape[0] + # >>> END AH Changes <<< + + attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim) + + if attention_mask is not None: # no matter the length, we just slice it + causal_mask = attention_mask[:, :, :, : key_states.shape[-2]] + attn_weights = attn_weights + causal_mask + + # upcast attention to fp32 + attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) + attn_weights = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training) + attn_output = torch.matmul(attn_weights, value_states) + + attn_output = attn_output.transpose(1, 2).contiguous() + attn_output = attn_output.view(bsz, q_len, -1) + + attn_output = self.o_proj(attn_output) + + if not output_attentions: + attn_weights = None + + return attn_output, attn_weights, past_key_value + + +class MllamaTextSelfSdpaAttentionWithAdapters(MllamaTextSelfAttentionAdaptersMixin, MllamaTextSelfSdpaAttention): + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: torch.Tensor, + position_embeddings: torch.Tensor, + output_attentions: bool = False, + use_cache: bool = False, + past_key_value=None, + cache_position=None, + **kwargs, + ): + if output_attentions: + # TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented. + logger.warning_once( + "MllamaModel is using MllamaTextSelfSdpaAttention, but `torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`. Falling back to the manual attention implementation, " + 'but specifying the manual implementation will be required from Transformers version v5.0.0 onwards. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.' 
+ ) + return super().forward( + hidden_states=hidden_states, + attention_mask=attention_mask, + position_embeddings=position_embeddings, + past_key_value=past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + cache_position=cache_position, + **kwargs, + ) + + bsz, q_len, _ = hidden_states.size() + + query_states = self.q_proj(hidden_states) + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + + query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2) + key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) + value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) + + # >>> START AH Changes <<< + query_states, key_states, value_states = match_attn_matrices_for_parallel( + query_states, key_states, value_states + ) + (attention_mask,) = adjust_tensors_for_parallel(query_states, attention_mask) + # >>> END AH Changes <<< + + cos, sin = position_embeddings + query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin) + + if past_key_value is not None: + # sin and cos are specific to RoPE models; cache_position needed for the static cache + cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position} + key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs) + + key_states = repeat_kv(key_states, self.num_key_value_groups) + value_states = repeat_kv(value_states, self.num_key_value_groups) + + # >>> START AH Changes <<< + key_states, value_states, attention_mask = self.prefix_tuning( + key_states, value_states, hidden_states, attention_mask + ) + (query_states,) = adjust_tensors_for_parallel(key_states, query_states) + bsz = key_states.shape[0] + # >>> END AH Changes <<< + + causal_mask = attention_mask + if attention_mask is not None: + causal_mask = causal_mask[:, :, :, : key_states.shape[-2]] + + # SDPA with memory-efficient backend is currently (torch==2.1.2) bugged with non-contiguous inputs with custom attn_mask, + # Reference: https://github.com/pytorch/pytorch/issues/112577. + if query_states.device.type == "cuda" and causal_mask is not None: + query_states = query_states.contiguous() + key_states = key_states.contiguous() + value_states = value_states.contiguous() + + # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment + # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. 
+ is_causal = True if causal_mask is None and q_len > 1 else False + + attn_output = torch.nn.functional.scaled_dot_product_attention( + query_states, + key_states, + value_states, + attn_mask=causal_mask, + dropout_p=self.dropout if self.training else 0.0, + is_causal=is_causal, + ) + + attn_output = attn_output.transpose(1, 2).contiguous() + attn_output = attn_output.view(bsz, q_len, -1) + + attn_output = self.o_proj(attn_output) + return attn_output, None, past_key_value From a52154fd6eb0e54789b536fa4f522c96fb041f52 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Thu, 16 Jan 2025 11:09:37 +0100 Subject: [PATCH 48/63] Progress: - Create Mllama testbase blueprint - fix multiple prefixtuningpools problem by updating Mllama mixins - add blueprint for static model conversion from MllamaForConditionalGeneration - Remove wrong labels argument in MllamaModel --- src/adapters/head_utils.py | 10 ++ src/adapters/methods/prefix_tuning.py | 1 + src/adapters/models/__init__.py | 2 + src/adapters/models/mllama/adapter_model.py | 2 - src/adapters/models/mllama/mixin_mllama.py | 37 ++++---- src/adapters/models/mllama/modeling_mllama.py | 10 +- tests/test_methods/test_on_mllama.py | 95 +++++++++++++++++++ 7 files changed, 132 insertions(+), 25 deletions(-) create mode 100644 tests/test_methods/test_on_mllama.py diff --git a/src/adapters/head_utils.py b/src/adapters/head_utils.py index 6f419744b5..ede1b674a7 100644 --- a/src/adapters/head_utils.py +++ b/src/adapters/head_utils.py @@ -788,6 +788,16 @@ }, "layers": ["proj_out"], }, + "MllamaForConditionalGeneration": { + "config": { + "head_type": "causal_lm", + "layers": 1, + "activation_function": None, + "layer_norm": False, + "bias": False, + }, + "layers": ["language_model.lm_head"], + }, } diff --git a/src/adapters/methods/prefix_tuning.py b/src/adapters/methods/prefix_tuning.py index 17ab177a45..8fae3770aa 100644 --- a/src/adapters/methods/prefix_tuning.py +++ b/src/adapters/methods/prefix_tuning.py @@ -153,6 +153,7 @@ def __init__(self, model_config: PretrainedConfig, adapters_config: ModelAdapter self.prefix_tunings = nn.ModuleDict() def indicate_prefix(self, prefix_name: str, location_key: str, **kwargs): + """ Indicate that a Prefix Tuning module should be added to the indicated layer. 
""" if prefix_name not in self.prefix_counts: self.prefix_counts[prefix_name] = {location_key: {"count": 1, **kwargs}} elif location_key not in self.prefix_counts[prefix_name]: diff --git a/src/adapters/models/__init__.py b/src/adapters/models/__init__.py index 2b7c55fc34..ca8f92b60d 100644 --- a/src/adapters/models/__init__.py +++ b/src/adapters/models/__init__.py @@ -23,6 +23,7 @@ from .mllama.mixin_mllama import ( MllamaAdaptersMixin, MllamaCrossAttentionDecoderLayerAdaptersMixin, + MllamaForConditionalGenerationWithHeadsAdaptersMixin, MllamaSelfAttentionDecoderLayerAdaptersMixin, MllamaTextCrossAttentionAdaptersMixin, MllamaTextModelAdaptersMixin, @@ -122,6 +123,7 @@ "LlamaForQuestionAnswering": LlamaForQuestionAnsweringAdapterMixin, "MistralModel": MistralModelAdapterMixin, # Mulitmodal Llama + "MllamaForConditionalGeneration": MllamaForConditionalGenerationWithHeadsAdaptersMixin, "MllamaModel": MllamaAdaptersMixin, "MllamaVisionModel": MllamaVisionModelAdaptersMixin, "MllamaTextModel": MllamaTextModelAdaptersMixin, diff --git a/src/adapters/models/mllama/adapter_model.py b/src/adapters/models/mllama/adapter_model.py index b3e7b4fe52..0b7565a4f9 100644 --- a/src/adapters/models/mllama/adapter_model.py +++ b/src/adapters/models/mllama/adapter_model.py @@ -81,7 +81,6 @@ def forward( position_ids: Optional[torch.LongTensor] = None, past_key_values: Optional[List[torch.FloatTensor]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, - labels: Optional[torch.LongTensor] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, @@ -146,7 +145,6 @@ def forward( past_key_values=past_key_values, use_cache=use_cache, inputs_embeds=inputs_embeds, - labels=labels, output_hidden_states=output_hidden_states, output_attentions=output_attentions, return_dict=return_dict, diff --git a/src/adapters/models/mllama/mixin_mllama.py b/src/adapters/models/mllama/mixin_mllama.py index 27b4aa7cbb..8baf8c3b3d 100644 --- a/src/adapters/models/mllama/mixin_mllama.py +++ b/src/adapters/models/mllama/mixin_mllama.py @@ -2,6 +2,8 @@ import torch.nn as nn +from transformers.models.mllama.modeling_mllama import MllamaForConditionalGeneration + from ...methods.reft import ReftLayer, hook_fn from ...model_mixin import ( EmbeddingAdaptersMixin, @@ -9,20 +11,21 @@ InvertibleAdaptersMixin, InvertibleAdaptersWrapperMixin, ModelBaseAdaptersMixin, + ModelWithHeadsAdaptersMixin, ) -from ..clip.mixin_clip import CLIPEncoderAdaptersMixin, CLIPEncoderLayerAdaptersMixin -from ..llama.mixin_llama import LlamaAttentionMixin, LlamaDecoderLayerMixin +from ..clip.mixin_clip import CLIPAttentionAdaptersMixin, CLIPEncoderAdaptersMixin, CLIPEncoderLayerAdaptersMixin +from ..llama.mixin_llama import LlamaDecoderLayerMixin -class MllamaVisionAttentionAdaptersMixin(LlamaAttentionMixin): +class MllamaVisionAttentionAdaptersMixin(CLIPAttentionAdaptersMixin): """Mixin for adding adapter support to MLLaMA's vision attention module.""" -class MllamaTextCrossAttentionAdaptersMixin(LlamaAttentionMixin): +class MllamaTextCrossAttentionAdaptersMixin(CLIPAttentionAdaptersMixin): """Mixin for adding adapter support to MLLaMA's cross-attention module.""" -class MllamaTextSelfAttentionAdaptersMixin(LlamaAttentionMixin): +class MllamaTextSelfAttentionAdaptersMixin(CLIPAttentionAdaptersMixin): """Mixin for adding adapter support to MLLaMA's self-attention module.""" @@ -42,7 +45,7 @@ class MllamaVisionEncoderAdaptersMixin(CLIPEncoderAdaptersMixin): """Mixin for adding adapter 
support to MLLaMA's vision encoder module.""" -class MllamaVisionModelAdaptersMixin(ModelBaseAdaptersMixin): +class MllamaVisionModelAdaptersMixin: """Adds adapters to the a MllamaVisionModel class.""" support_prompt_tuning = False @@ -57,22 +60,11 @@ def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]: yield i, layer -class MllamaTextModelAdaptersMixin(EmbeddingAdaptersMixin, InvertibleAdaptersMixin, ModelBaseAdaptersMixin): +class MllamaTextModelAdaptersMixin(EmbeddingAdaptersMixin, InvertibleAdaptersMixin): """Adds adapters to the a MllamaTextModel class.""" support_prompt_tuning = False - def init_adapters(self, model_config, adapters_config): - super().init_adapters(model_config, adapters_config) - - # Register hook for post embedding forward - self.embed_tokens.register_forward_hook(self.post_embedding_forward) - - def post_embedding_forward(self, module, args, embedding_output): - embedding_output = self.invertible_adapters_forward(embedding_output) - # Prompt tuning not yet supported - return embedding_output - def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]: for i, layer in enumerate(self.layers): yield i, layer @@ -100,6 +92,9 @@ def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]: def _init_adapters_submodules(self, model_config, adapters_config): """Initialize adapters in vision and language models separately.""" + # transformers naming inconsistency: Add num_attention_heads to the model config for the vision model because it is by default represented by the parameter attention_head + model_config.vision_config.num_attention_heads = model_config.vision_config.attention_heads + # Initialize vision model adapters for module in self.vision_model.modules(): if hasattr(module, "init_adapters"): @@ -126,3 +121,9 @@ def _init_reft_layers(self, model_config, adapters_config): if not hasattr(layer, "reft_layer"): layer.reft_layer = ReftLayer("output", model_config.text_config, adapters_config) layer.register_forward_hook(hook_fn) + + +class MllamaForConditionalGenerationWithHeadsAdaptersMixin(ModelWithHeadsAdaptersMixin, MllamaAdaptersMixin): + """Adds adapters to the MllamaForConditionalGeneration class.""" + + pass diff --git a/src/adapters/models/mllama/modeling_mllama.py b/src/adapters/models/mllama/modeling_mllama.py index 036e39895f..6b64d3c564 100644 --- a/src/adapters/models/mllama/modeling_mllama.py +++ b/src/adapters/models/mllama/modeling_mllama.py @@ -30,21 +30,21 @@ from adapters.composition import adjust_tensors_for_parallel, match_attn_matrices_for_parallel from transformers.cache_utils import Cache from transformers.models.mllama.modeling_mllama import ( - repeat_kv, - apply_rotary_pos_emb, - MllamaVisionAttention, - MllamaVisionSdpaAttention, MllamaTextCrossAttention, MllamaTextCrossSdpaAttention, MllamaTextSelfAttention, MllamaTextSelfSdpaAttention, + MllamaVisionAttention, + MllamaVisionSdpaAttention, + apply_rotary_pos_emb, + repeat_kv, ) from transformers.utils import logging from .mixin_mllama import ( - MllamaVisionAttentionAdaptersMixin, MllamaTextCrossAttentionAdaptersMixin, MllamaTextSelfAttentionAdaptersMixin, + MllamaVisionAttentionAdaptersMixin, ) diff --git a/tests/test_methods/test_on_mllama.py b/tests/test_methods/test_on_mllama.py new file mode 100644 index 0000000000..585263c71c --- /dev/null +++ b/tests/test_methods/test_on_mllama.py @@ -0,0 +1,95 @@ +import random + +from transformers.models.mllama.configuration_mllama import MllamaConfig, MllamaTextConfig, MllamaVisionConfig + +from .generator import * + + +def 
from_text_vision_configs(config_class, text_config: MllamaTextConfig, vision_config: MllamaVisionConfig, **kwargs): + """ + Create a MllamaConfig instance from separate text and vision configs. + + This standalone function mimics the behavior of class methods like CLIPConfig.from_text_vision_configs, + but works without modifying the MllamaConfig class. + + Args: + config_class: The configuration class to instantiate (MllamaConfig) + text_config: The configuration for the text model + vision_config: The configuration for the vision model + **kwargs: Additional arguments to pass to the config constructor + + Returns: + An instance of config_class initialized with the text and vision configs + """ + return config_class(text_config=text_config.to_dict(), vision_config=vision_config.to_dict(), **kwargs) + + +class MllamaAdapterTestBase(TextAdapterTestBase): + + config = staticmethod( + lambda: from_text_vision_configs( + MllamaConfig, + MllamaTextConfig( + vocab_size=1000, # Minimal vocab size + hidden_size=128, + num_hidden_layers=1, + num_attention_heads=2, + num_key_value_heads=2, + intermediate_size=256, + cross_attention_layers=[0], + bos_token_id=990, + eos_token_id=991, + pad_token_id=992, + max_position_embeddings=512, + rope_scaling={ + "rope_type": "default", + }, + ), + MllamaVisionConfig( + hidden_size=128, + num_hidden_layers=1, + num_global_layers=1, + num_attention_heads=1, + intermediate_size=256, + vision_output_dim=128, + image_size=112, + patch_size=4, + ), + ) + ) + tokenizer_name = "arnavgrg/mllama-11b-vision-lora" + + def get_input_samples(self, vocab_size=5000, config=None, dtype=torch.float, **kwargs): + # text inputs + shape = self.default_text_input_samples_shape + total_dims = 1 + for dim in shape: + total_dims *= dim + values = [] + for _ in range(total_dims): + values.append(random.randint(0, vocab_size - 1)) + input_ids = torch.tensor(data=values, dtype=torch.long, device=torch_device).view(shape).contiguous() + # this is needed e.g. 
for BART + if config and config.eos_token_id is not None and config.eos_token_id < vocab_size: + input_ids[input_ids == config.eos_token_id] = random.randint(0, config.eos_token_id - 1) + input_ids[:, -1] = config.eos_token_id + in_data = {"input_ids": input_ids} + + # vision inputs + shape = self.default_vision_input_samples_shape + total_dims = 1 + for dim in shape: + total_dims *= dim + values = [] + for _ in range(total_dims): + values.append(random.random()) + pixel_values = torch.tensor(data=values, dtype=dtype, device=torch_device).view(shape).contiguous() + in_data["pixel_values"] = pixel_values + + return in_data + + +test_methods = generate_method_tests(MllamaAdapterTestBase, excluded_tests=[]) + +for test_class_name, test_class in test_methods.items(): + globals()[test_class_name] = test_class From 2ec0b35c20e8ba7a1bc04ff3320c3e429b90a1be Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Thu, 16 Jan 2025 11:14:29 +0100 Subject: [PATCH 49/63] save links for useful resources --- tests/test_methods/test_on_mllama.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_methods/test_on_mllama.py b/tests/test_methods/test_on_mllama.py index 585263c71c..1375bf6060 100644 --- a/tests/test_methods/test_on_mllama.py +++ b/tests/test_methods/test_on_mllama.py @@ -93,3 +93,10 @@ def get_input_samples(self, vocab_size=5000, config=None, dtype=torch.float, **k for test_class_name, test_class in test_methods.items(): globals()[test_class_name] = test_class + + +""" resources: +https://github.com/AdrianBZG/llama-multimodal-vqa +https://huggingface.co/blog/llama32 +https://github.com/huggingface/huggingface-llama-recipes/blob/main/fine_tune/Llama-Vision%20FT.ipynb +""" From 88f6230c38318b5162fe922d1f4fc9e6c22f3a5a Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Thu, 16 Jan 2025 12:14:38 +0100 Subject: [PATCH 50/63] Integrate CLIP into refactored test structure --- .../test_model.py} | 90 +++---------------- .../test_on_clip/test_textmodel.py | 25 ++++++ .../test_textwithprojectionmodel.py | 25 ++++++ .../test_on_clip/test_visionmodel.py | 26 ++++++ .../test_visionwithprojectionmodel.py | 26 ++++++ tests/test_methods/test_on_clip_vision.py | 65 -------------- 6 files changed, 114 insertions(+), 143 deletions(-) rename tests/test_methods/{test_on_clip_text.py => test_on_clip/test_model.py} (56%) create mode 100644 tests/test_methods/test_on_clip/test_textmodel.py create mode 100644 tests/test_methods/test_on_clip/test_textwithprojectionmodel.py create mode 100644 tests/test_methods/test_on_clip/test_visionmodel.py create mode 100644 tests/test_methods/test_on_clip/test_visionwithprojectionmodel.py delete mode 100644 tests/test_methods/test_on_clip_vision.py diff --git a/tests/test_methods/test_on_clip_text.py b/tests/test_methods/test_on_clip/test_model.py similarity index 56% rename from tests/test_methods/test_on_clip_text.py rename to tests/test_methods/test_on_clip/test_model.py index e4bab9c1f4..07e338d1bd 100644 --- a/tests/test_methods/test_on_clip_text.py +++ b/tests/test_methods/test_on_clip/test_model.py @@ -1,68 +1,7 @@ import random -from transformers import CLIPConfig, CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProjection, CLIPVisionConfig - -from .generator import * - - -class CLIPTextAdapterTestBase(TextAdapterTestBase): - model_class = CLIPTextModel - config_class = CLIPTextConfig - config = make_config( - CLIPTextConfig, - hidden_size=32, - num_hidden_layers=4, - num_attention_heads=4, - intermediate_size=37, - ) - tokenizer_name = "openai/clip-vit-base-patch32" 
- - -@require_torch -class CLIPTextAdapterTest( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - CLIPTextAdapterTestBase, - unittest.TestCase, -): - pass - - -class CLIPTextWithProjectionAdapterTestBase(TextAdapterTestBase): - model_class = CLIPTextModelWithProjection - config_class = CLIPTextConfig - config = make_config( - CLIPTextConfig, - hidden_size=32, - num_hidden_layers=4, - num_attention_heads=4, - intermediate_size=37, - ) - tokenizer_name = "openai/clip-vit-base-patch32" - - -@require_torch -class CLIPTextWithProjectionAdapterTest( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - CLIPTextWithProjectionAdapterTestBase, - unittest.TestCase, -): - pass +from tests.test_methods.generator import * +from transformers import CLIPConfig, CLIPTextConfig, CLIPVisionConfig class CLIPAdapterTestBase(TextAdapterTestBase): @@ -122,24 +61,19 @@ def get_input_samples(self, vocab_size=5000, config=None, dtype=torch.float, **k def add_head(self, *args, **kwargs): pass - -@require_torch -class CLIPAdapterTest( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - CLIPAdapterTestBase, - unittest.TestCase, -): def test_adapter_fusion_save_with_head(self): # This test is not applicable to CLIP self.skipTest("Not applicable to CLIP.") def test_load_adapter_setup(self): self.skipTest("Not applicable to CLIP.") + + +method_tests = generate_method_tests( + model_test_base=CLIPAdapterTestBase, + excluded_tests=["Embeddings", "Heads", "Composition", "ClassConversion", "PromptTuning", "ConfigUnion"], +) + + +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class diff --git a/tests/test_methods/test_on_clip/test_textmodel.py b/tests/test_methods/test_on_clip/test_textmodel.py new file mode 100644 index 0000000000..e17b309c22 --- /dev/null +++ b/tests/test_methods/test_on_clip/test_textmodel.py @@ -0,0 +1,25 @@ +from tests.test_methods.generator import * +from transformers import CLIPTextConfig, CLIPTextModel + + +class CLIPTextAdapterTestBase(TextAdapterTestBase): + model_class = CLIPTextModel + config_class = CLIPTextConfig + config = make_config( + CLIPTextConfig, + hidden_size=32, + num_hidden_layers=4, + num_attention_heads=4, + intermediate_size=37, + ) + tokenizer_name = "openai/clip-vit-base-patch32" + + +method_tests = generate_method_tests( + model_test_base=CLIPTextAdapterTestBase, + excluded_tests=["Embeddings", "Heads", "Composition", "ClassConversion", "PromptTuning", "ConfigUnion"], +) + + +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class diff --git a/tests/test_methods/test_on_clip/test_textwithprojectionmodel.py b/tests/test_methods/test_on_clip/test_textwithprojectionmodel.py new file mode 100644 index 0000000000..e4ddc7ab0b --- /dev/null +++ b/tests/test_methods/test_on_clip/test_textwithprojectionmodel.py @@ -0,0 +1,25 @@ +from tests.test_methods.generator import * +from transformers import CLIPTextConfig, CLIPTextModelWithProjection + + +class CLIPTextWithProjectionAdapterTestBase(TextAdapterTestBase): + model_class = CLIPTextModelWithProjection 
+ config_class = CLIPTextConfig + config = make_config( + CLIPTextConfig, + hidden_size=32, + num_hidden_layers=4, + num_attention_heads=4, + intermediate_size=37, + ) + tokenizer_name = "openai/clip-vit-base-patch32" + + +method_tests = generate_method_tests( + model_test_base=CLIPTextWithProjectionAdapterTestBase, + excluded_tests=["Embeddings", "Heads", "Composition", "ClassConversion", "PromptTuning", "ConfigUnion"], +) + + +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class diff --git a/tests/test_methods/test_on_clip/test_visionmodel.py b/tests/test_methods/test_on_clip/test_visionmodel.py new file mode 100644 index 0000000000..932354b032 --- /dev/null +++ b/tests/test_methods/test_on_clip/test_visionmodel.py @@ -0,0 +1,26 @@ +from tests.test_methods.generator import * +from transformers import CLIPVisionConfig, CLIPVisionModel + + +class CLIPVisionAdapterTestBase(VisionAdapterTestBase): + model_class = CLIPVisionModel + config_class = CLIPVisionConfig + config = make_config( + CLIPVisionConfig, + image_size=224, + hidden_size=32, + num_hidden_layers=4, + num_attention_heads=4, + intermediate_size=37, + ) + feature_extractor_name = "openai/clip-vit-base-patch32" + + +method_tests = generate_method_tests( + model_test_base=CLIPVisionAdapterTestBase, + excluded_tests=["Embeddings", "Heads", "Composition", "ClassConversion", "PromptTuning", "ConfigUnion"], +) + + +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class diff --git a/tests/test_methods/test_on_clip/test_visionwithprojectionmodel.py b/tests/test_methods/test_on_clip/test_visionwithprojectionmodel.py new file mode 100644 index 0000000000..ba73a2509f --- /dev/null +++ b/tests/test_methods/test_on_clip/test_visionwithprojectionmodel.py @@ -0,0 +1,26 @@ +from tests.test_methods.generator import * +from transformers import CLIPVisionConfig, CLIPVisionModelWithProjection + + +class CLIPVisionWithProjectionAdapterTestBase(VisionAdapterTestBase): + model_class = CLIPVisionModelWithProjection + config_class = CLIPVisionConfig + config = make_config( + CLIPVisionConfig, + image_size=224, + hidden_size=32, + num_hidden_layers=4, + num_attention_heads=4, + intermediate_size=37, + ) + feature_extractor_name = "openai/clip-vit-base-patch32" + + +method_tests = generate_method_tests( + model_test_base=CLIPVisionWithProjectionAdapterTestBase, + excluded_tests=["Embeddings", "Heads", "Composition", "ClassConversion", "PromptTuning", "ConfigUnion"], +) + + +for test_class_name, test_class in method_tests.items(): + globals()[test_class_name] = test_class diff --git a/tests/test_methods/test_on_clip_vision.py b/tests/test_methods/test_on_clip_vision.py deleted file mode 100644 index 82cf9590c8..0000000000 --- a/tests/test_methods/test_on_clip_vision.py +++ /dev/null @@ -1,65 +0,0 @@ -from transformers import CLIPVisionConfig, CLIPVisionModel, CLIPVisionModelWithProjection - -from .generator import * - - -class CLIPVisionAdapterTestBase(VisionAdapterTestBase): - model_class = CLIPVisionModel - config_class = CLIPVisionConfig - config = make_config( - CLIPVisionConfig, - image_size=224, - hidden_size=32, - num_hidden_layers=4, - num_attention_heads=4, - intermediate_size=37, - ) - feature_extractor_name = "openai/clip-vit-base-patch32" - - -@require_torch -class CLIPVisionAdapterTest( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, - 
AdapterFusionModelTestMixin, - CompabilityTestMixin, - CLIPVisionAdapterTestBase, - unittest.TestCase, -): - pass - - -class CLIPVisionWithProjectionAdapterTestBase(VisionAdapterTestBase): - model_class = CLIPVisionModelWithProjection - config_class = CLIPVisionConfig - config = make_config( - CLIPVisionConfig, - image_size=224, - hidden_size=32, - num_hidden_layers=4, - num_attention_heads=4, - intermediate_size=37, - ) - feature_extractor_name = "openai/clip-vit-base-patch32" - - -@require_torch -class CLIPVisionWithProjectionAdapterTest( - BottleneckAdapterTestMixin, - CompacterTestMixin, - IA3TestMixin, - LoRATestMixin, - PrefixTuningTestMixin, - ReftTestMixin, - UniPELTTestMixin, - AdapterFusionModelTestMixin, - CompabilityTestMixin, - CLIPVisionWithProjectionAdapterTestBase, - unittest.TestCase, -): - pass From a75846af928891bb4abdbe8c988597aa5ac05249 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Fri, 17 Jan 2025 13:22:27 +0100 Subject: [PATCH 51/63] Progress: - make style - make quality - Update Mllama test config and get_input_samples() --- src/adapters/methods/prefix_tuning.py | 2 +- src/adapters/models/mllama/adapter_model.py | 2 - src/adapters/models/mllama/mixin_mllama.py | 2 - src/adapters/models/mllama/modeling_mllama.py | 3 +- tests/test_methods/test_on_mllama.py | 64 +++++++++---------- 5 files changed, 34 insertions(+), 39 deletions(-) diff --git a/src/adapters/methods/prefix_tuning.py b/src/adapters/methods/prefix_tuning.py index 8fae3770aa..ce584ec25d 100644 --- a/src/adapters/methods/prefix_tuning.py +++ b/src/adapters/methods/prefix_tuning.py @@ -153,7 +153,7 @@ def __init__(self, model_config: PretrainedConfig, adapters_config: ModelAdapter self.prefix_tunings = nn.ModuleDict() def indicate_prefix(self, prefix_name: str, location_key: str, **kwargs): - """ Indicate that a Prefix Tuning module should be added to the indicated layer. """ + """Indicate that a Prefix Tuning module should be added to the indicated layer.""" if prefix_name not in self.prefix_counts: self.prefix_counts[prefix_name] = {location_key: {"count": 1, **kwargs}} elif location_key not in self.prefix_counts[prefix_name]: diff --git a/src/adapters/models/mllama/adapter_model.py b/src/adapters/models/mllama/adapter_model.py index 0b7565a4f9..e1aff9d97b 100644 --- a/src/adapters/models/mllama/adapter_model.py +++ b/src/adapters/models/mllama/adapter_model.py @@ -4,8 +4,6 @@ import torch from torch import nn -from adapters.heads.language_modeling import CausalLMOutputWithPast -from transformers.cache_utils import Cache from transformers.modeling_outputs import BaseModelOutputWithPast from transformers.models.mllama.modeling_mllama import ( MLLAMA_START_DOCSTRING, diff --git a/src/adapters/models/mllama/mixin_mllama.py b/src/adapters/models/mllama/mixin_mllama.py index 8baf8c3b3d..0b2aac6b8a 100644 --- a/src/adapters/models/mllama/mixin_mllama.py +++ b/src/adapters/models/mllama/mixin_mllama.py @@ -2,8 +2,6 @@ import torch.nn as nn -from transformers.models.mllama.modeling_mllama import MllamaForConditionalGeneration - from ...methods.reft import ReftLayer, hook_fn from ...model_mixin import ( EmbeddingAdaptersMixin, diff --git a/src/adapters/models/mllama/modeling_mllama.py b/src/adapters/models/mllama/modeling_mllama.py index 6b64d3c564..196fdcbf51 100644 --- a/src/adapters/models/mllama/modeling_mllama.py +++ b/src/adapters/models/mllama/modeling_mllama.py @@ -19,7 +19,6 @@ # limitations under the License. 
import math -import warnings from typing import Optional, Tuple import torch @@ -328,7 +327,7 @@ def forward( value_states = self.v_proj(hidden_states) query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2) - key_states = key_states.view(sbsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) + key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) # >>> START AH Changes <<< diff --git a/tests/test_methods/test_on_mllama.py b/tests/test_methods/test_on_mllama.py index 1375bf6060..45569f5843 100644 --- a/tests/test_methods/test_on_mllama.py +++ b/tests/test_methods/test_on_mllama.py @@ -1,5 +1,9 @@ -import random +import os +from pathlib import Path +from PIL import Image + +from transformers import MllamaImageProcessor from transformers.models.mllama.configuration_mllama import MllamaConfig, MllamaTextConfig, MllamaVisionConfig from .generator import * @@ -32,7 +36,7 @@ class MllamaAdapterTestBase(TextAdapterTestBase): MllamaTextConfig( vocab_size=1000, # Minimal vocab size hidden_size=128, - num_hidden_layers=1, + num_hidden_layers=4, num_attention_heads=2, num_key_value_heads=2, intermediate_size=256, @@ -47,44 +51,40 @@ class MllamaAdapterTestBase(TextAdapterTestBase): ), MllamaVisionConfig( hidden_size=128, - num_hidden_layers=1, - num_global_layers=1, + num_hidden_layers=4, + num_global_layers=4, num_attention_heads=1, intermediate_size=256, vision_output_dim=128, - image_size=112, - patch_size=4, + image_size=224, ), ) ) tokenizer_name = "arnavgrg/mllama-11b-vision-lora" + shape = (1, 128) + + # Save runtime by computing the processed image once and reusing it for all tests + FIXTURES_DIR = Path(__file__).parent.parent / "fixtures" + + img_processor = MllamaImageProcessor() + img = Image.open(os.path.join(FIXTURES_DIR, "tests_samples", "COCO", "000000039769.png")) + processed_img = img_processor(img, return_tensors="pt") + + def get_input_samples(self, vocab_size=1000, shape=None, config=None, dtype=torch.float, **kwargs): + shape = shape or self.input_shape + + # Text inputs + input_ids = self.build_rand_ids_tensor(shape, vocab_size) + + in_data = { + "input_ids": input_ids, + "pixel_values": self.processed_img["pixel_values"], + "aspect_ratio_ids": self.processed_img["aspect_ratio_ids"], + "aspect_ratio_mask": self.processed_img["aspect_ratio_mask"], + } - def get_input_samples(self, vocab_size=5000, config=None, dtype=torch.float, **kwargs): - # text inputs - shape = self.default_text_input_samples_shape - total_dims = 1 - for dim in shape: - total_dims *= dim - values = [] - for _ in range(total_dims): - values.append(random.randint(0, vocab_size - 1)) - input_ids = torch.tensor(data=values, dtype=torch.long, device=torch_device).view(shape).contiguous() - # this is needed e.g. 
for BART - if config and config.eos_token_id is not None and config.eos_token_id < vocab_size: - input_ids[input_ids == config.eos_token_id] = random.randint(0, config.eos_token_id - 1) - input_ids[:, -1] = config.eos_token_id - in_data = {"input_ids": input_ids} - - # vision inputs - shape = self.default_vision_input_samples_shape - total_dims = 1 - for dim in shape: - total_dims *= dim - values = [] - for _ in range(total_dims): - values.append(random.random()) - pixel_values = torch.tensor(data=values, dtype=dtype, device=torch_device).view(shape).contiguous() - in_data["pixel_values"] = pixel_values + if "num_labels" in kwargs: + in_data["labels"] = self.build_rand_ids_tensor(shape[:-1], vocab_size=kwargs["num_labels"]) return in_data From 7b970c94b4aee4a7d91159df88c4b86dd274d445 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Fri, 17 Jan 2025 19:26:17 +0100 Subject: [PATCH 52/63] Add mllama model tests --- tests/test_models/test_mllama_model.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 tests/test_models/test_mllama_model.py diff --git a/tests/test_models/test_mllama_model.py b/tests/test_models/test_mllama_model.py new file mode 100644 index 0000000000..096fd4a7ae --- /dev/null +++ b/tests/test_models/test_mllama_model.py @@ -0,0 +1,12 @@ +# flake8: noqa: F403,F405 +from adapters import MllamaAdapterModel +from hf_transformers.tests.models.mllama.test_modeling_mllama import * +from transformers.testing_utils import require_torch + +from .base import AdapterModelTesterMixin + + +@require_torch +class MistralAdapterModelTest(AdapterModelTesterMixin, MllamaForConditionalGenerationIntegrationTest): + all_model_classes = (MllamaAdapterModel,) + fx_compatible = False From 40871e59ec70d0887551798208dc9e20a2741f05 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Sun, 19 Jan 2025 10:37:09 +0100 Subject: [PATCH 53/63] Adapt VisionEncoder forward pre hook --- src/adapters/models/mllama/mixin_mllama.py | 22 +++++++++++++++++++++- tests/test_methods/test_on_mllama.py | 13 +++++++------ 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/src/adapters/models/mllama/mixin_mllama.py b/src/adapters/models/mllama/mixin_mllama.py index 0b2aac6b8a..45f4e37779 100644 --- a/src/adapters/models/mllama/mixin_mllama.py +++ b/src/adapters/models/mllama/mixin_mllama.py @@ -2,6 +2,7 @@ import torch.nn as nn +from ...composition import adjust_tensors_for_parallel_ from ...methods.reft import ReftLayer, hook_fn from ...model_mixin import ( EmbeddingAdaptersMixin, @@ -39,9 +40,28 @@ class MllamaCrossAttentionDecoderLayerAdaptersMixin(LlamaDecoderLayerMixin): """Mixin for adding adapter support to MLLaMA's cross-attention decoder layers.""" -class MllamaVisionEncoderAdaptersMixin(CLIPEncoderAdaptersMixin): +class MllamaVisionEncoderAdaptersMixin: """Mixin for adding adapter support to MLLaMA's vision encoder module.""" + def init_adapters(self, model_config, adapters_config): + # Set hook for parallel composition + for layer in self.layers: + self._set_layer_hook_for_parallel(layer) + + def _set_layer_hook_for_parallel(self, layer: nn.Module): + def hook(module, args, kwargs): + # Extract the hidden states from kwargs + if "hidden_state" in kwargs: + hidden_states = kwargs["hidden_state"] + attention_mask = kwargs.get("attention_mask") + if attention_mask is not None: + adjust_tensors_for_parallel_(hidden_states, attention_mask) + kwargs["hidden_state"] = hidden_states + kwargs["attention_mask"] = attention_mask + return args, kwargs + + layer.register_forward_pre_hook(hook, 
with_kwargs=True) + class MllamaVisionModelAdaptersMixin: """Adds adapters to the a MllamaVisionModel class.""" diff --git a/tests/test_methods/test_on_mllama.py b/tests/test_methods/test_on_mllama.py index 45569f5843..427aa5762d 100644 --- a/tests/test_methods/test_on_mllama.py +++ b/tests/test_methods/test_on_mllama.py @@ -36,10 +36,10 @@ class MllamaAdapterTestBase(TextAdapterTestBase): MllamaTextConfig( vocab_size=1000, # Minimal vocab size hidden_size=128, - num_hidden_layers=4, + num_hidden_layers=1, num_attention_heads=2, num_key_value_heads=2, - intermediate_size=256, + intermediate_size=128, cross_attention_layers=[0], bos_token_id=990, eos_token_id=991, @@ -51,12 +51,13 @@ class MllamaAdapterTestBase(TextAdapterTestBase): ), MllamaVisionConfig( hidden_size=128, - num_hidden_layers=4, - num_global_layers=4, + num_hidden_layers=1, + num_global_layers=1, num_attention_heads=1, - intermediate_size=256, - vision_output_dim=128, + intermediate_size=128, + vision_output_dim=256, image_size=224, + intermediate_layers_indices=[0], ), ) ) From 8a17571ff212147ff0deb0fd904fcf1618e4c628 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Wed, 29 Jan 2025 14:34:51 +0100 Subject: [PATCH 54/63] revert merging errors --- src/adapters/models/mllama/__init__.py | 39 ++ tests/test_methods/method_test_impl/base.py | 662 +++++++++++++------- 2 files changed, 476 insertions(+), 225 deletions(-) create mode 100644 src/adapters/models/mllama/__init__.py diff --git a/src/adapters/models/mllama/__init__.py b/src/adapters/models/mllama/__init__.py new file mode 100644 index 0000000000..12ff0ddd99 --- /dev/null +++ b/src/adapters/models/mllama/__init__.py @@ -0,0 +1,39 @@ +# flake8: noqa +# There's no way to ignore "F401 '...' imported but unused" warnings in this +# module, but to preserve other warnings. So, don't check this module at all. + +# Copyright 2020 The Adapter-Hub Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import TYPE_CHECKING + +from transformers.utils import _LazyModule + + +_import_structure = { + "adapter_model": ["MllamaAdapterModel"], +} + + +if TYPE_CHECKING: + from .adapter_model import MllamaAdapterModel + +else: + import sys + + sys.modules[__name__] = _LazyModule( + __name__, + globals()["__file__"], + _import_structure, + ) diff --git a/tests/test_methods/method_test_impl/base.py b/tests/test_methods/method_test_impl/base.py index f5e53fedd6..dcc6eeb835 100644 --- a/tests/test_methods/method_test_impl/base.py +++ b/tests/test_methods/method_test_impl/base.py @@ -1,232 +1,444 @@ -import random +import copy +import os +import tempfile +from typing import Callable -import datasets import torch import adapters -from adapters import AutoAdapterModel -from transformers import AutoFeatureExtractor, AutoTokenizer, GlueDataset, GlueDataTrainingArguments -from transformers.testing_utils import torch_device - - -class AbstractAdapterTestBase: - """Base class for adapter tests. 
Defines basic functions and attributes with default values which are used in the tests. - Model test classes should inherit from this class or subclass and override the attributes and functions as needed. - """ - - model_class = AutoAdapterModel - tokenizer_name = "tests/fixtures/SiBERT" # path to default tokenizer config available in the test repo - config = None # specified in the actual model test classes - input_shape = () # (batch_size, seq_length) - leave_out_layers = [] - do_run_train_tests = True - num_labels = 2 - - def get_input_samples(self, shape=None, vocab_size=5000, config=None, **kwargs): - """Creates a dummy batch of samples in the format required for the model.""" - raise NotImplementedError("get_input_samples() must be implemented in the subclass.") - - def add_head(self, model, name, **kwargs): - """Adds a dummy head to the model.""" - raise NotImplementedError("add_head() must be implemented in the subclass.") - - def get_dataset(self, **kwargs): - """Loads a dummy dataset for the model.""" - raise NotImplementedError("get_dataset() must be implemented in the subclass.") - - def get_dataset_non_batched(self): - """Builds a non-batched dummy dataset for the model.""" - raise NotImplementedError("build_dummy_dataset() must be implemented in the subclass.") - - def attach_labels(self, inputs): - """Attaches labels to the input samples.""" - raise NotImplementedError("attach_labels() with respective label shape must be implemented in the subclass.") - - def get_model(self): - """Builds a model instance for testing based on the provied model configuration.""" - if self.model_class == AutoAdapterModel: - model = AutoAdapterModel.from_config(self.config()) - else: - model = self.model_class(self.config()) - adapters.init(model) +from adapters import ADAPTER_MODEL_MAPPING, AdapterSetup, AdapterTrainer, AutoAdapterModel +from adapters.heads import CausalLMHead +from adapters.utils import WEIGHTS_NAME +from adapters.wrappers import load_model +from transformers import TrainingArguments +from transformers.testing_utils import require_torch, torch_device + +from .utils import add_lm_head, create_twin_models + + +@require_torch +class AdapterMethodBaseTestMixin: + """Implements base test running methods for testing adapter method implementations.""" + + dtypes_to_test = [torch.float32, torch.half] if torch_device == "cuda" else [torch.float32] + + def _assert_adapter_available(self, model, adapter_name): + """Check wether the adapter name is present in the model's adapter config and has been created.""" + self.assertTrue(adapter_name in model.adapters_config) + self.assertGreater(len(model.get_adapter(adapter_name)), 0) + + def _assert_adapter_unavailable(self, model, adapter_name): + """Check wether the adapter name is not present in the model's adapter config and has not been created.""" + self.assertFalse(adapter_name in model.adapters_config) + self.assertEqual(len(model.get_adapter(adapter_name)), 0) + + def _filter_parameters(self, model, filter_keys): + return {k: v for (k, v) in model.named_parameters() if any([filter_key in k for filter_key in filter_keys])} + + def run_add_test(self, model, adapter_config, filter_keys): + model.eval() + + name = "test_adapter_" + adapter_config.__class__.__name__ + model.add_adapter(name, config=adapter_config) + model.set_active_adapters(name) + model.to(torch_device) + + # adapter is correctly added to config + self.assertTrue(name in model.adapters_config) + self.assertEqual(adapter_config, model.adapters_config.get(name)) + + # check 
that weights are available and active + has_weights = False + filter_keys = [k.format(name=name) for k in filter_keys] + for k, v in self._filter_parameters(model, filter_keys).items(): + has_weights = True + self.assertTrue(v.requires_grad, k) + self.assertTrue(has_weights) + + # Remove added adapters in case of multiple subtests + model.set_active_adapters(None) + model.delete_adapter(name) + + def run_leave_out_test(self, model, adapter_config, leave_out): + model.eval() + + adapter_config = adapter_config.replace(leave_out=leave_out) + name = "test_adapter_" + adapter_config.__class__.__name__ + model.add_adapter(name, config=adapter_config) + model.set_active_adapters(name) + + # adapter is correctly added to config + self._assert_adapter_available(model, name) + + adapter = model.get_adapter(name) + + self.assertNotEqual(len(adapter), 0) + found_layers = list(adapter.keys()) + for layer in leave_out: + self.assertNotIn(layer, found_layers) + + model.delete_adapter(name) + + def run_linear_average_test(self, model, adapter_config, filter_keys): + model.eval() + + weights = [-0.2, 0.9, 0.3] + + # add adapters to average + name = "test_adapter_" + adapter_config.__class__.__name__ + for i in range(len(weights)): + model.add_adapter(name + f"_{i}", config=adapter_config) + + # collect weighted average of adapter weights + averaged_weights = {} + for i, w in enumerate(weights): + this_filter_keys = [k.format(name=name + f"_{i}") for k in filter_keys] + for k, v in self._filter_parameters(model, this_filter_keys).items(): + base_k = k.replace(name + f"_{i}", name) + if base_k not in averaged_weights: + averaged_weights[base_k] = w * v + else: + averaged_weights[base_k] += w * v + + # average adapters + model.average_adapter( + name, [name + f"_{i}" for i in range(len(weights))], weights=weights, combine_strategy="linear" + ) + + # adapter is correctly added to config + self.assertTrue(name in model.adapters_config) + self.assertEqual(adapter_config, model.adapters_config.get(name)) + + # compare averaged weights to collected weights + this_filter_keys = [k.format(name=name) for k in filter_keys] + for k, v in self._filter_parameters(model, this_filter_keys).items(): + self.assertTrue(torch.allclose(v, averaged_weights[k]), k) + + def run_delete_test(self, model, adapter_config, filter_keys): + model.eval() + + name = "test_adapter_" + adapter_config.__class__.__name__ + model.add_adapter(name, config=adapter_config) + model.set_active_adapters(name) model.to(torch_device) - return model - - def build_rand_tensor(self, shape, dtype=torch.float): - """Creates a random tensor of the given shape.""" - total_dims = self._calc_total_dim(shape) - values = [random.random() for _ in range(total_dims)] - - return torch.tensor(data=values, dtype=dtype, device=torch_device).view(shape).contiguous() - - def build_rand_ids_tensor(self, shape, vocab_size=5000): - """Creates a random tensor of type torch.long with the given shape with random values in range 0 - (vocab_size-1).""" - total_dims = self._calc_total_dim(shape) - values = [random.randint(0, vocab_size - 1) for _ in range(total_dims)] - return torch.tensor(data=values, dtype=torch.long, device=torch_device).view(shape).contiguous() - - def _calc_total_dim(self, shape): - total_dims = 1 - for dim in shape: - total_dims *= dim - return total_dims - - def extract_input_ids(self, inputs): - # TODO: Check if this is needed in all tests and if it differs between text, vision and speech models - return inputs["input_ids"] - - def 
build_generate_input(self, shape): - """The generate() functions for inference require different inputs depeding on the model type. E.g. the text models require input_ids, where as the audio models require input_features""" - return self.build_rand_ids_tensor(self.input_shape if not shape else shape).to(torch_device) - - -class TextAdapterTestBase(AbstractAdapterTestBase): - """Base class for adapter tests for text models. Text models test classes should inherit from this class and override the attributes and functions as needed.""" - - input_shape = (3, 64) - leave_out_layers = [0, 1] - batch_size, seq_length = ( - input_shape # TODO: Check in which tests this is needed and if we can simplify by using input_shape - ) - - def get_input_samples(self, shape=None, vocab_size=5000, config=None, **kwargs): - shape = shape or self.input_shape - input_ids = self.build_rand_ids_tensor(shape, vocab_size=vocab_size) - - # Ensures that only tha last token in each sample is the eos token (needed e.g. for BART) - if config and config.eos_token_id is not None and config.eos_token_id < vocab_size: - input_ids[input_ids == config.eos_token_id] = random.randint(0, config.eos_token_id - 1) - input_ids[:, -1] = config.eos_token_id - in_data = {"input_ids": input_ids} - - # Add decoder input ids for models with a decoder - if config and config.is_encoder_decoder: - in_data["decoder_input_ids"] = input_ids.clone() - - if "num_labels" in kwargs: - in_data["labels"] = self.build_rand_ids_tensor(shape[:-1], vocab_size=kwargs["num_labels"]) - return in_data - - def add_head(self, model, name, **kwargs): - # TODO: Check if this should be more modular - model.add_classification_head(name, **kwargs) - return model.heads[name].config["num_labels"] - - def get_dataset(self, tokenizer=None): - if tokenizer is None: - tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name, use_fast=False) - if tokenizer.pad_token is None: - tokenizer.pad_token = tokenizer.eos_token - data_args = GlueDataTrainingArguments( - task_name="mrpc", data_dir="./hf_transformers/tests/fixtures/tests_samples/MRPC", overwrite_cache=True + + # adapter is correctly added to config + self._assert_adapter_available(model, name) + + # remove the adapter again + model.delete_adapter(name) + self._assert_adapter_unavailable(model, name) + + # check that weights are available and active + has_weights = False + filter_keys = [k.format(name=name) for k in filter_keys] + for k, v in self._filter_parameters(model, filter_keys).items(): + has_weights = True + self.assertFalse(has_weights) + + def run_get_test(self, model, adapter_config, num_expected_modules): + model.eval() + + model.add_adapter("first", config=adapter_config) + model.set_active_adapters("first") + + # adapter is correctly added to config + name = "first" + self._assert_adapter_available(model, name) + + adapter = model.get_adapter("first") + + self.assertNotEqual(len(adapter), 0) + num_found_modules = sum([len(layer_modules) for layer_modules in adapter.values()]) + self.assertEqual(num_expected_modules, num_found_modules) + + model.delete_adapter("first") + + def run_forward_test(self, model, adapter_config, dtype=torch.float32): + model.eval() + + name = adapter_config.__class__.__name__ + if name not in model.adapters_config: + model.add_adapter(name, config=adapter_config) + model.to(torch_device).to(dtype) + + input_data = self.get_input_samples(config=model.config, dtype=dtype) + + # pass 1: set adapter via property + model.set_active_adapters(name) + output_1 = 
model(**input_data) + + # pass 2: set via context + # unset and make sure it's unset + model.set_active_adapters(None) + self.assertEqual(None, model.active_adapters) + with AdapterSetup(name): + output_2 = model(**input_data) + + # pass 3: base output + model.set_active_adapters(None) + base_output = model(**input_data) + + self.assertEqual(len(output_1), len(output_2)) + self.assertTrue(torch.equal(output_1[0], output_2[0])) + self.assertGreaterEqual(len(output_1), len(base_output)) + self.assertFalse(torch.equal(output_1[0], base_output[0])) + + # Remove added adapters in case of multiple subtests + model.set_active_adapters(None) + model.delete_adapter(name) + + def run_load_test(self, adapter_config): + model1, model2 = create_twin_models(self.model_class, self.config) + + name = "dummy_adapter" + model1.add_adapter(name, config=adapter_config) + model1.set_active_adapters(name) + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir: + model1.save_adapter(temp_dir, name) + + # Check that there are actually weights saved + weights = torch.load(os.path.join(temp_dir, WEIGHTS_NAME), map_location="cpu", weights_only=True) + self.assertTrue(len(weights) > 0) + + # also tests that set_active works + loading_info = {} + model2.load_adapter(temp_dir, set_active=True, loading_info=loading_info) + + # check if all weights were loaded + self.assertEqual(0, len(loading_info["missing_keys"])) + self.assertEqual(0, len(loading_info["unexpected_keys"])) + + # check if adapter was correctly loaded + self.assertTrue(name in model2.adapters_config) + + # check equal output + input_data = self.get_input_samples(config=model1.config) + model1.to(torch_device) + model2.to(torch_device) + output1 = model1(**input_data) + output2 = model2(**input_data) + self.assertEqual(len(output1), len(output2)) + self.assertTrue(torch.allclose(output1[0], output2[0], atol=1e-4)) + + def run_full_model_load_test(self, adapter_config): + model1 = self.get_model() + model1.eval() + + name = "dummy" + model1.add_adapter(name, config=adapter_config) + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir: + model1.save_pretrained(temp_dir) + + model2, loading_info = load_model(temp_dir, self.model_class, output_loading_info=True) + + # check if all weights were loaded + self.assertEqual(0, len(loading_info["missing_keys"])) + self.assertEqual(0, len(loading_info["unexpected_keys"])) + + # check if adapter was correctly loaded + self.assertTrue(name in model2.adapters_config) + + # check equal output + input_data = self.get_input_samples(config=model1.config) + model1.to(torch_device) + model2.to(torch_device) + with AdapterSetup(name): + output1 = model1(**input_data) + output2 = model2(**input_data) + self.assertEqual(len(output1), len(output2)) + self.assertTrue(torch.allclose(output1[0], output2[0], atol=1e-4)) + + def trainings_run(self, model, lr=1.0, steps=8, batch_size=2, gradient_accumulation_steps=1): + # setup dataset + train_dataset = self.get_dataset() + + training_args = TrainingArguments( + output_dir="./examples", + do_train=True, + learning_rate=lr, + max_steps=steps, + use_cpu=True, + per_device_train_batch_size=batch_size, + gradient_accumulation_steps=gradient_accumulation_steps, + remove_unused_columns=False, ) - return GlueDataset(data_args, tokenizer=tokenizer, mode="train") - - def get_dataset_non_batched(self, config): - dataset = [] - for i in range(3): - input_data = self.get_input_samples(config=config) - input_data["labels"] = self.build_rand_ids_tensor((3, 
1), self.num_labels) - dataset.append(input_data) - return dataset - - def attach_labels(self, inputs): - inputs["labels"] = torch.randint(0, 2, (self.batch_size, 1), device=torch_device) - return inputs - - -class VisionAdapterTestBase(AbstractAdapterTestBase): - """Base class for adapter tests for vision models. Vision models test classes should inherit from this class and override the attributes and functions as needed.""" - - input_shape = (3, 3, 224, 224) - batch_size = 3 - - def get_input_samples(self, shape=None, config=None, dtype=torch.float, **kwargs): - shape = shape or self.input_shape - pixel_values = self.build_rand_tensor(shape, dtype=dtype) - return {"pixel_values": pixel_values} - - def add_head(self, model, name, **kwargs): - kwargs["num_labels"] = 10 if "num_labels" not in kwargs else kwargs["num_labels"] - model.add_image_classification_head(name, **kwargs) - return model.heads[name].config["num_labels"] - - def get_dataset(self, feature_extractor=None): - dataset = datasets.load_dataset( - "./tests/fixtures/samples/cifar10", - data_dir="./tests/fixtures/samples/cifar10", - split="train", - trust_remote_code=True, + + # evaluate + trainer = AdapterTrainer( + model=model, + args=training_args, + train_dataset=train_dataset, ) - if feature_extractor is None: - feature_extractor = AutoFeatureExtractor.from_pretrained(self.feature_extractor_name) - - def transform(example_batch): - inputs = feature_extractor([x for x in example_batch["img"]], return_tensors="pt") - inputs["labels"] = example_batch["label"] - return inputs - - dataset = dataset.with_transform(transform) - return dataset - - -class AudioAdapterTestBase(AbstractAdapterTestBase): - """Base class for adapter tests for audio models. Audio models test classes should inherit from this class and override the attributes and functions as needed.""" - - input_shape = (3, 80, 3000) # (batch_size, n_mels, enc_seq_len) - time_window = 3000 # Time window for audio samples - seq_length = 80 - batch_size = 3 - - _TASK_DATASET_MAPPING = { - # TODO: build global mapping for all tasks and datasets - "seq2seq_lm": "./tests/fixtures/audio_datasets/common_voice_encoded", - "audio_classification": "./tests/fixtures/audio_datasets/speech_commands_encoded", - } - - def add_head(self, model, name, head_type="seq2seq_lm", **kwargs): - # TODO: simpify Audio tests by using the same head type for all tests - if head_type == "audio_classification": - model.add_audio_classification_head(name, **kwargs) - return model.heads[name].config["num_labels"] - elif head_type == "seq2seq_lm": - kwargs.pop("num_labels", 1) # Remove num_labels from kwargs if present in the tests - model.add_seq2seq_lm_head(name, **kwargs) - return self.input_shape[1] # Return the number of mel features - else: - raise ValueError(f"Head type {head_type} not supported.") - - def get_input_samples(self, shape=None, config=None, **kwargs): - shape = shape or self.input_shape - in_data = {"input_features": self.build_rand_tensor(shape, dtype=torch.float)} - - # Add decoder input ids for models with a decoder - if config and config.is_encoder_decoder: - in_data["decoder_input_ids"] = self.build_rand_ids_tensor((shape[:-1]), vocab_size=config.vocab_size) - return in_data - - def get_dataset(self, task_type: str = "seq2seq_lm", **kwargs): - # Dataset is already processed and saved to disk, to save time during testing - # Preparation script can be found in tests/fixtures/audio_datasets/respective_prepare_script.py - dataset_path = self._TASK_DATASET_MAPPING[task_type] - dataset 
= datasets.load_from_disk(dataset_path) - return dataset["train"] - - def extract_input_ids(self, inputs): - return inputs["input_features"] - - def build_generate_input(self, shape): - return self.build_rand_tensor(self.input_shape if not shape else shape, dtype=torch.float) - - def attach_labels(self, inputs): - inputs["labels"] = torch.randint(0, 2, (self.batch_size, self.seq_length), device=torch_device) - return inputs - - def get_dataset_non_batched(self, config): - dataset_batched = self.get_dataset() - dataset = [{} for _ in range(len(dataset_batched))] - # For non-batched training, we need to wrap the samples by an additional dimension - for i in range(len(dataset_batched)): - for key, value in dataset_batched[i].items(): - dataset[i][key] = torch.unsqueeze(value, 0) - return dataset + trainer.train() + + def run_train_test(self, adapter_config, filter_keys): + if not self.do_run_train_tests: + self.skipTest("Skipping training tests. Set `do_run_train_tests=True` to run them.") + if self.config_class not in ADAPTER_MODEL_MAPPING: + self.skipTest("Does not support flex heads.") + model = AutoAdapterModel.from_config(self.config()) + + # add two adapters: one will be trained and the other should be frozen + model.add_adapter("mrpc", config=adapter_config) + model.add_adapter("dummy", config=adapter_config) + self.add_head(model, "mrpc") + + self._assert_adapter_available(model, "mrpc") + self._assert_adapter_available(model, "dummy") + + # train the mrpc adapter -> should be activated & unfreezed + model.train_adapter("mrpc") + self.assertEqual(set(["mrpc"]), model.active_adapters.flatten()) + + # all weights of the adapter should be activated + has_weights = False + filter_keys_trained = [k.format(name="mrpc") for k in filter_keys] + for k, v in self._filter_parameters(model, filter_keys_trained).items(): + has_weights = True + self.assertTrue(v.requires_grad, k) + self.assertTrue(has_weights) + # all weights of the adapter not used for training should be frozen + filter_keys_untrained = [k.format(name="dummy") for k in filter_keys] + for k, v in self._filter_parameters(model, filter_keys_untrained).items(): + self.assertFalse(v.requires_grad, k) + + state_dict_pre = copy.deepcopy(model.state_dict()) + + self.trainings_run(model) + + # check that the adapters have changed, but the base model has not + adapters_with_change, base_with_change = False, False + # check whether the key corresponds to a tied embedding + + def has_tied_embeddings(k): + tied_embeddings = hasattr(model.config, "tie_word_embeddings") and model.config.tie_word_embeddings + is_tied_layer = ( + isinstance(model.heads["mrpc"], CausalLMHead) + and "heads.{}.{}.weight".format("mrpc", len(model.heads["mrpc"]._modules) - 1) in k + ) + return tied_embeddings and is_tied_layer + + for (k1, v1), (k2, v2) in zip(state_dict_pre.items(), model.state_dict().items()): + # move both to the same device to avoid device mismatch errors + v1, v2 = v1.to(v2.device), v2 + if "mrpc" in k1 and not has_tied_embeddings(k1): + adapters_with_change |= not torch.equal(v1, v2) + else: + base_with_change |= not torch.equal(v1, v2) + self.assertTrue(adapters_with_change) + self.assertFalse(base_with_change) + + def run_merge_test(self, adapter_config): + model = self.get_model() + model.eval() + model.add_adapter("test_lora", config=adapter_config) + model.to(torch_device) + + input_data = self.get_input_samples(config=model.config) + + # forward in training mode + model.set_active_adapters("test_lora") + output_1 = model(**input_data) + + # 
forward in merged mode + model.set_active_adapters(None) + model.merge_adapter("test_lora") + model.to(torch_device) + model.eval() + output_2 = model(**input_data) + + # check forward pass + self.assertEqual(len(output_1), len(output_2)) + self.assertTrue(torch.allclose(output_1[0], output_2[0], atol=1e-3)) + + def run_reset_test(self, adapter_config): + model = self.get_model() + model.eval() + model.add_adapter("test_lora", config=adapter_config) + model.to(torch_device) + + input_data = self.get_input_samples(config=model.config) + + # before merging + output_1 = model(**input_data) + + # merge & reset + model.merge_adapter("test_lora") + model.reset_adapter() + + # after merging + output_2 = model(**input_data) + + # check forward pass + self.assertEqual(len(output_1), len(output_2)) + self.assertTrue(torch.allclose(output_1[0], output_2[0], atol=1e-3)) + + def _run_gradient_checkpointing_test_helper(self, adapter_setup_fn: Callable[[adapters.ModelAdaptersMixin], None]): + """ + Test that gradient checkpointing produces the same results as normal training + Args: + adapter_setup_fn: Function that takes a model and sets up the adapter training. Must also add a head (usually via self.add_head(...)). We have this in a separate function to allow complex setups (like training a normal adapter or training parallel setups) + """ + + if not self.do_run_train_tests: + self.skipTest("Skipping training tests. Set `do_run_train_tests=True` to run them.") + if self.config_class not in ADAPTER_MODEL_MAPPING: + self.skipTest("Does not support flex heads.") + + config = self.config() + state_dict_after_training = {} + + # Run training twice (with & without gradient checkpointing) to verify both produce identical results (i.e. the same state dict) + for train_with_checkpointing in [True, False]: + # Set random seed + torch.manual_seed(42) + + # Initialize model + model = adapters.AutoAdapterModel.from_config(config) + + # if model doesn't support gradient checkpointing, skip the test + if not model.supports_gradient_checkpointing: + self.skipTest("Model does not support gradient checkpointing") + + model.to(torch_device) + adapter_setup_fn(model) + + # Enable gradient checkpointing + if train_with_checkpointing: + model.gradient_checkpointing_enable() + + # Train & store state dict + self.trainings_run(model, batch_size=1, gradient_accumulation_steps=2) + state_dict_after_training[train_with_checkpointing] = copy.deepcopy(model.state_dict()) + + # Check that the state dicts are the same (we know that normal training works as expected, so we only need to check that gradient checkpointing produces the same results.) 
+ for (k1, v1), (k2, v2) in zip( + state_dict_after_training[True].items(), state_dict_after_training[False].items() + ): + v1 = v1.to(v2.device) + self.assertTrue(torch.equal(v1, v2), msg=f"Key {k1} is not equal:\nv1: {v1}\nv2: {v2}") + + def run_gradient_checkpointing_single_adapter_test(self, adapter_config): + def adapter_setup_fn(model): + model.add_adapter("adapter1", config=adapter_config) + self.add_head(model, "adapter1") + model.train_adapter("adapter1") + model.adapter_to("adapter1", torch_device) + + self._run_gradient_checkpointing_test_helper(adapter_setup_fn) + + def run_generate_test(self, adapter_config, max_new_tokens=32): + if self.config_class not in ADAPTER_MODEL_MAPPING or ( + "seq2seq_lm" not in ADAPTER_MODEL_MAPPING[self.config_class].head_types + and "causal_lm" not in ADAPTER_MODEL_MAPPING[self.config_class].head_types + ): + self.skipTest("No seq2seq or causal language model head") + model = self.get_model() + model.add_adapter("generate", config=adapter_config) + add_lm_head(self.config_class, model, "generate") + model.set_active_adapters("generate") + model.to(torch_device) + generate_input = self.build_generate_input(self.input_shape).to(torch_device) + generated = model.generate(generate_input, max_new_tokens=max_new_tokens) + self.assertLessEqual(generated.shape, (self.input_shape[0], self.input_shape[1] + max_new_tokens)) \ No newline at end of file From 5abb4ab1bcfc428386cb8c26bf5f8788b865340a Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Thu, 6 Feb 2025 16:56:08 +0100 Subject: [PATCH 55/63] Fix test model config and base model --- src/adapters/models/mllama/adapter_model.py | 1 - tests/test_methods/test_on_mllama.py | 19 ++++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/adapters/models/mllama/adapter_model.py b/src/adapters/models/mllama/adapter_model.py index e1aff9d97b..880b83da32 100644 --- a/src/adapters/models/mllama/adapter_model.py +++ b/src/adapters/models/mllama/adapter_model.py @@ -204,7 +204,6 @@ def forward( position_ids=position_ids, past_key_values=past_key_values, inputs_embeds=inputs_embeds, - labels=labels, use_cache=use_cache, output_attentions=output_attentions, output_hidden_states=output_hidden_states, diff --git a/tests/test_methods/test_on_mllama.py b/tests/test_methods/test_on_mllama.py index 427aa5762d..de33b65eb1 100644 --- a/tests/test_methods/test_on_mllama.py +++ b/tests/test_methods/test_on_mllama.py @@ -6,7 +6,9 @@ from transformers import MllamaImageProcessor from transformers.models.mllama.configuration_mllama import MllamaConfig, MllamaTextConfig, MllamaVisionConfig -from .generator import * +from .base import TextAdapterTestBase +from .generator import generate_method_tests +import torch def from_text_vision_configs(config_class, text_config: MllamaTextConfig, vision_config: MllamaVisionConfig, **kwargs): @@ -36,28 +38,27 @@ class MllamaAdapterTestBase(TextAdapterTestBase): MllamaTextConfig( vocab_size=1000, # Minimal vocab size hidden_size=128, - num_hidden_layers=1, + num_hidden_layers=4, num_attention_heads=2, num_key_value_heads=2, intermediate_size=128, - cross_attention_layers=[0], + cross_attention_layers=[3], bos_token_id=990, eos_token_id=991, pad_token_id=992, - max_position_embeddings=512, + max_position_embeddings=128, rope_scaling={ "rope_type": "default", }, ), MllamaVisionConfig( hidden_size=128, - num_hidden_layers=1, + num_hidden_layers=4, num_global_layers=1, num_attention_heads=1, intermediate_size=128, vision_output_dim=256, - image_size=224, - 
intermediate_layers_indices=[0], + intermediate_layers_indices=[3], ), ) ) @@ -67,7 +68,7 @@ class MllamaAdapterTestBase(TextAdapterTestBase): # Save runtime by computing the processed image once and reusing it for all tests FIXTURES_DIR = Path(__file__).parent.parent / "fixtures" - img_processor = MllamaImageProcessor() + img_processor = MllamaImageProcessor(size={"height": 448, "width": 448}) img = Image.open(os.path.join(FIXTURES_DIR, "tests_samples", "COCO", "000000039769.png")) processed_img = img_processor(img, return_tensors="pt") @@ -90,7 +91,7 @@ def get_input_samples(self, vocab_size=1000, shape=None, config=None, dtype=torc return in_data -test_methods = generate_method_tests(MllamaAdapterTestBase, excluded_tests=[]) +test_methods = generate_method_tests(MllamaAdapterTestBase) for test_class_name, test_class in test_methods.items(): globals()[test_class_name] = test_class From 08b5ef6d5472c9c4dad0bb4491bcb7387fe50dc6 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Mon, 24 Feb 2025 23:15:24 +0100 Subject: [PATCH 56/63] update test config --- tests/test_methods/test_on_mllama.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tests/test_methods/test_on_mllama.py b/tests/test_methods/test_on_mllama.py index de33b65eb1..c8ae9d4184 100644 --- a/tests/test_methods/test_on_mllama.py +++ b/tests/test_methods/test_on_mllama.py @@ -37,33 +37,32 @@ class MllamaAdapterTestBase(TextAdapterTestBase): MllamaConfig, MllamaTextConfig( vocab_size=1000, # Minimal vocab size - hidden_size=128, + hidden_size=32, num_hidden_layers=4, num_attention_heads=2, num_key_value_heads=2, - intermediate_size=128, + intermediate_size=32, cross_attention_layers=[3], bos_token_id=990, eos_token_id=991, pad_token_id=992, - max_position_embeddings=128, + max_position_embeddings=32, rope_scaling={ "rope_type": "default", }, ), MllamaVisionConfig( - hidden_size=128, + hidden_size=32, num_hidden_layers=4, - num_global_layers=1, - num_attention_heads=1, - intermediate_size=128, - vision_output_dim=256, + num_global_layers=2, + num_attention_heads=2, + intermediate_size=32, + vision_output_dim=64, intermediate_layers_indices=[3], ), ) ) tokenizer_name = "arnavgrg/mllama-11b-vision-lora" - shape = (1, 128) # Save runtime by computing the processed image once and reusing it for all tests FIXTURES_DIR = Path(__file__).parent.parent / "fixtures" @@ -72,7 +71,7 @@ class MllamaAdapterTestBase(TextAdapterTestBase): img = Image.open(os.path.join(FIXTURES_DIR, "tests_samples", "COCO", "000000039769.png")) processed_img = img_processor(img, return_tensors="pt") - def get_input_samples(self, vocab_size=1000, shape=None, config=None, dtype=torch.float, **kwargs): + def get_input_samples(self, vocab_size=1000, shape=(1, 128), config=None, dtype=torch.float, **kwargs): shape = shape or self.input_shape # Text inputs From 3f56276ab0bd65412956c192ad6833637baf16da Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Thu, 27 Feb 2025 17:25:56 +0100 Subject: [PATCH 57/63] Update forward context (make style & quality) --- src/adapters/models/mllama/adapter_model.py | 12 +++--------- src/adapters/models/mllama/mixin_mllama.py | 2 +- tests/test_methods/test_on_mllama.py | 2 +- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/adapters/models/mllama/adapter_model.py b/src/adapters/models/mllama/adapter_model.py index 880b83da32..8a6736c901 100644 --- a/src/adapters/models/mllama/adapter_model.py +++ b/src/adapters/models/mllama/adapter_model.py @@ -14,7 +14,7 @@ ) from transformers.utils 
import add_start_docstrings -from ...context import AdapterSetup +from ...context import AdapterSetup, ForwardContext from ...heads import ModelWithFlexibleHeadsAdaptersMixin from ...model_mixin import EmbeddingAdaptersWrapperMixin from ...wrappers import init @@ -168,6 +168,7 @@ def __init__(self, config): self._init_head_modules() self.post_init() + @ForwardContext.wrap def forward( self, input_ids: Optional[torch.LongTensor] = None, @@ -188,12 +189,10 @@ def forward( cache_position: Optional[torch.LongTensor] = None, num_logits_to_keep: int = 0, head=None, - output_adapter_gating_scores=False, - output_adapter_fusion_attentions=False, **kwargs, ): - outputs, context = self.model( + outputs = self.model( input_ids=input_ids, pixel_values=pixel_values, aspect_ratio_mask=aspect_ratio_mask, @@ -209,12 +208,7 @@ def forward( output_hidden_states=output_hidden_states, return_dict=return_dict, cache_position=cache_position, - output_adapter_gating_scores=output_adapter_gating_scores, - output_adapter_fusion_attentions=output_adapter_fusion_attentions, - adapter_input_parallelized=kwargs.pop("adapter_input_parallelized", False), - output_context=True, ) - kwargs["context"] = context hidden_states = outputs[0] head_input_states = hidden_states[:, -num_logits_to_keep:, :] diff --git a/src/adapters/models/mllama/mixin_mllama.py b/src/adapters/models/mllama/mixin_mllama.py index 45f4e37779..319d1fb6d6 100644 --- a/src/adapters/models/mllama/mixin_mllama.py +++ b/src/adapters/models/mllama/mixin_mllama.py @@ -12,7 +12,7 @@ ModelBaseAdaptersMixin, ModelWithHeadsAdaptersMixin, ) -from ..clip.mixin_clip import CLIPAttentionAdaptersMixin, CLIPEncoderAdaptersMixin, CLIPEncoderLayerAdaptersMixin +from ..clip.mixin_clip import CLIPAttentionAdaptersMixin, CLIPEncoderLayerAdaptersMixin from ..llama.mixin_llama import LlamaDecoderLayerMixin diff --git a/tests/test_methods/test_on_mllama.py b/tests/test_methods/test_on_mllama.py index c8ae9d4184..8f52294574 100644 --- a/tests/test_methods/test_on_mllama.py +++ b/tests/test_methods/test_on_mllama.py @@ -1,6 +1,7 @@ import os from pathlib import Path +import torch from PIL import Image from transformers import MllamaImageProcessor @@ -8,7 +9,6 @@ from .base import TextAdapterTestBase from .generator import generate_method_tests -import torch def from_text_vision_configs(config_class, text_config: MllamaTextConfig, vision_config: MllamaVisionConfig, **kwargs): From 8cfbb36479259de4c65406bfe731f6820ca064d7 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Fri, 14 Mar 2025 18:20:52 +0100 Subject: [PATCH 58/63] Fixes: - Remove ConditionalGenerationMixin for now - Remove decoderlayermxin from mixin_mapping - Add DecoderLayerWithAdapters for bottleneck support - Fix input shape typo in test config - make style & make quality - Next: Find out why output is the same when using bottleneck adapters and when not --- src/adapters/models/__init__.py | 6 -- src/adapters/models/mllama/mixin_mllama.py | 9 +-- src/adapters/models/mllama/modeling_mllama.py | 78 +++++++++++++++++++ tests/test_methods/method_test_impl/base.py | 2 +- tests/test_methods/test_on_mllama.py | 4 +- 5 files changed, 82 insertions(+), 17 deletions(-) diff --git a/src/adapters/models/__init__.py b/src/adapters/models/__init__.py index ca8f92b60d..b04f446ee9 100644 --- a/src/adapters/models/__init__.py +++ b/src/adapters/models/__init__.py @@ -22,9 +22,6 @@ from .mistral.mixin_mistral import MistralModelAdapterMixin from .mllama.mixin_mllama import ( MllamaAdaptersMixin, - 
MllamaCrossAttentionDecoderLayerAdaptersMixin, - MllamaForConditionalGenerationWithHeadsAdaptersMixin, - MllamaSelfAttentionDecoderLayerAdaptersMixin, MllamaTextCrossAttentionAdaptersMixin, MllamaTextModelAdaptersMixin, MllamaTextSelfAttentionAdaptersMixin, @@ -123,7 +120,6 @@ "LlamaForQuestionAnswering": LlamaForQuestionAnsweringAdapterMixin, "MistralModel": MistralModelAdapterMixin, # Mulitmodal Llama - "MllamaForConditionalGeneration": MllamaForConditionalGenerationWithHeadsAdaptersMixin, "MllamaModel": MllamaAdaptersMixin, "MllamaVisionModel": MllamaVisionModelAdaptersMixin, "MllamaTextModel": MllamaTextModelAdaptersMixin, @@ -132,6 +128,4 @@ "MllamaTextSelfAttention": MllamaTextSelfAttentionAdaptersMixin, "MllamaTextCrossAttention": MllamaTextCrossAttentionAdaptersMixin, "MllamaVisionEncoderLayer": MllamaVisionEncoderLayerAdaptersMixin, - "MllamaSelfAttentionDecoderLayer": MllamaSelfAttentionDecoderLayerAdaptersMixin, - "MllamaCrossAttentionDecoderLayer": MllamaCrossAttentionDecoderLayerAdaptersMixin, } diff --git a/src/adapters/models/mllama/mixin_mllama.py b/src/adapters/models/mllama/mixin_mllama.py index 45f4e37779..46b364040e 100644 --- a/src/adapters/models/mllama/mixin_mllama.py +++ b/src/adapters/models/mllama/mixin_mllama.py @@ -10,9 +10,8 @@ InvertibleAdaptersMixin, InvertibleAdaptersWrapperMixin, ModelBaseAdaptersMixin, - ModelWithHeadsAdaptersMixin, ) -from ..clip.mixin_clip import CLIPAttentionAdaptersMixin, CLIPEncoderAdaptersMixin, CLIPEncoderLayerAdaptersMixin +from ..clip.mixin_clip import CLIPAttentionAdaptersMixin, CLIPEncoderLayerAdaptersMixin from ..llama.mixin_llama import LlamaDecoderLayerMixin @@ -139,9 +138,3 @@ def _init_reft_layers(self, model_config, adapters_config): if not hasattr(layer, "reft_layer"): layer.reft_layer = ReftLayer("output", model_config.text_config, adapters_config) layer.register_forward_hook(hook_fn) - - -class MllamaForConditionalGenerationWithHeadsAdaptersMixin(ModelWithHeadsAdaptersMixin, MllamaAdaptersMixin): - """Adds adapters to the MllamaForConditionalGeneration class.""" - - pass diff --git a/src/adapters/models/mllama/modeling_mllama.py b/src/adapters/models/mllama/modeling_mllama.py index 196fdcbf51..995f044b6c 100644 --- a/src/adapters/models/mllama/modeling_mllama.py +++ b/src/adapters/models/mllama/modeling_mllama.py @@ -29,6 +29,7 @@ from adapters.composition import adjust_tensors_for_parallel, match_attn_matrices_for_parallel from transformers.cache_utils import Cache from transformers.models.mllama.modeling_mllama import ( + MllamaSelfAttentionDecoderLayer, MllamaTextCrossAttention, MllamaTextCrossSdpaAttention, MllamaTextSelfAttention, @@ -41,6 +42,7 @@ from transformers.utils import logging from .mixin_mllama import ( + MllamaSelfAttentionDecoderLayerAdaptersMixin, MllamaTextCrossAttentionAdaptersMixin, MllamaTextSelfAttentionAdaptersMixin, MllamaVisionAttentionAdaptersMixin, @@ -473,3 +475,79 @@ def forward( attn_output = self.o_proj(attn_output) return attn_output, None, past_key_value + + +class MllamaSelfAttentionDecoderLayerWithAdapters( + MllamaSelfAttentionDecoderLayerAdaptersMixin, MllamaSelfAttentionDecoderLayer +): + + def forward( + self, + hidden_states: torch.Tensor, + cross_attention_states: Optional[torch.Tensor] = None, + cross_attention_mask: Optional[torch.Tensor] = None, + attention_mask: Optional[torch.Tensor] = None, + full_text_row_masked_out_mask: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_value: Optional[Cache] = None, + 
output_attentions: Optional[bool] = False, + use_cache: Optional[bool] = False, + cache_position: Optional[torch.LongTensor] = None, + position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC + ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]: + """ + Args: + hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` + attention_mask (`torch.FloatTensor`, *optional*): + attention mask of size `(batch_size, sequence_length)` if flash attention is used or `(batch_size, 1, + query_sequence_length, key_sequence_length)` if default attention is used. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under + returned tensors for more detail. + use_cache (`bool`, *optional*): + If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding + (see `past_key_values`). + past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states + cache_position (`torch.LongTensor` of shape `(sequence_length)`, *optional*): + Indices depicting the position of the input sequence tokens in the sequence + position_embeddings (`Tuple[torch.FloatTensor, torch.FloatTensor]`, *optional*): + Tuple containing the cosine and sine positional embeddings of shape `(batch_size, seq_len, head_dim)`, + with `head_dim` being the embedding dimension of each attention head. + kwargs (`dict`, *optional*): + Arbitrary kwargs to be ignored, used for FSDP and other methods that injects code + into the model + """ + residual = hidden_states + + hidden_states = self.input_layernorm(hidden_states) + + # Self Attention + hidden_states, self_attn_weights, present_key_value = self.self_attn( + hidden_states=hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_value=past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + cache_position=cache_position, + position_embeddings=position_embeddings, + ) + hidden_states = self.attention_adapters(hidden_states, residual, None) + + # Fully Connected + residual = hidden_states + hidden_states = self.post_attention_layernorm(hidden_states) + hidden_states = self.mlp(hidden_states) + # >>> START AH Changes <<< + hidden_states = self.output_adapters(hidden_states, residual, None) + # >>> END AH Changes <<< + + outputs = (hidden_states,) + + if output_attentions: + outputs += (self_attn_weights,) + + if use_cache: + outputs += (present_key_value,) + + return outputs diff --git a/tests/test_methods/method_test_impl/base.py b/tests/test_methods/method_test_impl/base.py index dcc6eeb835..95e3411725 100644 --- a/tests/test_methods/method_test_impl/base.py +++ b/tests/test_methods/method_test_impl/base.py @@ -441,4 +441,4 @@ def run_generate_test(self, adapter_config, max_new_tokens=32): model.to(torch_device) generate_input = self.build_generate_input(self.input_shape).to(torch_device) generated = model.generate(generate_input, max_new_tokens=max_new_tokens) - self.assertLessEqual(generated.shape, (self.input_shape[0], self.input_shape[1] + max_new_tokens)) \ No newline at end of file + self.assertLessEqual(generated.shape, (self.input_shape[0], self.input_shape[1] + max_new_tokens)) diff --git a/tests/test_methods/test_on_mllama.py b/tests/test_methods/test_on_mllama.py index de33b65eb1..7f27c7ebb9 100644 --- a/tests/test_methods/test_on_mllama.py +++ 
b/tests/test_methods/test_on_mllama.py @@ -1,6 +1,7 @@ import os from pathlib import Path +import torch from PIL import Image from transformers import MllamaImageProcessor @@ -8,7 +9,6 @@ from .base import TextAdapterTestBase from .generator import generate_method_tests -import torch def from_text_vision_configs(config_class, text_config: MllamaTextConfig, vision_config: MllamaVisionConfig, **kwargs): @@ -63,7 +63,7 @@ class MllamaAdapterTestBase(TextAdapterTestBase): ) ) tokenizer_name = "arnavgrg/mllama-11b-vision-lora" - shape = (1, 128) + input_shape = (1, 128) # Save runtime by computing the processed image once and reusing it for all tests FIXTURES_DIR = Path(__file__).parent.parent / "fixtures" From f7ff891f8c6de488718db0cc602714690776c59e Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Tue, 15 Apr 2025 21:55:23 +0200 Subject: [PATCH 59/63] Adapt adapter head logic from llama --- src/adapters/models/mllama/adapter_model.py | 40 ++++++++++++++------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/src/adapters/models/mllama/adapter_model.py b/src/adapters/models/mllama/adapter_model.py index 8a6736c901..01056080c7 100644 --- a/src/adapters/models/mllama/adapter_model.py +++ b/src/adapters/models/mllama/adapter_model.py @@ -14,7 +14,8 @@ ) from transformers.utils import add_start_docstrings -from ...context import AdapterSetup, ForwardContext +from ...composition import adjust_tensors_for_parallel +from ...context import ForwardContext from ...heads import ModelWithFlexibleHeadsAdaptersMixin from ...model_mixin import EmbeddingAdaptersWrapperMixin from ...wrappers import init @@ -210,16 +211,31 @@ def forward( cache_position=cache_position, ) - hidden_states = outputs[0] - head_input_states = hidden_states[:, -num_logits_to_keep:, :] + batch_size = outputs[0].shape[0] + + if self.config.pad_token_id is None: + # TODO-AH: this may result in unexpected behavior for classification. Find a better way to do this? + sequence_lengths = -1 + else: + if input_ids is not None: + sequence_lengths = torch.ne(input_ids, self.config.pad_token_id).sum(-1) - 1 + (sequence_lengths,) = adjust_tensors_for_parallel(outputs[0], sequence_lengths) + else: + sequence_lengths = -1 + logger.warning( + f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. 
Results may be " + "unexpected if using padding tokens in conjunction with `inputs_embeds.`" + ) + + cls_logits = outputs[0][range(batch_size), sequence_lengths] + + outputs = self.forward_head( + outputs, + head_name=head, + cls_output=cls_logits, + attention_mask=attention_mask, + return_dict=return_dict, + **kwargs, + ) - if head or AdapterSetup.get_context_head_setup() or self.active_head: - head_outputs = self.forward_head( - head_input_states, - head_name=head, - attention_mask=attention_mask, - return_dict=return_dict, - **kwargs, - ) - return head_outputs return outputs From b180267b5763b90eccfe83b2ee954913575d8ce4 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Tue, 15 Apr 2025 21:55:49 +0200 Subject: [PATCH 60/63] Fix typos & make style --- src/adapters/models/mllama/modeling_mllama.py | 3 +++ tests/test_methods/test_on_mllama.py | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/adapters/models/mllama/modeling_mllama.py b/src/adapters/models/mllama/modeling_mllama.py index 995f044b6c..aeb54f3dfc 100644 --- a/src/adapters/models/mllama/modeling_mllama.py +++ b/src/adapters/models/mllama/modeling_mllama.py @@ -532,7 +532,10 @@ def forward( cache_position=cache_position, position_embeddings=position_embeddings, ) + + # >>> START AH Changes <<< hidden_states = self.attention_adapters(hidden_states, residual, None) + # >>> END AH Changes <<< # Fully Connected residual = hidden_states diff --git a/tests/test_methods/test_on_mllama.py b/tests/test_methods/test_on_mllama.py index db05d8023d..35ab474587 100644 --- a/tests/test_methods/test_on_mllama.py +++ b/tests/test_methods/test_on_mllama.py @@ -1,7 +1,6 @@ import os from pathlib import Path -import torch import torch from PIL import Image From b633433d1c6fe97f1e6c5de39b3e0840d4e9335a Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Wed, 16 Apr 2025 21:11:35 +0200 Subject: [PATCH 61/63] Fix invertible adapter forward pass: - Add post_embedding_forward hook logic and hook registration to the text model - Without it invertible adapters are not invoked although added to the model --- src/adapters/models/mllama/mixin_mllama.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/adapters/models/mllama/mixin_mllama.py b/src/adapters/models/mllama/mixin_mllama.py index 46b364040e..4c08e40edc 100644 --- a/src/adapters/models/mllama/mixin_mllama.py +++ b/src/adapters/models/mllama/mixin_mllama.py @@ -86,6 +86,17 @@ def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]: for i, layer in enumerate(self.layers): yield i, layer + def init_adapters(self, model_config, adapters_config): + super().init_adapters(model_config, adapters_config) + + # Register hook for post embedding forward + self.embed_tokens.register_forward_hook(self.post_embedding_forward) + + def post_embedding_forward(self, module, args, embedding_output): + embedding_output = self.invertible_adapters_forward(embedding_output) + # Prompt tuning not yet supported + return embedding_output + class MllamaAdaptersMixin(EmbeddingAdaptersWrapperMixin, InvertibleAdaptersWrapperMixin, ModelBaseAdaptersMixin): """ From 157e142fb62417b230125e329236325a0b62eec8 Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Wed, 16 Apr 2025 22:14:57 +0200 Subject: [PATCH 62/63] Add MllamaCrossAttentionDecoderLayerWithAdapters: - With MllamaCrossAttentionDecoderLayerWithAdapters the normal crossattentionlayer is replaced during adapters.init() and adds the adapters logic - Also redundant mixins are removed form the model_mixin_mapping --- src/adapters/models/__init__.py | 
6 -- src/adapters/models/mllama/modeling_mllama.py | 60 +++++++++++++++++++ 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/src/adapters/models/__init__.py b/src/adapters/models/__init__.py index b04f446ee9..02f5d3751a 100644 --- a/src/adapters/models/__init__.py +++ b/src/adapters/models/__init__.py @@ -22,10 +22,7 @@ from .mistral.mixin_mistral import MistralModelAdapterMixin from .mllama.mixin_mllama import ( MllamaAdaptersMixin, - MllamaTextCrossAttentionAdaptersMixin, MllamaTextModelAdaptersMixin, - MllamaTextSelfAttentionAdaptersMixin, - MllamaVisionAttentionAdaptersMixin, MllamaVisionEncoderAdaptersMixin, MllamaVisionEncoderLayerAdaptersMixin, MllamaVisionModelAdaptersMixin, @@ -124,8 +121,5 @@ "MllamaVisionModel": MllamaVisionModelAdaptersMixin, "MllamaTextModel": MllamaTextModelAdaptersMixin, "MllamaVisionEncoder": MllamaVisionEncoderAdaptersMixin, - "MllamaVisionAttention": MllamaVisionAttentionAdaptersMixin, - "MllamaTextSelfAttention": MllamaTextSelfAttentionAdaptersMixin, - "MllamaTextCrossAttention": MllamaTextCrossAttentionAdaptersMixin, "MllamaVisionEncoderLayer": MllamaVisionEncoderLayerAdaptersMixin, } diff --git a/src/adapters/models/mllama/modeling_mllama.py b/src/adapters/models/mllama/modeling_mllama.py index aeb54f3dfc..4f5b3feb0c 100644 --- a/src/adapters/models/mllama/modeling_mllama.py +++ b/src/adapters/models/mllama/modeling_mllama.py @@ -29,6 +29,7 @@ from adapters.composition import adjust_tensors_for_parallel, match_attn_matrices_for_parallel from transformers.cache_utils import Cache from transformers.models.mllama.modeling_mllama import ( + MllamaCrossAttentionDecoderLayer, MllamaSelfAttentionDecoderLayer, MllamaTextCrossAttention, MllamaTextCrossSdpaAttention, @@ -42,6 +43,7 @@ from transformers.utils import logging from .mixin_mllama import ( + MllamaCrossAttentionDecoderLayerAdaptersMixin, MllamaSelfAttentionDecoderLayerAdaptersMixin, MllamaTextCrossAttentionAdaptersMixin, MllamaTextSelfAttentionAdaptersMixin, @@ -554,3 +556,61 @@ def forward( outputs += (present_key_value,) return outputs + + +class MllamaCrossAttentionDecoderLayerWithAdapters( + MllamaCrossAttentionDecoderLayer, MllamaCrossAttentionDecoderLayerAdaptersMixin +): + + def forward( + self, + hidden_states: torch.Tensor, + cross_attention_states: torch.Tensor, + cross_attention_mask: torch.Tensor, + attention_mask: torch.Tensor, + full_text_row_masked_out_mask: Tuple[torch.Tensor, torch.Tensor], + position_ids: Optional[torch.LongTensor] = None, + past_key_value: Optional[Cache] = None, + output_attentions: Optional[bool] = False, + use_cache: Optional[bool] = False, + cache_position: Optional[torch.LongTensor] = None, + position_embeddings: Optional[torch.Tensor] = None, + ) -> Tuple[torch.Tensor]: + + residual = hidden_states + hidden_states = self.input_layernorm(hidden_states) + + hidden_states, attn_weights, past_key_value = self.cross_attn( + hidden_states=hidden_states, + attention_mask=cross_attention_mask, + cross_attention_states=cross_attention_states, + past_key_value=past_key_value, + output_attentions=output_attentions, + cache_position=cache_position, + ) + # >>> START AH Changes <<< + hidden_states = self.attention_adapters(hidden_states, residual, None) + # >>> END AH Changes <<< + hidden_states = residual + self.cross_attn_attn_gate.tanh() * hidden_states + + # Fully Connected + residual = hidden_states + hidden_states = self.post_attention_layernorm(hidden_states) + hidden_states = self.mlp(hidden_states) + # >>> START AH Changes <<< + hidden_states = 
self.output_adapters(hidden_states, residual, None) + # >>> END AH Changes <<< + + if full_text_row_masked_out_mask is not None: + hidden_states = full_text_row_masked_out_mask[:, 0] * hidden_states # type: ignore + hidden_states = residual + self.cross_attn_mlp_gate.tanh() * hidden_states + + outputs = (hidden_states,) + + if output_attentions: + outputs += (attn_weights,) + + if use_cache: + outputs += (past_key_value,) + + return outputs From 93bad84c98aa3e33dcbba176632cde719a24c50a Mon Sep 17 00:00:00 2001 From: Timo Imhof Date: Fri, 2 May 2025 15:09:44 +0200 Subject: [PATCH 63/63] Use _default_init_adapter_methods in model mixin --- src/adapters/models/mllama/mixin_mllama.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/adapters/models/mllama/mixin_mllama.py b/src/adapters/models/mllama/mixin_mllama.py index 4c08e40edc..30a5a7462d 100644 --- a/src/adapters/models/mllama/mixin_mllama.py +++ b/src/adapters/models/mllama/mixin_mllama.py @@ -4,6 +4,7 @@ from ...composition import adjust_tensors_for_parallel_ from ...methods.reft import ReftLayer, hook_fn +from ...methods.prefix_tuning import PrefixTuningPool from ...model_mixin import ( EmbeddingAdaptersMixin, EmbeddingAdaptersWrapperMixin, @@ -62,7 +63,7 @@ def hook(module, args, kwargs): layer.register_forward_pre_hook(hook, with_kwargs=True) -class MllamaVisionModelAdaptersMixin: +class MllamaVisionModelAdaptersMixin(ModelBaseAdaptersMixin): """Adds adapters to the a MllamaVisionModel class.""" support_prompt_tuning = False @@ -77,10 +78,11 @@ def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]: yield i, layer -class MllamaTextModelAdaptersMixin(EmbeddingAdaptersMixin, InvertibleAdaptersMixin): +class MllamaTextModelAdaptersMixin(EmbeddingAdaptersMixin, InvertibleAdaptersMixin, ModelBaseAdaptersMixin): """Adds adapters to the a MllamaTextModel class.""" support_prompt_tuning = False + invertible_adapters_base_name = "language_model" def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]: for i, layer in enumerate(self.layers): @@ -109,7 +111,6 @@ class MllamaAdaptersMixin(EmbeddingAdaptersWrapperMixin, InvertibleAdaptersWrapp def iter_layers(self) -> Iterable[Tuple[int, nn.Module]]: layer_idx = 0 - # First iterate through vision model's local transformer layers for _, layer in self.vision_model.iter_layers(): yield layer_idx, layer layer_idx += 1 @@ -133,19 +134,18 @@ def _init_adapters_submodules(self, model_config, adapters_config): if hasattr(module, "init_adapters"): module.init_adapters(model_config.text_config, adapters_config) - # Initialize ReFT for all layers if needed - self._init_reft_layers(model_config, adapters_config) - - def _init_reft_layers(self, model_config, adapters_config): - """Initialize ReFT layers for both vision and language components.""" - # Vision local transformer + def _default_init_adapter_methods(self, model_config, adapters_config): + # Patch for ReFT initialization for _, layer in self.vision_model.iter_layers(): if not hasattr(layer, "reft_layer"): layer.reft_layer = ReftLayer("output", model_config.vision_config, adapters_config) layer.register_forward_hook(hook_fn) - # Language model layers for _, layer in self.language_model.iter_layers(): if not hasattr(layer, "reft_layer"): layer.reft_layer = ReftLayer("output", model_config.text_config, adapters_config) layer.register_forward_hook(hook_fn) + + # Add prefix tuning + self.base_model.prefix_tuning = PrefixTuningPool(model_config, adapters_config) +
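
Note (not part of the patch series): below is a minimal usage sketch of the adapter API that run_merge_test/run_reset_test above exercise, applied to the new Mllama support. The checkpoint path is a placeholder, and the snippet assumes the MllamaAdapterModel registration introduced in these patches; it is illustrative only, not an excerpt from the PR.

import torch
from adapters import AutoAdapterModel, LoRAConfig

# Placeholder checkpoint path; any Mllama checkpoint is assumed to work once
# MllamaAdapterModel is registered as in the patches above.
model = AutoAdapterModel.from_pretrained("path/to/mllama-checkpoint")
model.eval()

# Text-only dummy input, in the spirit of get_input_samples() in the test base.
input_ids = torch.randint(0, model.config.text_config.vocab_size, (1, 16))

# Add and activate a LoRA adapter, mirroring run_merge_test().
model.add_adapter("test_lora", config=LoRAConfig())
model.set_active_adapters("test_lora")
out_active = model(input_ids=input_ids)

# Merge the adapter into the base weights and check the outputs still match.
model.set_active_adapters(None)
model.merge_adapter("test_lora")
out_merged = model(input_ids=input_ids)
assert torch.allclose(out_active[0], out_merged[0], atol=1e-3)

# Undo the merge, mirroring run_reset_test().
model.reset_adapter()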