nerdy-tech-com-gitub
diff --git a/‎.github/workflows/pytest_cpu_gha_runner.yaml‎
Lines changed: 6 additions & 13 deletions b/‎.github/workflows/pytest_cpu_gha_runner.yaml‎
Lines changed: 6 additions & 13 deletions
diff --git a/‎src/llama_recipes/configs/datasets.py‎
Lines changed: 0 additions & 1 deletion b/‎src/llama_recipes/configs/datasets.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎src/llama_recipes/datasets/samsum_dataset.py‎
Lines changed: 14 additions & 3 deletions b/‎src/llama_recipes/datasets/samsum_dataset.py‎
Lines changed: 14 additions & 3 deletions
diff --git a/‎src/llama_recipes/finetuning.py‎
Lines changed: 1 addition & 1 deletion b/‎src/llama_recipes/finetuning.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/tests/conftest.py‎
Lines changed: 28 additions & 11 deletions b/‎src/tests/conftest.py‎
Lines changed: 28 additions & 11 deletions
diff --git a/‎src/tests/datasets/test_custom_dataset.py‎
Lines changed: 9 additions & 3 deletions b/‎src/tests/datasets/test_custom_dataset.py‎
Lines changed: 9 additions & 3 deletions
diff --git a/‎src/tests/datasets/test_grammar_datasets.py‎
Lines changed: 7 additions & 15 deletions b/‎src/tests/datasets/test_grammar_datasets.py‎
Lines changed: 7 additions & 15 deletions
diff --git a/‎src/tests/datasets/test_samsum_datasets.py‎
Lines changed: 30 additions & 14 deletions b/‎src/tests/datasets/test_samsum_datasets.py‎
Lines changed: 30 additions & 14 deletions
@@ -1,16 +1,10 @@
 name: "[GHA][CPU] llama-recipes Pytest tests on CPU GitHub hosted runner."
 on:
   pull_request:
-    branches:    
+    branches:
       - 'main'
-    paths:
-      - 'src/llama-recipes/configs/*.py'
-      - 'src/llama-recipes/utils/*.py'
-      - 'src/llama-recipes/datasets/*.py'
-      - 'src/llama-recipes/data/*.py'
-      - 'src/llama-recipes/*.py'
 
-  # triggers workflow manually for debugging purposes.      
+  # triggers workflow manually for debugging purposes.
   workflow_dispatch:
     inputs:
       runner:
@@ -23,8 +17,8 @@ on:
           required: false
           default: "true"
 
-env: 
-  PYTORCH_WHEEL_URL: https://download.pytorch.org/whl/test/cu118  
+env:
+  PYTORCH_WHEEL_URL: https://download.pytorch.org/whl/test/cu118
 
 jobs:
   execute_workflow:
@@ -63,19 +57,18 @@ jobs:
         id: install_llama_recipes_package
         run: |
           echo "Installing 'llama-recipes' project (re: https://github.com/facebookresearch/llama-recipes?tab=readme-ov-file#install-with-optional-dependencies)"
-          pip install --extra-index-url ${PYTORCH_WHEEL_URL} -e '.[tests]' 
+          pip install --extra-index-url ${PYTORCH_WHEEL_URL} -e '.[tests]'
 
 
       - name: "Running PyTest tests on GHA CPU Runner"
         id: pytest
         run: |
           echo "Running PyTest tests at 'GITHUB_WORKSPACE' path: ${GITHUB_WORKSPACE}"
           cd $GITHUB_WORKSPACE && python3 -m pytest --junitxml="$GITHUB_WORKSPACE/result.xml"
-  
+
       - name: Publish Test Summary
         id: test_summary
         uses: test-summary/action@v2
         with:
           paths: "**/*.xml"
         if: always()
-          
@@ -9,7 +9,6 @@ class samsum_dataset:
     dataset: str =  "samsum_dataset"
     train_split: str = "train"
     test_split: str = "validation"
-    trust_remote_code: bool = False
 
 
 @dataclass
 
@@ -6,11 +6,22 @@
 import copy
 import datasets
 
+from unittest.mock import patch
+
+@patch('builtins.input', return_value="N")
+def load_samsum(split, _):
+    try:
+        ds = datasets.load_dataset("Samsung/samsum", split=split)
+    except ValueError as e:
+        if "trust_remote_code" in str(e):
+          raise ValueError("Loading Samsung/samsum requires you to execute the dataset script in that repo on your local machine. Make sure you have read the code there to avoid malicious use, then set HF_DATASETS_TRUST_REMOTE_CODE env variable to True.") from e
+        else:
+          raise e
+    return ds
+
 
 def get_preprocessed_samsum(dataset_config, tokenizer, split):
-    if not hasattr(dataset_config, "trust_remote_code") or not dataset_config.trust_remote_code:
-        raise ValueError("The repository for samsum contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/samsum. To activate `trust_remote_code` option use this config: --samsum_dataset.trust_remote_code=True")
-    dataset = datasets.load_dataset("samsum", split=split, trust_remote_code=dataset_config.trust_remote_code)
+    dataset = load_samsum(split)
 
     prompt = (
         f"Summarize this dialog:\n{{dialog}}\n---\nSummary:\n"
 
@@ -289,7 +289,7 @@ def main(**kwargs):
         )
         print(f"--> Num of Validation Set Batches loaded = {len(eval_dataloader)}")
         if len(eval_dataloader) == 0:
-            raise ValueError("The eval set size is too small for dataloader to load even one batch. Please increase the size of eval set.")
+            raise ValueError(f"The eval set size is too small for dataloader to load even one batch. Please increase the size of eval set. ({len(eval_dataloader)=})")
         else:
             print(f"--> Num of Validation Set Batches loaded = {len(eval_dataloader)}")
 
 
@@ -3,19 +3,27 @@
 
 import pytest
 
-from transformers import AutoTokenizer
+from utils import maybe_tokenizer
 
-ACCESS_ERROR_MSG = "Could not access tokenizer at 'meta-llama/Llama-2-7b-hf'. Did you log into huggingface hub and provided the correct token?"
-LLAMA_VERSIONS = ["meta-llama/Llama-2-7b-hf", "meta-llama/Meta-Llama-3.1-8B-Instruct"]
+ACCESS_ERROR_MSG = "Could not access tokenizer. Did you log into huggingface hub and provided the correct token?"
+
+LLAMA_VERSIONS = ["meta-llama/Llama-2-7b-hf", "meta-llama/Meta-Llama-3.1-8B-Instruct", "fake_llama"]
+
+LLAMA_TOKENIZERS = {k: maybe_tokenizer(k) for k in LLAMA_VERSIONS}
 
 @pytest.fixture(params=LLAMA_VERSIONS)
 def llama_version(request):
     return request.param
 
 
+@pytest.fixture(params=["mllama", "llama"])
+def model_type(request):
+    return request.param
+
+
 @pytest.fixture(scope="module")
 def llama_tokenizer(request):
-    return {k: AutoTokenizer.from_pretrained(k) for k in LLAMA_VERSIONS}
+    return LLAMA_TOKENIZERS
 
 
 @pytest.fixture
@@ -26,6 +34,13 @@ def _helper(tokenizer_mock):
 
     return _helper
 
+@pytest.fixture
+def setup_processor(llama_tokenizer, llama_version):
+    def _helper(processor_mock):
+        processor_mock.from_pretrained.return_value.tokenizer = llama_tokenizer[llama_version]
+
+    return _helper
+
 
 def pytest_addoption(parser):
     parser.addoption(
@@ -38,16 +53,18 @@ def pytest_configure(config):
 
 
 def pytest_collection_modifyitems(config, items):
+    #skip tests marked with skip_missing_tokenizer if tokenizer is unavailable unless --unskip-missing-tokenizer is passed
     if config.getoption("--unskip-missing-tokenizer"):
         return
 
-    try:
-        AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
-        tokenizer_available = True
-    except OSError:
-        tokenizer_available = False
-
     skip_missing_tokenizer = pytest.mark.skip(reason=ACCESS_ERROR_MSG)
     for item in items:
-        if "skip_missing_tokenizer" in item.keywords and not tokenizer_available:
+        # get the tokenizer for the test
+        version = [v for v in LLAMA_VERSIONS for i in item.keywords if v in i]
+        if len(version) == 0:
+            # no tokenizer used in this test
+            continue
+        version = version.pop()
+        assert version in LLAMA_TOKENIZERS
+        if "skip_missing_tokenizer" in item.keywords and LLAMA_TOKENIZERS[version] is None:
             item.add_marker(skip_missing_tokenizer)
@@ -2,6 +2,7 @@
 # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
 
 import pytest
+from contextlib import nullcontext
 from unittest.mock import patch
 
 from transformers import LlamaTokenizer
@@ -96,15 +97,17 @@ def test_custom_dataset(step_lr, optimizer, get_model, tokenizer, train, mocker,
 
 
 @patch('llama_recipes.finetuning.train')
+@patch('llama_recipes.finetuning.AutoConfig.from_pretrained')
 @patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
 @patch('llama_recipes.finetuning.AutoTokenizer.from_pretrained')
 @patch('llama_recipes.finetuning.optim.AdamW')
 @patch('llama_recipes.finetuning.StepLR')
-def test_unknown_dataset_error(step_lr, optimizer, tokenizer, get_model, train, mocker, llama_version):
+def test_unknown_dataset_error(step_lr, optimizer, tokenizer, get_model, get_config, train, mocker, llama_version):
     from llama_recipes.finetuning import main
 
     tokenizer.return_value = mocker.MagicMock(side_effect=lambda x: {"input_ids":[len(x)*[0,]], "attention_mask": [len(x)*[0,]]})
     get_model.return_value.get_input_embeddings.return_value.weight.shape = [32000 if "Llama-2" in llama_version else 128256]
+    get_config.return_value.model_type = "llama"
 
     kwargs = {
         "dataset": "custom_dataset",
@@ -131,13 +134,16 @@ def test_tokenize_dialog(tokenizer, monkeypatch, setup_tokenizer, llama_version)
         {"role":"assistant", "content":"Romans"},
     ]
 
-    result = tokenize_dialog(dialog, tokenizer)
+    c = pytest.raises(AttributeError) if llama_version == "fake_llama" else nullcontext()
+
+    with c:
+        result = tokenize_dialog(dialog, tokenizer)
 
     if "Llama-2" in llama_version:
         assert result["labels"][:12] == [-100] * 12
         assert result["labels"][17:28] == [-100] * 11
         assert result["labels"].count(-100) == 11 + 12
-    else:
+    elif "Llama-3" in llama_version:
         assert result["labels"][:38] == [-100] * 38
         assert result["labels"][43:54] == [-100] * 11
         assert result["labels"].count(-100) == 38 + 11
@@ -1,32 +1,27 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
 
+from pathlib import Path
 import pytest
 from unittest.mock import patch
 
-
-EXPECTED_RESULTS = {
-    "meta-llama/Llama-2-7b-hf":{
-        "label": 1152,
-        "pos": 31,
-    },
-    "meta-llama/Meta-Llama-3.1-8B":{
-        "label": 40,
-        "pos": 26,
-    },
-}
+DATA_DIR = Path(__file__).parents[2] / "llama_recipes/datasets/grammar_dataset/"
 
 @pytest.mark.skip_missing_tokenizer
+@pytest.mark.skipif(not Path(DATA_DIR / "grammar_validation.csv").exists(), reason="grammar_validation.csv not found")
+@pytest.mark.skipif(not Path(DATA_DIR / "gtrain_10k.csv").exists(), reason="gtrain_10k.csv not found")
 @patch('llama_recipes.finetuning.train')
 @patch('llama_recipes.finetuning.AutoTokenizer')
+@patch('llama_recipes.finetuning.AutoConfig.from_pretrained')
 @patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
 @patch('llama_recipes.finetuning.optim.AdamW')
 @patch('llama_recipes.finetuning.StepLR')
-def test_grammar_dataset(step_lr, optimizer, get_model, tokenizer, train, setup_tokenizer, llama_version):
+def test_grammar_dataset(step_lr, optimizer, get_model, get_config, tokenizer, train, setup_tokenizer, llama_version):
     from llama_recipes.finetuning import main
 
     setup_tokenizer(tokenizer)
     get_model.return_value.get_input_embeddings.return_value.weight.shape = [32000 if "Llama-2" in llama_version else 128256]
+    get_config.return_value.model_type = "llama"
 
     BATCH_SIZE = 8
     kwargs = {
@@ -58,9 +53,6 @@ def test_grammar_dataset(step_lr, optimizer, get_model, tokenizer, train, setup_
     assert "input_ids" in batch.keys()
     assert "attention_mask" in batch.keys()
 
-    assert batch["labels"][0][EXPECTED_RESULTS[llama_version]["pos"]-1] == -100
-    assert batch["labels"][0][EXPECTED_RESULTS[llama_version]["pos"]] == EXPECTED_RESULTS[llama_version]["label"]
-
     token = args[3]
     assert batch["input_ids"][0][0] == token.bos_token_id
     assert batch["labels"][0][-1] == token.eos_token_id
 
@@ -2,31 +2,50 @@
 # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
 
 import pytest
+from dataclasses import dataclass
 from functools import partial
 from unittest.mock import patch
+from datasets import load_dataset
 
-EXPECTED_RESULTS = {
-    "meta-llama/Llama-2-7b-hf":{
-        "label": 8432,
-        "pos": 242,
-    },
-    "meta-llama/Meta-Llama-3.1-8B":{
-        "label": 2250,
-        "pos": 211,
-    },
-}
+@dataclass
+class Config:
+    model_type: str = "llama"
 
+try:
+    load_dataset("Samsung/samsum")
+    SAMSUM_UNAVAILABLE = False
+except ValueError:
+    SAMSUM_UNAVAILABLE = True
+
+@pytest.mark.skipif(SAMSUM_UNAVAILABLE, reason="Samsum dataset is unavailable")
 @pytest.mark.skip_missing_tokenizer
 @patch('llama_recipes.finetuning.train')
 @patch('llama_recipes.finetuning.AutoTokenizer')
+@patch("llama_recipes.finetuning.AutoConfig.from_pretrained")
+@patch("llama_recipes.finetuning.AutoProcessor")
+@patch("llama_recipes.finetuning.MllamaForConditionalGeneration.from_pretrained")
 @patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
 @patch('llama_recipes.finetuning.optim.AdamW')
 @patch('llama_recipes.finetuning.StepLR')
-def test_samsum_dataset(step_lr, optimizer, get_model, tokenizer, train, mocker, setup_tokenizer, llama_version):
+def test_samsum_dataset(
+    step_lr,
+    optimizer,
+    get_model,
+    get_mmodel,
+    processor,
+    get_config,
+    tokenizer,
+    train,
+    mocker,
+    setup_tokenizer,
+    llama_version,
+    ):
     from llama_recipes.finetuning import main
 
     setup_tokenizer(tokenizer)
     get_model.return_value.get_input_embeddings.return_value.weight.shape = [32000 if "Llama-2" in llama_version else 128256]
+    get_mmodel.return_value.get_input_embeddings.return_value.weight.shape = [0]
+    get_config.return_value = Config()
 
     BATCH_SIZE = 8
     kwargs = {
@@ -59,9 +78,6 @@ def test_samsum_dataset(step_lr, optimizer, get_model, tokenizer, train, mocker,
     assert "input_ids" in batch.keys()
     assert "attention_mask" in batch.keys()
 
-    assert batch["labels"][0][EXPECTED_RESULTS[llama_version]["pos"]-1] == -100
-    assert batch["labels"][0][EXPECTED_RESULTS[llama_version]["pos"]] == EXPECTED_RESULTS[llama_version]["label"]
-
     assert batch["input_ids"][0][0] == token.bos_token_id
     assert batch["labels"][0][-1] == token.eos_token_id
     assert batch["input_ids"][0][-1] == token.eos_token_id
Original file line number	Diff line number	Diff line change
`@@ -289,7 +289,7 @@ def main(**kwargs):`
`289`	`289`	`)`
`290`	`290`	`print(f"--> Num of Validation Set Batches loaded = {len(eval_dataloader)}")`
`291`	`291`	`if len(eval_dataloader) == 0:`
`292`		`- raise ValueError("The eval set size is too small for dataloader to load even one batch. Please increase the size of eval set.")`
	`292`	`+ raise ValueError(f"The eval set size is too small for dataloader to load even one batch. Please increase the size of eval set. ({len(eval_dataloader)=})")`
`293`	`293`	`else:`
`294`	`294`	`print(f"--> Num of Validation Set Batches loaded = {len(eval_dataloader)}")`
`295`	`295`