Change test markers from `llm` to `qualitative`

avinash2692 · avinash2692 · commit cb754f2dcfed · 2025-08-25T15:58:59.000-07:00
diff --git a/.github/workflows/quality.yml b/.github/workflows/quality.yml
@@ -18,7 +18,7 @@ jobs:
       matrix:
         python-version: ['3.10', '3.11', '3.12'] # Need to add 3.13 once we resolve outlines issues.
     env:
-      GITHUB_ACTIONS: 1
+      GITHUB_ACTION: 1
     steps:
       - uses: actions/checkout@v4
       - name: Install uv and set the python version
diff --git a/test/backends/test_huggingface.py b/test/backends/test_huggingface.py
@@ -21,7 +21,6 @@
 @pytest.fixture(scope="module")
 def backend():
     """Shared HuggingFace backend for all tests in this module."""
-    # TODO: find a smalle 1B model to do Alora stuff on github actions.
     backend = LocalHFBackend(
         model_id="ibm-granite/granite-3.2-8b-instruct",
         formatter=TemplateFormatter(model_id="ibm-granite/granite-4.0-tiny-preview"),
@@ -38,15 +37,15 @@ def session(backend):
     yield session
     session.reset()
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_system_prompt(session):
     result = session.chat(
         "Where are we going?",
         model_options={ModelOption.SYSTEM_PROMPT: "Talk like a pirate."},
     )
     print(result)
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_constraint_alora(session, backend):
     answer = session.instruct(
         "Corporate wants you to find the difference between these two strings: aaaaaaaaaa aaaaabaaaa. Be concise and don't write code to answer the question.",
@@ -64,7 +63,7 @@ def test_constraint_alora(session, backend):
     )
     assert alora_output in ["Y", "N"], alora_output
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_constraint_lora_with_requirement(session, backend):
     answer = session.instruct(
         "Corporate wants you to find the difference between these two strings: aaaaaaaaaa aaaaabaaaa"
@@ -80,7 +79,7 @@ def test_constraint_lora_with_requirement(session, backend):
     assert isinstance(val_result, ValidationResult)
     assert str(val_result.reason) in ["Y", "N"]
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_constraint_lora_override(session, backend):
     backend.default_to_constraint_checking_alora = False  # type: ignore
     answer = session.instruct(
@@ -95,7 +94,7 @@ def test_constraint_lora_override(session, backend):
     assert isinstance(default_output_to_bool(str(val_result.reason)), bool)
     backend.default_to_constraint_checking_alora = True
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_constraint_lora_override_does_not_override_alora(session, backend):
     backend.default_to_constraint_checking_alora = False  # type: ignore
     answer = session.instruct(
@@ -112,7 +111,7 @@ def test_constraint_lora_override_does_not_override_alora(session, backend):
     assert str(val_result.reason) in ["Y", "N"]
     backend.default_to_constraint_checking_alora = True
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_llmaj_req_does_not_use_alora(session, backend):
     backend.default_to_constraint_checking_alora = True  # type: ignore
     answer = session.instruct(
@@ -128,12 +127,12 @@ def test_llmaj_req_does_not_use_alora(session, backend):
     assert isinstance(val_result, ValidationResult)
     assert str(val_result.reason) not in ["Y", "N"]
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_instruct(session):
     result = session.instruct("Compute 1+1.")
     print(result)
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_multiturn(session):
     session.instruct("Compute 1+1")
     beta = session.instruct(
@@ -143,7 +142,7 @@ def test_multiturn(session):
     words = session.instruct("Now list five English words that start with that letter.")
     print(words)
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_format(session):
     class Person(pydantic.BaseModel):
         name: str
@@ -173,7 +172,7 @@ class Email(pydantic.BaseModel):
         "The email address should be at example.com"
     )
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_generate_from_raw(session):
     prompts = ["what is 1+1?", "what is 2+2?", "what is 3+3?", "what is 4+4?"]
 
@@ -183,7 +182,7 @@ def test_generate_from_raw(session):
 
     assert len(results) == len(prompts)
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_generate_from_raw_with_format(session):
     prompts = ["what is 1+1?", "what is 2+2?", "what is 3+3?", "what is 4+4?"]
 
diff --git a/test/backends/test_ollama.py b/test/backends/test_ollama.py
@@ -18,7 +18,7 @@ def session():
     session.reset()
 
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_simple_instruct(session):
     result = session.instruct(
         "Write an email to Hendrik trying to sell him self-sealing stembolts."
@@ -28,7 +28,7 @@ def test_simple_instruct(session):
     assert result._meta["chat_response"].message.role == "assistant"
 
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_instruct_with_requirement(session):
     response = session.instruct(
         "Write an email to Hendrik convincing him to buy some self-sealing stembolts."
@@ -51,14 +51,14 @@ def test_instruct_with_requirement(session):
     )
     print(results)
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_chat(session):
     output_message = session.chat("What is 1+1?")
     assert "2" in output_message.content, (
         f"Expected a message with content containing 2 but found {output_message}"
     )
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_format(session):
     class Person(pydantic.BaseModel):
         name: str
@@ -91,7 +91,7 @@ class Email(pydantic.BaseModel):
     # assert email.to.email_address.endswith("example.com")
     pass
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_generate_from_raw(session):
     prompts = ["what is 1+1?", "what is 2+2?", "what is 3+3?", "what is 4+4?"]
 
diff --git a/test/backends/test_openai_ollama.py b/test/backends/test_openai_ollama.py
@@ -37,13 +37,13 @@ def m_session(backend):
     yield session
     session.reset()
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_instruct(m_session):
     result = m_session.instruct("Compute 1+1.")
     assert isinstance(result, ModelOutputThunk)
     assert "2" in result.value  # type: ignore
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_multiturn(m_session):
     m_session.instruct("What is the capital of France?")
     answer = m_session.instruct("Tell me the answer to the previous question.")
@@ -64,7 +64,7 @@ def test_multiturn(m_session):
     #     assert "granite3.3:8b" in result.value
     #     self.m.reset()
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_format(m_session):
     class Person(pydantic.BaseModel):
         name: str
diff --git a/test/backends/test_watsonx.py b/test/backends/test_watsonx.py
@@ -29,21 +29,21 @@ def session(backend):
     session.reset()
 
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_instruct(session):
     result = session.instruct("Compute 1+1.")
     assert isinstance(result, ModelOutputThunk)
     assert "2" in result.value  # type: ignore
 
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_multiturn(session):
     session.instruct("What is the capital of France?")
     answer = session.instruct("Tell me the answer to the previous question.")
     assert "Paris" in answer.value  # type: ignore
 
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_format(session):
     class Person(pydantic.BaseModel):
         name: str
@@ -77,7 +77,7 @@ class Email(pydantic.BaseModel):
     pass
 
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_generate_from_raw(session):
     prompts = ["what is 1+1?", "what is 2+2?", "what is 3+3?", "what is 4+4?"]
 
diff --git a/test/conftest.py b/test/conftest.py
@@ -14,7 +14,7 @@ def gh_run() -> int:
 
 
 def pytest_runtest_setup(item):
-    # Runs tests *not* marked with `@pytest.mark.llm` to run normally.
+    # Tests *not* marked with `@pytest.mark.qualitative` run normally.
     if not item.get_closest_marker("qualitative"):
         return
 
diff --git a/test/stdlib_basics/test_contextual_session.py b/test/stdlib_basics/test_contextual_session.py
@@ -74,7 +74,7 @@ def test_generative_with_contextual_session(model_id):
         assert isinstance(summary, str)
         assert len(summary) > 0
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_generative_backward_compatibility(model_id):
     """Test that generative slots still work with explicit session parameter."""
     with start_session(model_id=model_id) as m:
diff --git a/test/stdlib_basics/test_genslot.py b/test/stdlib_basics/test_genslot.py
@@ -33,7 +33,7 @@ def test_func(session):
     write_email_component = write_me_an_email(session)
     assert isinstance(write_email_component, str)
 
-@pytest.mark.llm
+@pytest.mark.qualitative
 def test_sentiment_output(classify_sentiment_output):
     assert classify_sentiment_output in ["positive", "negative"]