Pin tool versions for the code-quality workflow (Ruff 0.14.4, markdownlint action v3.4.0)

tarun-etikala · tarun-etikala · commit 9cc2cf98666f · 2026-01-21T08:11:21.000-05:00
diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml
@@ -21,7 +21,7 @@ jobs:
           python-version: '3.11'
 
       - name: Install Ruff
-        run: pip install ruff
+        run: pip install ruff==0.14.4
 
       - name: Run Ruff linter
         run: ruff check . --output-format=github
@@ -30,7 +30,7 @@ jobs:
         run: ruff format --check .
 
       - name: Run Markdownlint
-        uses: nosborn/github-action-markdown-cli@v3.3.0
+        uses: nosborn/github-action-markdown-cli@v3.4.0
         with:
           files: .
           config_file: .markdownlint.json
diff --git a/assets/usecase/knowledge-tuning/Knowledge Tuning Workflow.excalidraw b/assets/usecase/knowledge-tuning/Knowledge Tuning Workflow.excalidraw
@@ -4817,4 +4817,4 @@
     "lockedMultiSelections": {}
   },
   "files": {}
-}
+}
diff --git a/examples/fine-tuning/rhoai-3.2/osft/osft-example.ipynb b/examples/fine-tuning/rhoai-3.2/osft/osft-example.ipynb
@@ -579,9 +579,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Stream logs\n",
diff --git a/examples/fine-tuning/rhoai-3.2/training-hub/sft/sft.ipynb b/examples/fine-tuning/rhoai-3.2/training-hub/sft/sft.ipynb
@@ -35,12 +35,6 @@
     "!python3 -m pip install --force-reinstall --no-cache-dir -U ipykernel"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "id": "e5fcc948",
-   "metadata": {},
-   "source": []
-  },
   {
    "cell_type": "markdown",
    "id": "49a99fc8-24d5-4040-bd1d-faa7c7e1ef27",
diff --git a/examples/knowledge-tuning/04_Knowledge_Mixing/utils/knowledge_utils.py b/examples/knowledge-tuning/04_Knowledge_Mixing/utils/knowledge_utils.py
@@ -100,8 +100,7 @@ def sample_doc_qa(
 def _clean_response_text(df: pl.DataFrame) -> pl.DataFrame:
     """Clean response text by removing markers and whitespace."""
     return df.with_columns(
-        pl
-        .col("response")
+        pl.col("response")
         .str.replace_all(r"\[END\]", "")
         .str.replace_all(r"\[ANSWER\]", "")
         .str.strip_chars()
@@ -112,8 +111,7 @@ def _clean_response_text(df: pl.DataFrame) -> pl.DataFrame:
 def _create_metadata(df: pl.DataFrame) -> pl.Expr:
     """Create metadata JSON structure."""
     return (
-        pl
-        .struct([
+        pl.struct([
             pl.col("document").alias("sdg_document"),
             pl.lit("document_knowledge_qa").alias("dataset"),
             pl.col("raw_document"),
@@ -234,8 +232,7 @@ def generate_knowledge_qa_dataset(
             "reasoning",
         ]
         messages_expr = (
-            pl
-            .struct(message_columns)
+            pl.struct(message_columns)
             .map_elements(_create_messages_with_reasoning_no_document)
             .alias("messages")
         )
@@ -248,24 +245,21 @@ def generate_knowledge_qa_dataset(
             "reasoning",
         ]
         messages_expr = (
-            pl
-            .struct(message_columns)
+            pl.struct(message_columns)
             .map_elements(_create_messages_with_reasoning)
             .alias("messages")
         )
     elif keep_document_in_context:
         message_columns = ["question", "response", "document", "document_outline"]
         messages_expr = (
-            pl
-            .struct(message_columns)
+            pl.struct(message_columns)
             .map_elements(_create_messages_without_reasoning)
             .alias("messages")
         )
     else:
         message_columns = ["question", "response", "document", "document_outline"]
         messages_expr = (
-            pl
-            .struct(message_columns)
+            pl.struct(message_columns)
             .map_elements(_create_messages_without_reasoning_no_document)
             .alias("messages")
         )
@@ -313,8 +307,7 @@ def count_tokens(text: str) -> int:
         return len(tokenizer.encode(text))
 
     return df.with_columns(
-        pl
-        .col(column_name)
+        pl.col(column_name)
         .map_elements(apply_chat_template, return_dtype=pl.String)
         .map_elements(count_tokens, return_dtype=pl.Int32)
         .alias("token_length")
diff --git a/tests/validation/test_notebook_content.py b/tests/validation/test_notebook_content.py
@@ -43,13 +43,22 @@ def test_no_execution_counts(notebook_path, relative_path):
 
 
 def test_no_stored_outputs(notebook_path, relative_path):
-    """Test that notebooks have no stored outputs (should be cleared)."""
+    """Test that notebooks have no stored outputs (should be cleared).
+
+    Cells with 'keep_output' tag in metadata are ignored.
+    """
     with open(notebook_path, encoding="utf-8") as f:
         nb = json.load(f)
 
     cells_with_outputs = []
     for i, cell in enumerate(nb.get("cells", [])):
         if cell.get("cell_type") == "code":
+            # Check if cell has keep_output tag
+            metadata = cell.get("metadata", {})
+            tags = metadata.get("tags", [])
+            if "keep_output" in tags:
+                continue
+
             outputs = cell.get("outputs", [])
             if len(outputs) > 0:
                 cells_with_outputs.append((i, len(outputs)))

Original file line number	Diff line number	Diff line change
`@@ -35,12 +35,6 @@`
`35`	`35`	`"!python3 -m pip install --force-reinstall --no-cache-dir -U ipykernel"`
`36`	`36`	`]`
`37`	`37`	`},`
`38`		`- {`
`39`		`- "cell_type": "markdown",`
`40`		`- "id": "e5fcc948",`
`41`		`- "metadata": {},`
`42`		`- "source": []`
`43`		`- },`
`44`	`38`	`{`
`45`	`39`	`"cell_type": "markdown",`
`46`	`40`	`"id": "49a99fc8-24d5-4040-bd1d-faa7c7e1ef27",`