Skip to content

Commit 10f9509

Browse files
committed
RHOAIENG-33283: Exclude md files from files secret
1 parent 0dece2a · commit 10f9509

File tree

2 files changed

+44
-15
lines changed

2 files changed

+44
-15
lines changed

src/codeflare_sdk/ray/rayjobs/runtime_env.py

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -28,23 +28,28 @@
2828
# Path where working_dir will be unzipped on submitter pod
2929
UNZIP_PATH = "/tmp/rayjob-working-dir"
3030

31-
# File pattern to exclude from working directory zips
32-
# Jupyter notebooks can contain sensitive outputs, tokens, and large data
31+
# Exclude Jupyter notebook and Markdown files from working directory zips
3332
JUPYTER_NOTEBOOK_PATTERN = r"\.ipynb$"
33+
MARKDOWN_FILE_PATTERN = r"\.md$"
3434

3535

3636
def _should_exclude_file(file_path: str) -> bool:
3737
"""
3838
Check if file should be excluded from working directory zip.
39-
Currently excludes Jupyter notebook files (.ipynb).
39+
Currently excludes:
40+
- Jupyter notebook files (.ipynb)
41+
- Markdown files (.md)
4042
4143
Args:
4244
file_path: Relative file path within the working directory
4345
4446
Returns:
4547
True if file should be excluded, False otherwise
4648
"""
47-
return bool(re.search(JUPYTER_NOTEBOOK_PATTERN, file_path, re.IGNORECASE))
49+
return bool(
50+
re.search(JUPYTER_NOTEBOOK_PATTERN, file_path, re.IGNORECASE)
51+
or re.search(MARKDOWN_FILE_PATTERN, file_path, re.IGNORECASE)
52+
)
4853

4954

5055
def _normalize_runtime_env(
@@ -145,7 +150,7 @@ def _zip_directory(directory_path: str) -> Optional[bytes]:
145150
f"Successfully zipped directory: {directory_path} ({len(zip_data)} bytes)"
146151
)
147152
if excluded_count > 0:
148-
log_message += f" - Excluded {excluded_count} Jupyter notebook files"
153+
log_message += f" - Excluded {excluded_count} file(s) (.ipynb, .md)"
149154
logger.info(log_message)
150155

151156
return zip_data

src/codeflare_sdk/ray/rayjobs/test/test_runtime_env.py

Lines changed: 34 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -575,7 +575,7 @@ def test_zip_directory_functionality(tmp_path):
575575

576576
def test_zip_directory_excludes_jupyter_notebooks(tmp_path, caplog):
577577
"""
578-
Test that Jupyter notebook files (.ipynb) are excluded from zip.
578+
Test that Jupyter notebook files (.ipynb) and markdown files (.md) are excluded from zip.
579579
"""
580580
from codeflare_sdk.ray.rayjobs.runtime_env import _zip_directory
581581
import zipfile
@@ -594,11 +594,16 @@ def test_zip_directory_excludes_jupyter_notebooks(tmp_path, caplog):
594594
'{"cells": [], "metadata": {}}'
595595
) # Test case insensitive
596596

597+
# Create markdown files (should be excluded)
598+
(test_dir / "README.md").write_text("# Project Documentation\n")
599+
(test_dir / "CHANGELOG.MD").write_text("# Changes\n") # Test case insensitive
600+
597601
# Create subdirectory with mixed files
598602
sub_dir = test_dir / "notebooks"
599603
sub_dir.mkdir()
600604
(sub_dir / "data_exploration.ipynb").write_text('{"cells": [], "metadata": {}}')
601605
(sub_dir / "helper.py").write_text("print('nested file')")
606+
(sub_dir / "guide.md").write_text("# Guide\n")
602607

603608
# Test zipping
604609
with caplog.at_level("INFO"):
@@ -607,8 +612,8 @@ def test_zip_directory_excludes_jupyter_notebooks(tmp_path, caplog):
607612
assert zip_data is not None
608613
assert len(zip_data) > 0
609614

610-
# Verify log message includes exclusion count
611-
assert "Excluded 3 Jupyter notebook files" in caplog.text
615+
# Verify log message includes exclusion count (3 ipynb + 3 md = 6 total)
616+
assert "Excluded 6 file(s) (.ipynb, .md)" in caplog.text
612617

613618
# Verify excluded files are not in the zip
614619
zip_buffer = io.BytesIO(zip_data)
@@ -625,10 +630,15 @@ def test_zip_directory_excludes_jupyter_notebooks(tmp_path, caplog):
625630
assert "experiment.IPYNB" not in zip_contents
626631
assert "notebooks/data_exploration.ipynb" not in zip_contents
627632

633+
# Markdown files should be excluded
634+
assert "README.md" not in zip_contents
635+
assert "CHANGELOG.MD" not in zip_contents
636+
assert "notebooks/guide.md" not in zip_contents
637+
628638

629639
def test_zip_directory_no_exclusions_when_no_notebooks(tmp_path, caplog):
630640
"""
631-
Test that no exclusion message is logged when no notebook files exist.
641+
Test that no exclusion message is logged when no notebook or markdown files exist.
632642
"""
633643
from codeflare_sdk.ray.rayjobs.runtime_env import _zip_directory
634644

@@ -646,7 +656,6 @@ def test_zip_directory_no_exclusions_when_no_notebooks(tmp_path, caplog):
646656

647657
# Verify log message does NOT mention exclusions
648658
assert "Excluded" not in caplog.text
649-
assert "Jupyter notebook files" not in caplog.text
650659

651660

652661
def test_should_exclude_file_function():
@@ -661,12 +670,19 @@ def test_should_exclude_file_function():
661670
assert _should_exclude_file("data/exploration.ipynb") is True
662671
assert _should_exclude_file("subdir/nested.Ipynb") is True
663672

673+
# Should exclude .md files (case insensitive)
674+
assert _should_exclude_file("README.md") is True
675+
assert _should_exclude_file("CHANGELOG.MD") is True
676+
assert _should_exclude_file("docs/guide.md") is True
677+
assert _should_exclude_file("subdir/notes.Md") is True
678+
664679
# Should NOT exclude other files
665680
assert _should_exclude_file("script.py") is False
666681
assert _should_exclude_file("data.json") is False
667682
assert _should_exclude_file("requirements.txt") is False
668-
assert _should_exclude_file("README.md") is False
669683
assert _should_exclude_file("model.pkl") is False
684+
assert _should_exclude_file("markdown_parser.py") is False # Not .md
685+
assert _should_exclude_file("test.html") is False
670686

671687

672688
def test_zip_directory_error_handling():
@@ -718,7 +734,7 @@ def test_extract_all_local_files_with_working_dir(tmp_path):
718734

719735
def test_extract_all_local_files_excludes_notebooks(tmp_path, caplog):
720736
"""
721-
Test that extract_all_local_files excludes Jupyter notebooks when zipping working directory.
737+
Test that extract_all_local_files excludes Jupyter notebooks and markdown files when zipping working directory.
722738
"""
723739
import zipfile
724740
import base64
@@ -737,6 +753,10 @@ def test_extract_all_local_files_excludes_notebooks(tmp_path, caplog):
737753
)
738754
(working_dir / "data.ipynb").write_text('{"cells": [], "metadata": {}}')
739755

756+
# Markdown files that should be excluded
757+
(working_dir / "README.md").write_text("# Project Documentation\n")
758+
(working_dir / "CHANGELOG.md").write_text("# Changes\n")
759+
740760
runtime_env = RuntimeEnv(working_dir=str(working_dir))
741761

742762
rayjob = RayJob(
@@ -747,15 +767,15 @@ def test_extract_all_local_files_excludes_notebooks(tmp_path, caplog):
747767
cluster_name="test-cluster",
748768
)
749769

750-
# This should zip the directory and exclude notebooks
770+
# This should zip the directory and exclude notebooks and markdown files
751771
with caplog.at_level("INFO"):
752772
files = extract_all_local_files(rayjob)
753773

754774
assert files is not None
755775
assert "working_dir.zip" in files
756776

757-
# Verify exclusion was logged
758-
assert "Excluded 2 Jupyter notebook files" in caplog.text
777+
# Verify exclusion was logged (2 ipynb + 2 md = 4 total)
778+
assert "Excluded 4 file(s) (.ipynb, .md)" in caplog.text
759779

760780
# Decode and verify zip contents
761781
zip_data = base64.b64decode(files["working_dir.zip"])
@@ -772,6 +792,10 @@ def test_extract_all_local_files_excludes_notebooks(tmp_path, caplog):
772792
assert "analysis.ipynb" not in zip_contents
773793
assert "data.ipynb" not in zip_contents
774794

795+
# Markdown files should be excluded
796+
assert "README.md" not in zip_contents
797+
assert "CHANGELOG.md" not in zip_contents
798+
775799

776800
def test_extract_single_entrypoint_file_error_handling(tmp_path):
777801
"""

0 commit comments

Comments
 (0)