Skip to content

Commit ced40e9

Browse files
committed
Rename missing_df to filtered_df
1 parent 212011f commit ced40e9

File tree

2 files changed

+12
-12
lines changed

2 files changed

+12
-12
lines changed

src/cautiousrobot/utils.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,7 @@ def check_existing_images(csv_path, img_dir, source_df, filename_col, subfolders
104104
105105
Returns:
106106
updated_df (pd.DataFrame): DataFrame with new column 'in_img_dir' indicating presence in img_dir.
107-
missing_df (pd.DataFrame): DataFrame filtered to only files not present in img_dir.
107+
filtered_df (pd.DataFrame): DataFrame filtered to only files not present in img_dir.
108108
"""
109109
# Create a copy to avoid modifying the original DataFrame
110110
df = source_df.copy()
@@ -148,15 +148,15 @@ def check_existing_images(csv_path, img_dir, source_df, filename_col, subfolders
148148
df = df.drop(columns=["expected_path"])
149149

150150
# Create filtered DataFrame
151-
missing_df = df[~df["in_img_dir"]].copy()
151+
filtered_df = df[~df["in_img_dir"]].copy()
152152

153153
# Exit if all images are already there
154-
if missing_df.empty:
154+
if filtered_df.empty:
155155
sys.exit(f"'{img_dir}' already contains all images. Exited without executing.")
156156
else:
157157
# Print directory status message - pre-download
158158
num_existing = len(existing_files)
159159
expected_num = df.shape[0] - (starting_idx)
160160
print(f"There are {num_existing} files in {img_dir}. Based on {csv_path}, there should be {expected_num} images.")
161161

162-
return df, missing_df
162+
return df, filtered_df

tests/test_existing_images.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -16,27 +16,27 @@ def setUp(self):
1616
@patch("cautiousrobot.utils.os.path.exists", return_value=False)
1717
def test_directory_does_not_exist(self, mock_exists):
1818
"""If image directory doesn't exist, all images marked as not in directory."""
19-
updated_df, missing_df = check_existing_images(
19+
updated_df, filtered_df = check_existing_images(
2020
self.csv_path, self.img_dir, self.sample_df.copy(), self.filename_col
2121
)
2222

2323
self.assertFalse(any(updated_df["in_img_dir"]))
24-
self.assertEqual(len(missing_df), len(self.sample_df))
24+
self.assertEqual(len(filtered_df), len(self.sample_df))
2525
mock_exists.assert_called_once_with(self.img_dir)
2626

2727
@patch("cautiousrobot.utils.os.path.exists", return_value=True)
2828
@patch("cautiousrobot.utils.gather_file_paths", return_value=["test_images/a.jpg"])
2929
@patch("cautiousrobot.utils.print")
3030
def test_some_files_exist(self, mock_print, mock_gather, mock_exists):
3131
"""Should mark existing files correctly and print status."""
32-
updated_df, missing_df = check_existing_images(
32+
updated_df, filtered_df = check_existing_images(
3333
self.csv_path, self.img_dir, self.sample_df.copy(), self.filename_col
3434
)
3535

3636
self.assertTrue(updated_df.loc[0, "in_img_dir"]) # a.jpg exists
3737
self.assertFalse(updated_df.loc[1, "in_img_dir"]) # b.jpg missing
3838
self.assertFalse(updated_df.loc[2, "in_img_dir"]) # c.jpg missing
39-
self.assertEqual(len(missing_df), 2)
39+
self.assertEqual(len(filtered_df), 2)
4040
mock_print.assert_called_once()
4141
self.assertIn("There are 1 files", mock_print.call_args[0][0])
4242

@@ -57,12 +57,12 @@ def test_all_files_exist_exits(self, mock_gather, mock_exists):
5757
@patch("cautiousrobot.utils.print")
5858
def test_no_files_exist(self, mock_print, mock_gather, mock_exists):
5959
"""If no files exist, should mark all as missing and print message."""
60-
updated_df, missing_df = check_existing_images(
60+
updated_df, filtered_df = check_existing_images(
6161
self.csv_path, self.img_dir, self.sample_df.copy(), self.filename_col
6262
)
6363

6464
self.assertFalse(any(updated_df["in_img_dir"]))
65-
self.assertEqual(len(missing_df), len(self.sample_df))
65+
self.assertEqual(len(filtered_df), len(self.sample_df))
6666
mock_print.assert_called_once()
6767
self.assertIn("There are 0 files", mock_print.call_args[0][0])
6868

@@ -75,14 +75,14 @@ def test_subfolders_handling(self, mock_gather, mock_exists):
7575
self.filename_col: ["a.jpg", "b.jpg"]
7676
})
7777

78-
updated_df, missing_df = check_existing_images(
78+
updated_df, filtered_df = check_existing_images(
7979
self.csv_path, self.img_dir, sub_df.copy(), self.filename_col, subfolders="subfolder"
8080
)
8181

8282
# species1/a.jpg should be marked present, species2/b.jpg missing
8383
self.assertTrue(updated_df.loc[0, "in_img_dir"])
8484
self.assertFalse(updated_df.loc[1, "in_img_dir"])
85-
self.assertEqual(len(missing_df), 1)
85+
self.assertEqual(len(filtered_df), 1)
8686

8787

8888
if __name__ == "__main__":

0 commit comments

Comments
 (0)