Skip to content

Commit 81c60d2

Browse files
committed
Samples were not correctly removed in remove_bad_files
1 parent 826ff10 commit 81c60d2

File tree

3 files changed

+40
-3
lines changed

3 files changed

+40
-3
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ jobs:
117117
remove_bad_files: # This is turned on
118118
# remove_bad_files: # This is turned off
119119
120-
## Note that this is different from docker-compose (Methods 1 and 2)
120+
## Note that this is different from docker-compose (where both examples above would be turned off; in docker, "true" or additional options are required as the value next to the key)
121121
```
122122

123123

@@ -154,7 +154,7 @@ services:
154154
```
155155

156156

157-
### Docker: Specifying all settings in docker-compose
157+
#### Docker: Specifying all settings in docker-compose
158158

159159
As noted above, the [recommended approach for docker](#docker-docker-compose-together-with-configyaml) setups is to use a config.yaml, as the approach below may bloat your docker-compose file and can make it a headache to adhere to all of the notation rules that compose requires
160160

src/jobs/remove_bad_files.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,10 @@ def _get_stoppable_files(self, torrent_files):
150150
if self._is_bad_extension(file):
151151
reasons.append(f"Bad extension: {file['file_extension']}")
152152

153+
# Check for bad keywords
154+
if self._contains_bad_keyword(file):
155+
reasons.append("Contains bad keyword in path")
156+
153157
# Check if the file has low availability
154158
if self._is_complete_partial(file):
155159
reasons.append(f"Low availability: {file['availability'] * 100:.1f}%")
@@ -165,6 +169,15 @@ def _is_bad_extension(self, file):
165169
"""Check if the file has a bad extension."""
166170
return file['file_extension'].lower() not in self.good_extensions
167171

172+
def _contains_bad_keyword(self, file):
173+
"""Check if the file path contains a bad keyword and the file size (in MB) is at or below the limit."""
174+
file_path = file.get("name", "").lower()
175+
file_size_mb = file.get("size", 0) / 1024 / 1024
176+
177+
return (
178+
any(keyword.lower() in file_path for keyword in self.bad_keywords)
179+
and file_size_mb <= self.bad_keyword_limit
180+
)
168181

169182

170183
def _is_complete_partial(self, file):

tests/jobs/test_remove_bad_files.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def fixture_removal_job(arr):
3636
("file.mkv", False), # Good extension
3737
("file.avi", False), # Good extension
3838
("file.exe", True), # Bad extension
39-
("file.sample", True), # Bad extension
39+
("file.jpg", True), # Bad extension
4040
],
4141
)
4242
def test_is_bad_extension(removal_job, file_name, expected_result):
@@ -49,6 +49,30 @@ def test_is_bad_extension(removal_job, file_name, expected_result):
4949
# Assert
5050
assert result == expected_result
5151

52+
@pytest.mark.parametrize(
53+
"name, size_bytes, expected_result",
54+
[
55+
("My.Movie.2024.2160/Subfolder/sample.mkv", 100 * 1024, True), # 100 KB, 'sample' keyword in filename
56+
("My.Movie.2024.2160/Subfolder/Sample.mkv", 100 * 1024, True), # 100 KB, case-insensitive match
57+
("My.Movie.2024.2160/Subfolder/sample movie.mkv", 100 * 1024, True), # 100 KB, 'sample' keyword with space
58+
("My.Movie.2024.2160/Subfolder/samplemovie.mkv", 100 * 1024, True), # 100 KB, 'sample' keyword concatenated
59+
("My.Movie.2024.2160/Subfolder/Movie sample.mkv", 100 * 1024, True), # 100 KB, 'sample' keyword at end
60+
("My.Movie.2024.2160/Sample/Movie.mkv", 100 * 1024, True), # 100 KB, 'sample' keyword in folder name
61+
("My.Movie.2024.2160/sample/Movie.mkv", 100 * 1024, True), # 100 KB, lowercase folder name
62+
("My.Movie.2024.2160/Samples/Movie.mkv", 100 * 1024, True), # 100 KB, plural form in folder name
63+
("My.Movie.2024.2160/Big Samples/Movie.mkv", 700 * 1024 * 1024, False), # 700 MB, large file, should NOT be flagged
64+
("My.Movie.2024.2160/Some Folder/Movie.mkv", 100 * 1024, False), # 100 KB, no 'sample' keyword, should not flag
65+
],
66+
)
67+
def test_contains_bad_keyword(removal_job, name, size_bytes, expected_result):
68+
"""Test detection of bad keywords with uniform small size except a large sample file."""
69+
file = {
70+
"name": name,
71+
"size": size_bytes,
72+
}
73+
result = removal_job._contains_bad_keyword(file) # pylint: disable=W0212
74+
assert result == expected_result
75+
5276

5377
@pytest.mark.parametrize(
5478
"file, is_incomplete_partial",

0 commit comments

Comments
 (0)