-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathraw_processor.py
More file actions
206 lines (181 loc) · 7.31 KB
/
raw_processor.py
File metadata and controls
206 lines (181 loc) · 7.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
"""Raw image folder pipeline — detect, validate, zip, and hand off to Comics_in."""
import os
import time
import uuid
import shutil
import zipfile
import threading
from processor import log
# Drop directory that is scanned for raw image folders.
COMICS_RAW = '/Comics_raw'
# Source folders are moved here after they have been zipped successfully.
COMICS_RAW_PROCESSED = '/Comics_raw/processed'
# Rejected folders (and folders whose processing failed) are moved here.
COMICS_RAW_UNPROCESSED = '/Comics_raw/unprocessed'
# Finished .cbz archives are placed here.
COMICS_IN = '/Comics_in'
# File extensions (lower-case, including the dot) that count as images.
IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.gif', '.webp'}
# Lower-cased file names that are skipped without logging a warning.
IGNORABLE_FILES = {'.ds_store', 'thumbs.db', 'desktop.ini', '.localized'}
# Minimum age, in seconds, of the newest entry before a folder counts as stable.
STABILITY_SECONDS = 30
# Folder paths currently claimed for processing; access is guarded by raw_lock_mutex.
RAW_PROCESSING_LOCKS = set()
raw_lock_mutex = threading.Lock()
def is_folder_stable(folderpath: str, stability_seconds: "float | None" = None) -> bool:
    """Report whether a folder's contents have stopped changing.

    Only the folder's direct entries are examined (no recursion). The folder
    counts as stable when at least one entry could be stat'ed and the most
    recent modification time among them lies at least ``stability_seconds``
    in the past.

    Args:
        folderpath: Directory to inspect.
        stability_seconds: Required quiet period in seconds. ``None`` (the
            default) falls back to the module-level ``STABILITY_SECONDS``.

    Returns:
        ``False`` when the folder cannot be listed, is empty, or none of its
        entries can be stat'ed; otherwise whether the newest entry is old
        enough.
    """
    try:
        entries = os.listdir(folderpath)
    except OSError:
        return False

    mtimes = []
    for entry in entries:
        try:
            mtimes.append(os.path.getmtime(os.path.join(folderpath, entry)))
        except OSError:
            # The entry vanished or cannot be stat'ed; leave it out.
            continue

    if not mtimes:
        return False

    # Resolve the default lazily so the early exits above never need it.
    quiet_period = STABILITY_SECONDS if stability_seconds is None else stability_seconds
    return (time.time() - max(mtimes)) >= quiet_period
def _available_cbz_path(folder_name: str) -> str:
    """Return an unused ``.cbz`` path inside COMICS_IN for ``folder_name``.

    Tries ``<folder_name>.cbz`` first, then ``<folder_name>_2.cbz``,
    ``<folder_name>_3.cbz`` and so on, returning the first path that does
    not exist yet.
    """
    attempt = 1
    while True:
        stem = folder_name if attempt == 1 else f"{folder_name}_{attempt}"
        target = os.path.join(COMICS_IN, stem + '.cbz')
        if not os.path.exists(target):
            return target
        attempt += 1
def _available_dest_path(parent_dir: str, name: str) -> str:
candidate = os.path.join(parent_dir, name)
if not os.path.exists(candidate):
return candidate
counter = 2
while True:
candidate = os.path.join(parent_dir, f"{name}_{counter}")
if not os.path.exists(candidate):
return candidate
counter += 1
def _move_to_unprocessed(folderpath: str) -> str:
    """Move ``folderpath`` into COMICS_RAW_UNPROCESSED under a free name and return the new path."""
    os.makedirs(COMICS_RAW_UNPROCESSED, exist_ok=True)
    dest = _available_dest_path(COMICS_RAW_UNPROCESSED, os.path.basename(folderpath))
    shutil.move(folderpath, dest)
    return dest


def process_raw_folder(folderpath: str) -> None:
    """Validate, zip, and dispatch a flat image folder into the Comics_in pipeline.

    Steps:
      1. List the folder. If it cannot be read, log the error and leave the
         folder where it is.
      2. Reject folders that contain subfolders or no image files: log the
         reason and move the folder to Comics_raw/unprocessed/.
      3. Write all image files (sorted case-insensitively by file name) into
         an uncompressed temporary .cbz, move it into COMICS_IN under a
         non-clashing name, then move the source folder to
         Comics_raw/processed/.

    Any exception is logged; the temporary archive is removed and the folder
    is moved to Comics_raw/unprocessed/ if it still exists. Failures of that
    cleanup are logged as well rather than silently ignored. The folder's
    entry in RAW_PROCESSING_LOCKS is always released on exit.

    Args:
        folderpath: Path of the folder to process.
    """
    folder_name = os.path.basename(folderpath)
    short = folder_name[:40]  # truncated name keeps log lines readable
    temp_cbz = os.path.join(os.environ.get("TMPDIR", "/tmp"), uuid.uuid4().hex + ".cbz")
    try:
        try:
            entries = os.listdir(folderpath)
        except OSError as e:
            log(f">>> RAW ERROR: {short} — cannot read folder: {e}")
            return

        subfolders = [name for name in entries if os.path.isdir(os.path.join(folderpath, name))]
        if subfolders:
            log(f">>> RAW SKIPPED: {short} — contains subfolders ({', '.join(subfolders)}). Flatten all images into a single folder with no subdirectories, then drop it into Comics_raw again.")
            _move_to_unprocessed(folderpath)
            return

        image_files = []
        for entry in entries:
            filepath = os.path.join(folderpath, entry)
            if not os.path.isfile(filepath):
                continue
            ext = os.path.splitext(entry)[1].lower()
            if ext in IMAGE_EXTS:
                image_files.append(filepath)
            elif entry.lower() not in IGNORABLE_FILES:
                log(f">>> RAW WARNING: {short} — skipping non-image file: {entry}")

        if not image_files:
            log(f">>> RAW SKIPPED: {short} — no image files found. Fix the folder contents and drop it into Comics_raw again.")
            _move_to_unprocessed(folderpath)
            return

        log(f">>> RAW ZIPPING: {short} ({len(image_files)} images)")
        # ZIP_STORED: entries are archived without compression.
        with zipfile.ZipFile(temp_cbz, 'w', zipfile.ZIP_STORED) as zf:
            for img in sorted(image_files, key=lambda x: os.path.basename(x).lower()):
                zf.write(img, os.path.basename(img))

        os.makedirs(COMICS_IN, exist_ok=True)
        cbz_path = _available_cbz_path(folder_name)
        # NOTE(review): if TMPDIR and COMICS_IN are on different filesystems,
        # shutil.move copies, so the archive is briefly visible while
        # incomplete — confirm the Comics_in consumer tolerates that.
        shutil.move(temp_cbz, cbz_path)

        os.makedirs(COMICS_RAW_PROCESSED, exist_ok=True)
        processed_dest = _available_dest_path(COMICS_RAW_PROCESSED, folder_name)
        shutil.move(folderpath, processed_dest)
        log(f">>> RAW SUCCESS: {short} -> {os.path.basename(cbz_path)}")
    except Exception as e:
        log(f">>> RAW ERROR: {short} — {e}")
        if os.path.exists(temp_cbz):
            try:
                os.remove(temp_cbz)
            except OSError as cleanup_err:
                log(f">>> RAW WARNING: {short} — could not remove temp file {temp_cbz}: {cleanup_err}")
        if os.path.exists(folderpath):
            try:
                _move_to_unprocessed(folderpath)
            except OSError as move_err:
                # The folder stays in Comics_raw and will be picked up again
                # by the next scan, so make the reason visible.
                log(f">>> RAW ERROR: {short} — could not move folder to unprocessed: {move_err}")
    finally:
        with raw_lock_mutex:
            RAW_PROCESSING_LOCKS.discard(folderpath)
def scan_raw_directories() -> None:
    """Scan COMICS_RAW once and start a worker thread for each ready folder.

    Entries named ``processed`` or ``unprocessed`` and anything that is not a
    directory are skipped, as are folders that ``is_folder_stable`` reports
    as not yet stable. A folder already present in RAW_PROCESSING_LOCKS is
    not started a second time. Returns silently if COMICS_RAW cannot be
    listed.

    Raises:
        Exception: whatever ``Thread.start()`` raised if a worker could not
            be started. The folder's claim is released first so a later scan
            can retry it.
    """
    try:
        entries = os.listdir(COMICS_RAW)
    except OSError:
        return

    for entry in entries:
        if entry in ('processed', 'unprocessed'):
            continue
        full_path = os.path.join(COMICS_RAW, entry)
        if not os.path.isdir(full_path):
            continue
        if not is_folder_stable(full_path):
            continue
        with raw_lock_mutex:
            if full_path in RAW_PROCESSING_LOCKS:
                continue
            RAW_PROCESSING_LOCKS.add(full_path)
            worker = threading.Thread(target=process_raw_folder, args=(full_path,), daemon=True)
            try:
                worker.start()
            except Exception:
                # process_raw_folder never runs, so its finally-block will not
                # release the claim; drop it here (the mutex is already held).
                RAW_PROCESSING_LOCKS.discard(full_path)
                raise
def raw_watch_loop() -> None:
    """Call scan_raw_directories() forever, pausing 10 seconds between scans.

    An exception raised by a scan is logged and does not stop the loop.
    """
    pause_seconds = 10
    while True:
        try:
            scan_raw_directories()
        except Exception as scan_err:
            log(f">>> RAW SCAN ERROR: {scan_err}")
        time.sleep(pause_seconds)
def raw_inotify_watch_loop() -> None:
    """Event-driven watcher for Comics_raw, built on a watchdog Observer.

    Runs one scan up front, then calls scan_raw_directories() whenever a
    created or moved event is reported anywhere under COMICS_RAW (the watch
    is recursive). The stability check performed by scan_raw_directories()
    still runs on every trigger, so an event only starts processing for
    folders that already count as stable.

    Blocks until the idle loop is interrupted, then stops and joins the
    observer. Scan failures and an unexpected exit of the idle loop are
    logged instead of being silently discarded.

    NOTE(review): nothing in this function rescans after a folder's quiet
    period has elapsed; presumably another trigger (e.g. raw_watch_loop)
    covers that case — confirm against the caller.
    """
    from watchdog.observers import Observer
    from watchdog.events import FileSystemEventHandler

    def _scan_and_log() -> None:
        # A failing scan must not take the watcher down.
        try:
            scan_raw_directories()
        except Exception as e:
            log(f">>> RAW SCAN ERROR: {e}")

    class _RawHandler(FileSystemEventHandler):
        def _trigger(self) -> None:
            _scan_and_log()

        def on_created(self, event) -> None:  # type: ignore[override]
            self._trigger()

        def on_moved(self, event) -> None:  # type: ignore[override]
            self._trigger()

    # Initial pass for folders that were already present before the watch starts.
    _scan_and_log()

    observer = Observer()
    observer.schedule(_RawHandler(), COMICS_RAW, recursive=True)
    observer.start()
    try:
        while True:
            time.sleep(1)
    except Exception as e:
        log(f">>> RAW WATCH ERROR: {e}")
    finally:
        observer.stop()
        observer.join()