Skip to content

Commit c98c744

Browse files
feat(ui): progress bar shows processing stages in real time (extracting, chunking, embedding, uploading)
1 parent a979139 commit c98c744

File tree

5 files changed

+53
-6
lines changed

5 files changed

+53
-6
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -7,6 +7,9 @@ and the project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.
77

88
## [Unreleased]
99

10+
### Added
11+
- Progress bar now shows processing stages (extracting, chunking, embedding, uploading) in real-time
12+
1013
### Changed
1114
- Batch embedding using Ollama /api/embed endpoint, reducing API calls from N to N/10 for faster uploads
1215

api/routes/upload.py

Lines changed: 11 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -131,14 +131,21 @@ def run_indexing(job_id: str, collection_dir: Path, collection: str, filenames:
131131

132132
jobs[job_id]["progress"] = 0.2
133133
jobs[job_id]["message"] = f"Processing with {'Tika' if use_tika else 'text-only'} mode"
134+
jobs[job_id]["stage"] = "initializing"
135+
136+
# Progress callback to update job stage
137+
def update_progress(stage: str, progress: float):
138+
jobs[job_id]["stage"] = stage
139+
jobs[job_id]["progress"] = progress
134140

135141
# Create and run pipeline
136142
pipeline = RagifyPipeline(config, use_tika=use_tika)
137-
stats = pipeline.process_directory(collection_dir)
143+
stats = pipeline.process_directory(collection_dir, progress_callback=update_progress)
138144

139145
# Update job with results
140146
jobs[job_id]["progress"] = 1.0
141147
jobs[job_id]["status"] = "completed"
148+
jobs[job_id]["stage"] = "completed"
142149
jobs[job_id]["message"] = (
143150
f"Indexed {stats['processed']}/{stats['processed'] + stats['failed']} files, "
144151
f"{stats['chunks']} chunks, {stats['skipped']} skipped"
@@ -158,6 +165,7 @@ def run_indexing(job_id: str, collection_dir: Path, collection: str, filenames:
158165
logger.error(f"[{job_id}] Stack trace:\n{stack_trace}")
159166

160167
jobs[job_id]["status"] = "failed"
168+
jobs[job_id]["stage"] = "failed"
161169
jobs[job_id]["message"] = error_msg
162170
jobs[job_id]["completed_at"] = datetime.utcnow().isoformat()
163171

@@ -206,6 +214,7 @@ async def upload_file(
206214
jobs[job_id] = {
207215
"job_id": job_id,
208216
"status": "pending",
217+
"stage": "pending",
209218
"collection": collection,
210219
"filename": file.filename,
211220
"progress": 0.0,
@@ -279,6 +288,7 @@ async def upload_multiple_files(
279288
jobs[job_id] = {
280289
"job_id": job_id,
281290
"status": "pending",
291+
"stage": "pending",
282292
"collection": collection,
283293
"filename": f"{len(saved_files)} files",
284294
"progress": 0.0,

frontend/index.html

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -170,13 +170,13 @@ <h2>Document Input</h2>
170170
<h2>Active Operations</h2>
171171
<div class="jobs-list">
172172
<template x-for="job in jobs" :key="job.job_id">
173-
<div class="job-item" :class="'job-' + job.status">
173+
<div class="job-item" :class="'job-' + (job.stage || job.status)">
174174
<div class="job-info">
175175
<span class="job-filename" x-text="job.filename"></span>
176176
<span class="job-collection" x-text="job.collection"></span>
177177
</div>
178178
<div class="job-status">
179-
<span class="job-badge" x-text="job.status"></span>
179+
<span class="job-badge" x-text="job.stage || job.status"></span>
180180
<div class="progress-bar" x-show="job.status === 'running'">
181181
<div class="progress-fill" :style="'width: ' + (job.progress * 100) + '%'"></div>
182182
</div>

frontend/static/style.css

Lines changed: 24 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -615,6 +615,30 @@ body {
615615
border: 1px solid var(--red);
616616
}
617617

618+
/* Pipeline stages */
619+
.job-initializing .job-badge,
620+
.job-extracting .job-badge {
621+
background: rgba(255,179,71,0.2);
622+
color: var(--amber);
623+
border: 1px solid var(--amber);
624+
animation: pulse 1s infinite;
625+
}
626+
627+
.job-chunking .job-badge,
628+
.job-embedding .job-badge {
629+
background: rgba(57,255,20,0.2);
630+
color: var(--green);
631+
border: 1px solid var(--green);
632+
animation: pulse 1s infinite;
633+
}
634+
635+
.job-uploading .job-badge {
636+
background: rgba(57,255,20,0.3);
637+
color: var(--green);
638+
border: 1px solid var(--green);
639+
animation: pulse 0.5s infinite;
640+
}
641+
618642
@keyframes pulse {
619643
0%, 100% { opacity: 1; }
620644
50% { opacity: 0.5; }

ragify.py

Lines changed: 13 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -213,12 +213,13 @@ def _ensure_collection_exists(self) -> None:
213213
except Exception as e:
214214
self.logger.warning(f"Failed to ensure collection exists: {e}")
215215

216-
def process_directory(self, root_path: Path) -> Dict:
216+
def process_directory(self, root_path: Path, progress_callback=None) -> Dict:
217217
"""
218218
Process all files in a directory.
219219
220220
Args:
221221
root_path: Root directory to process
222+
progress_callback: Optional callback(stage, progress) for progress updates
222223
223224
Returns:
224225
Processing statistics
@@ -244,7 +245,7 @@ def process_directory(self, root_path: Path) -> Dict:
244245
with tqdm(total=len(files), desc="Processing files", unit="file") as pbar:
245246
for file_path in files:
246247
try:
247-
self.process_file(file_path, pbar)
248+
self.process_file(file_path, pbar, progress_callback)
248249
except Exception as e:
249250
self.logger.error(f"Fatal error processing {file_path}: {e}")
250251
self.stats.failed_files += 1
@@ -262,13 +263,14 @@ def process_directory(self, root_path: Path) -> Dict:
262263
'duration': self.stats.duration()
263264
}
264265

265-
def process_file(self, file_path: Path, pbar: tqdm) -> None:
266+
def process_file(self, file_path: Path, pbar: tqdm, progress_callback=None) -> None:
266267
"""
267268
Process a single file through the pipeline.
268269
269270
Args:
270271
file_path: Path to file
271272
pbar: Progress bar to update
273+
progress_callback: Optional callback(stage, progress) for progress updates
272274
"""
273275
file_size = file_path.stat().st_size
274276
self.stats.total_bytes += file_size
@@ -292,6 +294,8 @@ def process_file(self, file_path: Path, pbar: tqdm) -> None:
292294

293295
# 2. Extract text and metadata
294296
pbar.set_postfix_str(f"Extracting: {file_path.name}")
297+
if progress_callback:
298+
progress_callback("extracting", 0.2)
295299
text, metadata = extract_file_content(file_path)
296300

297301
if not text:
@@ -314,6 +318,8 @@ def process_file(self, file_path: Path, pbar: tqdm) -> None:
314318

315319
# 4. Chunk text (type-specific if implemented)
316320
pbar.set_postfix_str(f"Chunking: {file_path.name}")
321+
if progress_callback:
322+
progress_callback("chunking", 0.4)
317323
chunks = self.chunk_by_type(cleaned, file_path)
318324

319325
if not chunks:
@@ -325,6 +331,8 @@ def process_file(self, file_path: Path, pbar: tqdm) -> None:
325331

326332
# 5. Generate embeddings
327333
pbar.set_postfix_str(f"Embedding: {file_path.name} ({len(chunks)} chunks)")
334+
if progress_callback:
335+
progress_callback("embedding", 0.6)
328336
embedded_chunks = batch_embed_chunks(chunks, max_tokens=self.config.chunking.max_tokens)
329337

330338
if not embedded_chunks:
@@ -349,6 +357,8 @@ def process_file(self, file_path: Path, pbar: tqdm) -> None:
349357

350358
# 7. Upload to Qdrant
351359
pbar.set_postfix_str(f"Uploading: {file_path.name}")
360+
if progress_callback:
361+
progress_callback("uploading", 0.8)
352362
from lib.qdrant_operations import upload_points
353363

354364
success = True

0 commit comments

Comments (0)