
Commit e6c0698

chore(release): release version 1.2.0
1 parent c98c744 commit e6c0698


5 files changed: +290 −23 lines


CHANGELOG.md

Lines changed: 6 additions & 0 deletions
@@ -7,11 +7,17 @@ and the project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
 
+## [1.2.0] - 2025-12-02
+
 ### Added
 - Progress bar now shows processing stages (extracting, chunking, embedding, uploading) in real-time
+- Client-side ZIP compression for multi-file uploads, reducing N HTTP requests to 1 and improving upload speed
+- New `/api/upload-zip` endpoint for server-side ZIP extraction and batch processing
+- Browser-side progress feedback: "Zipping..." and "Uploading..." phases before server processing
 
 ### Changed
 - Batch embedding using Ollama /api/embed endpoint, reducing API calls from N to N/10 for faster uploads
+- Multi-file uploads now automatically use ZIP compression (threshold: >1 file or >5MB total)
 
 ### Fixed
 - Silenced verbose httpx/httpcore logs that spammed 60+ lines per file upload
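Note: the "Changed" entry about batch embedding refers to code that is not touched by this commit. As a rough illustration of the N to N/10 reduction, here is a minimal sketch of batching chunks against Ollama's /api/embed endpoint; the local URL, the nomic-embed-text model name, and the batch size of 10 are assumptions for the example, not taken from this diff.

# Hypothetical sketch: batch embedding via Ollama's /api/embed.
# Assumptions: Ollama at http://localhost:11434, model "nomic-embed-text",
# and 10 chunks per request (matching the N -> N/10 claim in the changelog).
import requests

OLLAMA_URL = "http://localhost:11434/api/embed"  # assumed local endpoint
BATCH_SIZE = 10

def embed_chunks(chunks: list[str], model: str = "nomic-embed-text") -> list[list[float]]:
    """Embed text chunks in batches instead of one request per chunk."""
    embeddings: list[list[float]] = []
    for i in range(0, len(chunks), BATCH_SIZE):
        batch = chunks[i:i + BATCH_SIZE]
        # /api/embed accepts a list of inputs and returns one vector per input
        resp = requests.post(OLLAMA_URL, json={"model": model, "input": batch})
        resp.raise_for_status()
        embeddings.extend(resp.json()["embeddings"])
    return embeddings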

api/routes/upload.py

Lines changed: 175 additions & 0 deletions
@@ -10,6 +10,7 @@
 import uuid
 import logging
 import traceback
+import zipfile
 from pathlib import Path
 from typing import Optional, List
 from datetime import datetime
@@ -313,6 +314,180 @@ async def upload_multiple_files(
     )
 
 
+def run_zip_indexing(job_id: str, zip_path: Path, collection_dir: Path, collection: str):
+    """
+    Extract ZIP and run indexing pipeline.
+
+    Args:
+        job_id: Job identifier for tracking
+        zip_path: Path to uploaded ZIP file
+        collection_dir: Target directory for extracted files
+        collection: Target collection name
+    """
+    logger.info(f"[{job_id}] Starting ZIP extraction from {zip_path}")
+
+    try:
+        jobs[job_id]["status"] = "running"
+        jobs[job_id]["stage"] = "extracting_zip"
+        jobs[job_id]["message"] = "Extracting ZIP archive"
+        jobs[job_id]["progress"] = 0.05
+
+        # Extract ZIP, filtering out macOS metadata and hidden files
+        extracted_files = []
+        with zipfile.ZipFile(zip_path, 'r') as zf:
+            for name in zf.namelist():
+                # Skip macOS metadata, hidden files, and directories
+                if name.startswith('__MACOSX') or name.startswith('.') or name.endswith('/'):
+                    continue
+                # Skip nested hidden files (e.g., folder/.hidden)
+                if '/.' in name:
+                    continue
+
+                zf.extract(name, collection_dir)
+                extracted_files.append(name)
+
+        # Remove ZIP after extraction
+        zip_path.unlink()
+
+        logger.info(f"[{job_id}] Extracted {len(extracted_files)} files from ZIP")
+        jobs[job_id]["message"] = f"Extracted {len(extracted_files)} files"
+        jobs[job_id]["progress"] = 0.15
+        jobs[job_id]["filename"] = f"{len(extracted_files)} files"
+
+        if not extracted_files:
+            jobs[job_id]["status"] = "completed"
+            jobs[job_id]["stage"] = "completed"
+            jobs[job_id]["message"] = "ZIP was empty or contained only hidden files"
+            jobs[job_id]["completed_at"] = datetime.utcnow().isoformat()
+            return
+
+        # Import RagifyPipeline
+        from ragify import RagifyPipeline
+        from lib.config import RagifyConfig
+        from lib.tika_check import check_tika_available
+
+        # Configure
+        config = RagifyConfig.default()
+        config.qdrant.collection = collection
+
+        # Check Tika availability
+        tika_status = check_tika_available()
+        use_tika = tika_status['can_use_tika']
+        logger.info(f"[{job_id}] Tika available: {use_tika}")
+
+        # Progress callback
+        def update_progress(stage: str, progress: float):
+            # Scale progress: extraction was 0-0.15, pipeline is 0.15-1.0
+            scaled_progress = 0.15 + (progress * 0.85)
+            jobs[job_id]["stage"] = stage
+            jobs[job_id]["progress"] = scaled_progress
+
+        # Run pipeline
+        pipeline = RagifyPipeline(config, use_tika=use_tika)
+        stats = pipeline.process_directory(collection_dir, progress_callback=update_progress)
+
+        # Update job with results
+        jobs[job_id]["progress"] = 1.0
+        jobs[job_id]["status"] = "completed"
+        jobs[job_id]["stage"] = "completed"
+        jobs[job_id]["message"] = (
+            f"Indexed {stats['processed']}/{stats['processed'] + stats['failed']} files, "
+            f"{stats['chunks']} chunks"
+        )
+        jobs[job_id]["completed_at"] = datetime.utcnow().isoformat()
+
+        logger.info(f"[{job_id}] ZIP indexing COMPLETED: {stats['processed']} files, {stats['chunks']} chunks")
+
+    except Exception as e:
+        error_msg = str(e)
+        logger.error(f"[{job_id}] ZIP indexing FAILED: {error_msg}")
+        logger.error(f"[{job_id}] Stack trace:\n{traceback.format_exc()}")
+
+        # Cleanup ZIP if still exists
+        if zip_path.exists():
+            try:
+                zip_path.unlink()
+            except Exception:
+                pass
+
+        jobs[job_id]["status"] = "failed"
+        jobs[job_id]["stage"] = "failed"
+        jobs[job_id]["message"] = error_msg
+        jobs[job_id]["completed_at"] = datetime.utcnow().isoformat()
+
+
+@router.post("/upload-zip")
+async def upload_zip(
+    background_tasks: BackgroundTasks,
+    file: UploadFile = File(...),
+    collection: str = Form(default="documentation")
+):
+    """
+    Upload a ZIP file for extraction and indexing.
+
+    The ZIP is extracted server-side, then all files are processed
+    by RagifyPipeline as a single job.
+
+    Args:
+        file: ZIP file to upload
+        collection: Target collection name
+
+    Returns:
+        dict: Job information
+    """
+    # Trigger cleanup
+    cleanup_old_files()
+
+    # Validate file
+    if not file.filename:
+        raise HTTPException(status_code=400, detail="No filename provided")
+
+    if not file.filename.endswith('.zip'):
+        raise HTTPException(status_code=400, detail="File must be a ZIP archive")
+
+    # Create collection directory
+    collection_dir = COLLECTIONS_DIR / collection
+    collection_dir.mkdir(parents=True, exist_ok=True)
+
+    # Save ZIP temporarily with unique name
+    zip_path = collection_dir / f"_upload_{uuid.uuid4().hex}.zip"
+    try:
+        content = await file.read()
+        zip_path.write_bytes(content)
+        logger.info(f"Saved ZIP: {zip_path} ({len(content)} bytes)")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Failed to save ZIP: {e}")
+
+    # Create job record
+    job_id = str(uuid.uuid4())
+    jobs[job_id] = {
+        "job_id": job_id,
+        "status": "pending",
+        "stage": "pending",
+        "collection": collection,
+        "filename": "ZIP archive",
+        "progress": 0.0,
+        "message": "ZIP uploaded, extraction starting",
+        "created_at": datetime.utcnow().isoformat(),
+        "completed_at": None
+    }
+
+    # Start background processing
+    background_tasks.add_task(
+        run_zip_indexing,
+        job_id,
+        zip_path,
+        collection_dir,
+        collection
+    )
+
+    return JobCreate(
+        job_id=job_id,
+        status="pending",
+        message=f"ZIP uploaded to collection '{collection}', extraction and indexing started"
+    )
+
+
 @router.get("/jobs/{job_id}")
 async def get_job_status(job_id: str):
     """

frontend/index.html

Lines changed: 9 additions & 0 deletions
@@ -6,6 +6,7 @@
     <title>Ragify - Control Board</title>
     <link rel="icon" type="image/svg+xml" href="/static/favicon.svg">
     <link rel="apple-touch-icon" href="/static/favicon.svg">
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/jszip/3.10.1/jszip.min.js"></script>
     <script defer src="https://unpkg.com/[email protected]/dist/cdn.min.js"></script>
     <link rel="stylesheet" href="/static/style.css">
 </head>
@@ -163,6 +164,14 @@ <h2>Document Input</h2>
                     <span x-show="uploading">Processing...</span>
                 </button>
             </div>
+
+            <!-- Local Upload Progress (browser-side) -->
+            <div class="upload-local-progress" x-show="uploadPhase">
+                <div class="upload-phase-label" x-text="uploadPhase === 'zipping' ? 'Zipping files...' : uploadPhase === 'uploading' ? 'Uploading...' : ''"></div>
+                <div class="progress-bar">
+                    <div class="progress-fill" :style="'width: ' + (uploadLocalProgress * 100) + '%'"></div>
+                </div>
+            </div>
         </section>
 
         <!-- Jobs Panel -->

frontend/static/app.js

Lines changed: 80 additions & 22 deletions
@@ -25,6 +25,8 @@ function ragifyApp() {
         uploadQueue: [],
         uploading: false,
         dragOver: false,
+        uploadPhase: '', // 'zipping', 'uploading', ''
+        uploadLocalProgress: 0,
 
         // Search
         searchCollection: '',
@@ -180,38 +182,94 @@ function ragifyApp() {
 
         async startUpload() {
             if (this.uploadQueue.length === 0 || this.uploading) return;
-
             this.uploading = true;
 
-            for (const file of this.uploadQueue) {
-                try {
-                    const formData = new FormData();
-                    formData.append('file', file);
-                    formData.append('collection', this.uploadCollection);
-
-                    const res = await fetch('/api/upload', {
-                        method: 'POST',
-                        body: formData,
-                        credentials: 'include'
-                    });
-
-                    if (res.ok) {
-                        const data = await res.json();
-                        this.showToast(`Uploaded: ${file.name} (Job: ${data.job_id?.slice(0,8) || 'queued'})`, 'success');
-                    } else {
-                        const error = await res.json().catch(() => ({}));
-                        this.showToast(`Failed: ${file.name} - ${error.detail || res.statusText}`, 'error');
-                    }
-                } catch (e) {
-                    this.showToast(`Failed: ${file.name} - ${e.message || 'Network error'}`, 'error');
+            // Decide: ZIP if multiple files OR total size > 5MB
+            const totalSize = this.uploadQueue.reduce((sum, f) => sum + f.size, 0);
+            const shouldZip = this.uploadQueue.length > 1 || totalSize > 5 * 1024 * 1024;
+
+            try {
+                if (shouldZip) {
+                    await this.uploadAsZip();
+                } else {
+                    await this.uploadSingleFile();
                 }
+            } catch (e) {
+                this.showToast(`Upload failed: ${e.message || 'Unknown error'}`, 'error');
             }
 
             this.uploadQueue = [];
             this.uploading = false;
+            this.uploadPhase = '';
+            this.uploadLocalProgress = 0;
             await this.loadJobs();
         },
 
+        async uploadSingleFile() {
+            const file = this.uploadQueue[0];
+            this.uploadPhase = 'uploading';
+            this.uploadLocalProgress = 0;
+
+            const formData = new FormData();
+            formData.append('file', file);
+            formData.append('collection', this.uploadCollection);
+
+            const res = await fetch('/api/upload', {
+                method: 'POST',
+                body: formData,
+                credentials: 'include'
+            });
+
+            if (res.ok) {
+                const data = await res.json();
+                this.showToast(`Uploaded: ${file.name} (Job: ${data.job_id?.slice(0,8) || 'queued'})`, 'success');
+            } else {
+                const error = await res.json().catch(() => ({}));
+                throw new Error(error.detail || res.statusText);
+            }
+        },
+
+        async uploadAsZip() {
+            // Phase 1: Zipping
+            this.uploadPhase = 'zipping';
+            this.uploadLocalProgress = 0;
+
+            const zip = new JSZip();
+            for (const file of this.uploadQueue) {
+                zip.file(file.name, file);
+            }
+
+            const blob = await zip.generateAsync({
+                type: 'blob',
+                compression: 'DEFLATE',
+                compressionOptions: { level: 6 }
+            }, (meta) => {
+                this.uploadLocalProgress = meta.percent / 100;
+            });
+
+            // Phase 2: Uploading
+            this.uploadPhase = 'uploading';
+            this.uploadLocalProgress = 0;
+
+            const formData = new FormData();
+            formData.append('file', blob, 'upload.zip');
+            formData.append('collection', this.uploadCollection);
+
+            const res = await fetch('/api/upload-zip', {
+                method: 'POST',
+                body: formData,
+                credentials: 'include'
+            });
+
+            if (res.ok) {
+                const data = await res.json();
+                this.showToast(`Uploaded ${this.uploadQueue.length} files as ZIP (Job: ${data.job_id?.slice(0,8) || 'queued'})`, 'success');
+            } else {
+                const error = await res.json().catch(() => ({}));
+                throw new Error(error.detail || res.statusText);
+            }
+        },
+
         // Search
         async performSearch() {
             if (!this.searchQuery || this.searching) return;

frontend/static/style.css

Lines changed: 20 additions & 1 deletion
@@ -538,6 +538,24 @@ body {
     color: var(--amber);
 }
 
+/* Local Upload Progress (browser-side zipping/uploading) */
+.upload-local-progress {
+    margin-top: 1rem;
+    padding: 0.75rem;
+    background: var(--gauge-bg);
+    border: 2px solid var(--amber);
+    border-radius: 4px;
+}
+
+.upload-phase-label {
+    font-size: 0.85rem;
+    color: var(--amber);
+    margin-bottom: 0.5rem;
+    text-transform: uppercase;
+    letter-spacing: 1px;
+    animation: pulse 1s infinite;
+}
+
 /* Jobs Panel - Full Width */
 .section.jobs-section {
     grid-column: 1 / -1;
@@ -617,7 +635,8 @@ body {
 
 /* Pipeline stages */
 .job-initializing .job-badge,
-.job-extracting .job-badge {
+.job-extracting .job-badge,
+.job-extracting_zip .job-badge {
     background: rgba(255,179,71,0.2);
     color: var(--amber);
     border: 1px solid var(--amber);
