 import uuid
 import logging
 import traceback
+import zipfile
 from pathlib import Path
 from typing import Optional, List
 from datetime import datetime
@@ -313,6 +314,180 @@ async def upload_multiple_files(
     )


+def run_zip_indexing(job_id: str, zip_path: Path, collection_dir: Path, collection: str):
+    """
+    Extract ZIP and run indexing pipeline.
+
+    Args:
+        job_id: Job identifier for tracking
+        zip_path: Path to uploaded ZIP file
+        collection_dir: Target directory for extracted files
+        collection: Target collection name
+    """
+    logger.info(f"[{job_id}] Starting ZIP extraction from {zip_path}")
+
+    try:
+        jobs[job_id]["status"] = "running"
+        jobs[job_id]["stage"] = "extracting_zip"
+        jobs[job_id]["message"] = "Extracting ZIP archive"
+        jobs[job_id]["progress"] = 0.05
+
+        # Extract ZIP, filtering out macOS metadata and hidden files
+        extracted_files = []
+        with zipfile.ZipFile(zip_path, 'r') as zf:
+            for name in zf.namelist():
+                # Skip macOS metadata, hidden files, and directories
+                if name.startswith('__MACOSX') or name.startswith('.') or name.endswith('/'):
+                    continue
+                # Skip nested hidden files (e.g., folder/.hidden)
+                if '/.' in name:
+                    continue
+
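+                # Note: ZipFile.extract() strips absolute paths and drops ".." components,
+                # which mitigates zip-slip when extracting untrusted archives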
+                zf.extract(name, collection_dir)
+                extracted_files.append(name)
+
+        # Remove ZIP after extraction
+        zip_path.unlink()
+
+        logger.info(f"[{job_id}] Extracted {len(extracted_files)} files from ZIP")
+        jobs[job_id]["message"] = f"Extracted {len(extracted_files)} files"
+        jobs[job_id]["progress"] = 0.15
+        jobs[job_id]["filename"] = f"{len(extracted_files)} files"
+
+        if not extracted_files:
+            jobs[job_id]["status"] = "completed"
+            jobs[job_id]["stage"] = "completed"
+            jobs[job_id]["message"] = "ZIP was empty or contained only hidden files"
+            jobs[job_id]["completed_at"] = datetime.utcnow().isoformat()
+            return
+
+        # Import RagifyPipeline
+        from ragify import RagifyPipeline
+        from lib.config import RagifyConfig
+        from lib.tika_check import check_tika_available
+
+        # Configure
+        config = RagifyConfig.default()
+        config.qdrant.collection = collection
+
+        # Check Tika availability
+        tika_status = check_tika_available()
+        use_tika = tika_status['can_use_tika']
+        logger.info(f"[{job_id}] Tika available: {use_tika}")
+
+        # Progress callback
+        def update_progress(stage: str, progress: float):
+            # Scale progress: extraction was 0-0.15, pipeline is 0.15-1.0
+            scaled_progress = 0.15 + (progress * 0.85)
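+            # e.g. pipeline progress 0.5 maps to 0.15 + 0.5 * 0.85 = 0.575 overall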
+            jobs[job_id]["stage"] = stage
+            jobs[job_id]["progress"] = scaled_progress
+
+        # Run pipeline
+        pipeline = RagifyPipeline(config, use_tika=use_tika)
+        stats = pipeline.process_directory(collection_dir, progress_callback=update_progress)
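+        # stats is expected to carry 'processed', 'failed', and 'chunks' counts (used below)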
+
+        # Update job with results
+        jobs[job_id]["progress"] = 1.0
+        jobs[job_id]["status"] = "completed"
+        jobs[job_id]["stage"] = "completed"
+        jobs[job_id]["message"] = (
+            f"Indexed {stats['processed']}/{stats['processed'] + stats['failed']} files, "
+            f"{stats['chunks']} chunks"
+        )
+        jobs[job_id]["completed_at"] = datetime.utcnow().isoformat()
+
+        logger.info(f"[{job_id}] ZIP indexing COMPLETED: {stats['processed']} files, {stats['chunks']} chunks")
+
+    except Exception as e:
+        error_msg = str(e)
+        logger.error(f"[{job_id}] ZIP indexing FAILED: {error_msg}")
+        logger.error(f"[{job_id}] Stack trace:\n{traceback.format_exc()}")
+
+        # Clean up the ZIP if it still exists
+        if zip_path.exists():
+            try:
+                zip_path.unlink()
+            except Exception:
+                pass
+
+        jobs[job_id]["status"] = "failed"
+        jobs[job_id]["stage"] = "failed"
+        jobs[job_id]["message"] = error_msg
+        jobs[job_id]["completed_at"] = datetime.utcnow().isoformat()
+
+
+@router.post("/upload-zip")
+async def upload_zip(
+    background_tasks: BackgroundTasks,
+    file: UploadFile = File(...),
+    collection: str = Form(default="documentation")
+):
+    """
+    Upload a ZIP file for extraction and indexing.
+
+    The ZIP is extracted server-side, then all files are processed
+    by RagifyPipeline as a single job.
+
+    Args:
+        file: ZIP file to upload
+        collection: Target collection name
+
+    Returns:
+        dict: Job information
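+
+    Example (sketch; the exact URL depends on how this router is mounted):
+        curl -F "file=@docs.zip" -F "collection=documentation" \
+             http://localhost:8000/upload-zip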
| 437 | + """ |
| 438 | + # Trigger cleanup |
| 439 | + cleanup_old_files() |
| 440 | + |
| 441 | + # Validate file |
| 442 | + if not file.filename: |
| 443 | + raise HTTPException(status_code=400, detail="No filename provided") |
| 444 | + |
| 445 | + if not file.filename.endswith('.zip'): |
| 446 | + raise HTTPException(status_code=400, detail="File must be a ZIP archive") |
| 447 | + |
| 448 | + # Create collection directory |
| 449 | + collection_dir = COLLECTIONS_DIR / collection |
| 450 | + collection_dir.mkdir(parents=True, exist_ok=True) |
| 451 | + |
| 452 | + # Save ZIP temporarily with unique name |
| 453 | + zip_path = collection_dir / f"_upload_{uuid.uuid4().hex}.zip" |
| 454 | + try: |
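+        # file.read() loads the whole upload into memory; fine for modest archives,
+        # but very large ZIPs would need a streamed copy instead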
+        content = await file.read()
+        zip_path.write_bytes(content)
+        logger.info(f"Saved ZIP: {zip_path} ({len(content)} bytes)")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Failed to save ZIP: {e}")
+
+    # Create job record
+    job_id = str(uuid.uuid4())
+    jobs[job_id] = {
+        "job_id": job_id,
+        "status": "pending",
+        "stage": "pending",
+        "collection": collection,
+        "filename": "ZIP archive",
+        "progress": 0.0,
+        "message": "ZIP uploaded, extraction starting",
+        "created_at": datetime.utcnow().isoformat(),
+        "completed_at": None
+    }
+
+    # Start background processing
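+    # BackgroundTasks runs this after the response is sent, so the client gets the job_id immediately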
+    background_tasks.add_task(
+        run_zip_indexing,
+        job_id,
+        zip_path,
+        collection_dir,
+        collection
+    )
+
+    return JobCreate(
+        job_id=job_id,
+        status="pending",
+        message=f"ZIP uploaded to collection '{collection}', extraction and indexing started"
+    )
+
+
 @router.get("/jobs/{job_id}")
 async def get_job_status(job_id: str):
     """