1 change: 1 addition & 0 deletions capture_log.txt
@@ -0,0 +1 @@
bash: ./capture_all_tasks_complete.sh: No such file or directory
230 changes: 230 additions & 0 deletions cleanup_screenshots_ocr.py
@@ -0,0 +1,230 @@
#!/usr/bin/env python3
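"""Deduplicate screenshot PNGs by combining OCR text similarity (top half of
each image) with pixel-level image correlation. Duplicates are moved to a
duplicates_backup folder and a JSON analysis report is written."""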

import os
import sys
import shutil
import json
from datetime import datetime
from collections import defaultdict
try:
    # Importing the third-party packages inside the try block keeps the
    # install hint below useful for any of them, not just pytesseract.
    import cv2
    import numpy as np
    import pytesseract
    from PIL import Image
    from difflib import SequenceMatcher
except ImportError as e:
print(f"Error: Missing required dependency: {e}")
print("Please install required packages:")
print("pip install pytesseract pillow opencv-python")
sys.exit(1)

def extract_text_from_image(image_path):
"""Extract text from image using OCR, focusing on top half"""
try:
# Open image and crop to top half for better text extraction
with Image.open(image_path) as img:
width, height = img.size
top_half = img.crop((0, 0, width, height // 2))

# Extract text using OCR
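            # --psm 6 tells Tesseract to assume a single uniform block of text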
text = pytesseract.image_to_string(top_half, config='--psm 6')
return text.strip().lower()
except Exception as e:
print(f"Error extracting text from {image_path}: {e}")
return ""

def calculate_image_similarity(img1_path, img2_path):
"""Calculate image similarity using correlation coefficient"""
try:
# Read images in grayscale
img1 = cv2.imread(img1_path, cv2.IMREAD_GRAYSCALE)
img2 = cv2.imread(img2_path, cv2.IMREAD_GRAYSCALE)

if img1 is None or img2 is None:
return 0.0

# Resize images to same size for comparison
height, width = min(img1.shape[0], img2.shape[0]), min(img1.shape[1], img2.shape[1])
img1_resized = cv2.resize(img1, (width, height))
img2_resized = cv2.resize(img2, (width, height))

# Calculate correlation coefficient
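        # With equal-sized inputs, matchTemplate returns a 1x1 result whose single
        # value is the normalized cross-correlation coefficient (range -1 to 1)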
correlation = cv2.matchTemplate(img1_resized, img2_resized, cv2.TM_CCOEFF_NORMED)[0][0]
return max(0.0, correlation) # Ensure non-negative

except Exception as e:
print(f"Error calculating image similarity between {img1_path} and {img2_path}: {e}")
return 0.0

def text_similarity(text1, text2):
"""Calculate text similarity using sequence matcher"""
if not text1 and not text2:
return 1.0
if not text1 or not text2:
return 0.0
return SequenceMatcher(None, text1, text2).ratio()

def are_duplicates(file1, file2, text1, text2):
"""
Enhanced duplicate detection using both text and image similarity
Returns True if items are duplicates (both text AND image highly similar)
"""
# Calculate text similarity
text_sim = text_similarity(text1, text2)

# Calculate image similarity
image_sim = calculate_image_similarity(file1, file2)

# Consider duplicates only if BOTH text and image are highly similar
text_threshold = 0.80
image_threshold = 0.95

is_duplicate = (text_sim >= text_threshold) and (image_sim >= image_threshold)

print(f" Comparing {os.path.basename(file1)} vs {os.path.basename(file2)}")
print(f" Text similarity: {text_sim:.3f} (threshold: {text_threshold})")
print(f" Image similarity: {image_sim:.3f} (threshold: {image_threshold})")
print(f" Duplicate: {is_duplicate}")

return is_duplicate

def group_duplicates(screenshot_files):
"""Group screenshots by similarity using enhanced detection"""
print("Extracting text from screenshots...")

# Extract text from all images
image_texts = {}
for file_path in screenshot_files:
text = extract_text_from_image(file_path)
image_texts[file_path] = text
print(f" {os.path.basename(file_path)}: '{text[:50]}{'...' if len(text) > 50 else ''}'")

print("\nGrouping duplicates using enhanced detection...")

groups = []
processed = set()
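    # Greedy grouping: each unprocessed file seeds a group, and remaining files
    # are compared only against that seed rather than against every group member.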

for i, file1 in enumerate(screenshot_files):
if file1 in processed:
continue

# Start a new group with this file
current_group = [file1]
processed.add(file1)

# Find all duplicates of this file
        for file2 in screenshot_files[i+1:]:
if file2 in processed:
continue

if are_duplicates(file1, file2, image_texts[file1], image_texts[file2]):
current_group.append(file2)
processed.add(file2)

groups.append(current_group)
print(f"Group {len(groups)}: {len(current_group)} files")

return groups

def cleanup_screenshots(directory):
"""Main cleanup function with enhanced duplicate detection"""
if not os.path.exists(directory):
print(f"Error: Directory {directory} does not exist")
return

# Find all PNG files
screenshot_files = []
for file in os.listdir(directory):
if file.lower().endswith('.png'):
screenshot_files.append(os.path.join(directory, file))

if not screenshot_files:
print(f"No PNG files found in {directory}")
return

screenshot_files.sort()
total_files = len(screenshot_files)

print(f"Found {total_files} screenshot files in {directory}")
print("=" * 60)

# Group duplicates using enhanced detection
groups = group_duplicates(screenshot_files)

# Create duplicates backup directory
duplicates_dir = os.path.join(directory, "duplicates_backup")
os.makedirs(duplicates_dir, exist_ok=True)

# Process groups
kept_files = []
moved_files = []

for i, group in enumerate(groups, 1):
if len(group) == 1:
# Single file, keep it
kept_files.extend(group)
print(f"\nGroup {i}: Keeping unique file {os.path.basename(group[0])}")
else:
# Multiple files, keep first and move others
keep_file = group[0]
duplicate_files = group[1:]

kept_files.append(keep_file)
moved_files.extend(duplicate_files)

print(f"\nGroup {i}: Keeping {os.path.basename(keep_file)}, moving {len(duplicate_files)} duplicates")

# Move duplicates to backup directory
for duplicate in duplicate_files:
backup_path = os.path.join(duplicates_dir, os.path.basename(duplicate))
# Handle name conflicts
counter = 1
while os.path.exists(backup_path):
name, ext = os.path.splitext(os.path.basename(duplicate))
backup_path = os.path.join(duplicates_dir, f"{name}_{counter}{ext}")
counter += 1

shutil.move(duplicate, backup_path)
print(f" Moved {os.path.basename(duplicate)} to duplicates_backup/")

# Generate statistics
unique_files = len(kept_files)
duplicates_removed = len(moved_files)
reduction_percentage = (duplicates_removed / total_files) * 100 if total_files > 0 else 0

print("\n" + "=" * 60)
print("CLEANUP SUMMARY")
print("=" * 60)
print(f"Total files processed: {total_files}")
print(f"Unique files kept: {unique_files}")
print(f"Duplicates moved to backup: {duplicates_removed}")
print(f"Reduction: {reduction_percentage:.1f}%")
print(f"Duplicates backed up to: {duplicates_dir}")

# Save analysis report
report = {
"timestamp": datetime.now().isoformat(),
"directory": directory,
"total_files": total_files,
"unique_files": unique_files,
"duplicates_removed": duplicates_removed,
"reduction_percentage": reduction_percentage,
"groups": len(groups),
"duplicates_backup_dir": duplicates_dir
}

report_file = os.path.join(directory, "ocr_analysis_report.json")
with open(report_file, 'w') as f:
json.dump(report, f, indent=2)

print(f"Analysis report saved to: {report_file}")

if __name__ == "__main__":
if len(sys.argv) != 2:
print("Usage: python cleanup_screenshots_ocr.py <directory>")
print("Example: python cleanup_screenshots_ocr.py cypress/screenshots/memory_game_capture.cy.js/")
sys.exit(1)

directory = sys.argv[1]
cleanup_screenshots(directory)
31 changes: 31 additions & 0 deletions cypress.config.js
@@ -0,0 +1,31 @@
const { defineConfig } = require('cypress');

module.exports = defineConfig({
e2e: {
setupNodeEvents(on, config) {
// implement node event listeners here
},
// Video recording settings
video: true,
videoCompression: 32,
videosFolder: 'cypress/videos',
screenshotsFolder: 'cypress/screenshots',
viewportWidth: 1000,
viewportHeight: 660,
defaultCommandTimeout: 30000,
requestTimeout: 30000,
responseTimeout: 30000,
pageLoadTimeout: 60000,
// Memory management settings
experimentalMemoryManagement: true,
numTestsKeptInMemory: 1,
    // Custom env value, not a built-in Cypress option: it only affects recording
    // if something in the project reads Cypress.env('videoFrameRate')
    env: {
      videoFrameRate: 5
    }
},
retries: {
runMode: 0,
openMode: 0,
},
});
72 changes: 72 additions & 0 deletions cypress/e2e/adult_reasoning_improved.cy.js
@@ -0,0 +1,72 @@
describe('Adult Reasoning Improved Screenshot Capture', () => {
it('captures screenshots from adult-reasoning with proper interactions', () => {
cy.visit('http://localhost:8080/?task=adult-reasoning');

// Mock fullscreen API
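    // Stub requestFullscreen to resolve and report fullscreenElement as set, so
    // the task can proceed past any fullscreen prompt without real user input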
cy.window().then((win) => {
win.document.documentElement.requestFullscreen = cy.stub().resolves();
Object.defineProperty(win.document, 'fullscreenElement', {
get: () => win.document.documentElement
});
});

// Initial screenshot
cy.screenshot('01-initial-load');

// Function to handle interactions and take screenshots
const captureWithInteraction = (screenshotName) => {
cy.wait(8000); // Wait 8 seconds between screenshots

// Try to interact with the task
cy.get('body').then(($body) => {
// Check for fullscreen/start buttons first
if ($body.find('button:contains("OK")').length > 0) {
cy.get('button:contains("OK")').first().click({ force: true });
}
// Check for Continue buttons
else if ($body.find('button:contains("Continue")').length > 0) {
cy.get('button:contains("Continue")').first().click({ force: true });
}
// Check for multi-response buttons (main task responses)
else if ($body.find('#jspsych-html-multi-response-btngroup button').length > 0) {
// Click a random response button
cy.get('#jspsych-html-multi-response-btngroup button').then($buttons => {
const randomIndex = Math.floor(Math.random() * $buttons.length);
cy.wrap($buttons[randomIndex]).click({ force: true });
});
}
// Check for any other buttons
else if ($body.find('button').length > 0) {
cy.get('button').first().click({ force: true });
}
// Try clicking on clickable elements
else if ($body.find('[onclick], .clickable, .jspsych-btn').length > 0) {
cy.get('[onclick], .clickable, .jspsych-btn').first().click({ force: true });
}
});

// Take screenshot after interaction
cy.screenshot(screenshotName);
};

// Capture 15 screenshots with interactions
captureWithInteraction('02-after-8s');
captureWithInteraction('03-after-16s');
captureWithInteraction('04-after-24s');
captureWithInteraction('05-after-32s');
captureWithInteraction('06-after-40s');
captureWithInteraction('07-after-48s');
captureWithInteraction('08-after-56s');
captureWithInteraction('09-after-64s');
captureWithInteraction('10-after-72s');
captureWithInteraction('11-after-80s');
captureWithInteraction('12-after-88s');
captureWithInteraction('13-after-96s');
captureWithInteraction('14-after-104s');
captureWithInteraction('15-final');

// Final wait and screenshot
cy.wait(5000);
cy.screenshot('16-very-final');
});
});