From c562f60f63ecbd012d99fe588d2b9c9b753c2d24 Mon Sep 17 00:00:00 2001 From: Michael Buckingham Date: Mon, 1 Dec 2025 21:22:22 -0500 Subject: [PATCH 1/6] Enable Playwright by default for all scrape sources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migration 019 sets use_playwright=True for all existing sources and changes the column default so new sources also use Playwright. Most modern job sites use JavaScript rendering, and without Playwright the scraper only gets the initial HTML before JS executes, missing dynamically loaded job listings. This was causing ~50% of scraping failures. Changes: - Add migration 019_enable_playwright_by_default.py - Update CLAUDE_STATUS.md with new default behavior - Update scraper guide to clarify Playwright is enabled by default 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CLAUDE_STATUS.md | 7 ++- .../019_enable_playwright_by_default.py | 52 +++++++++++++++++++ .../app/templates/admin/scraper_guide.html | 8 +-- 3 files changed, 62 insertions(+), 5 deletions(-) create mode 100644 backend/alembic/versions/019_enable_playwright_by_default.py diff --git a/CLAUDE_STATUS.md b/CLAUDE_STATUS.md index 3cf08b2..866bec9 100644 --- a/CLAUDE_STATUS.md +++ b/CLAUDE_STATUS.md @@ -349,7 +349,12 @@ cd backend && pytest tests/ -v - `backend/scraper/playwright_fetcher.py` - Python client for Playwright service - `backend/scraper/sources/generic.py` - `_fetch_page()` with Playwright/httpx logic - `backend/scraper/runner.py` - Always enables Playwright for all scrapers -- `backend/alembic/versions/006_add_use_playwright.py` - Migration (legacy) +- `backend/alembic/versions/019_enable_playwright_by_default.py` - Sets `use_playwright=True` for all sources + +**Database Default:** +- `use_playwright` column defaults to `True` for new sources (migration 019) +- All existing sources were updated to `use_playwright=True` +- The toggle exists in admin for rare cases 
where httpx-only is needed **Interactive Page Features (Playwright):** - `selectActions` - Array of `{selector, value}` for dropdown selection before page extraction diff --git a/backend/alembic/versions/019_enable_playwright_by_default.py b/backend/alembic/versions/019_enable_playwright_by_default.py new file mode 100644 index 0000000..46a13c6 --- /dev/null +++ b/backend/alembic/versions/019_enable_playwright_by_default.py @@ -0,0 +1,52 @@ +"""Enable Playwright by default for all sources + +This migration: +1. Sets use_playwright=True for all existing sources that have it False or NULL +2. Changes the column default to True for new sources + +Playwright is required for most modern job sites that use JavaScript rendering. +Without it, the scraper only gets the initial HTML before JS executes, missing +dynamically loaded job listings. + +Revision ID: 019 +Revises: 018 +Create Date: 2025-12-01 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = '019' +down_revision = '018' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # Enable Playwright for all existing sources + op.execute( + "UPDATE scrape_sources SET use_playwright = TRUE WHERE use_playwright = FALSE OR use_playwright IS NULL" + ) + + # Change the column default to True for new sources + op.alter_column( + 'scrape_sources', + 'use_playwright', + server_default=sa.text('1'), # MySQL uses 1 for True + existing_type=sa.Boolean(), + existing_nullable=True + ) + + +def downgrade() -> None: + # Revert column default to False + op.alter_column( + 'scrape_sources', + 'use_playwright', + server_default=sa.text('0'), + existing_type=sa.Boolean(), + existing_nullable=True + ) + # Note: We don't revert existing data as that could break working scrapers diff --git a/backend/app/templates/admin/scraper_guide.html b/backend/app/templates/admin/scraper_guide.html index ea76a5e..6aaa3bd 100644 --- a/backend/app/templates/admin/scraper_guide.html +++ b/backend/app/templates/admin/scraper_guide.html @@ -507,7 +507,7 @@

State Abbreviation

Playwright Features

-

Playwright is a headless browser that renders JavaScript. It's used automatically for all scrapers but provides extra features for DynamicScrapers.

+

Playwright is a headless browser that renders JavaScript. It's enabled by default for all sources to ensure JavaScript-rendered job listings are properly loaded. DynamicScrapers can also use these additional interactive features:

@@ -551,7 +551,7 @@

Special Flags use_playwright

-

Force Playwright browser rendering. Enabled by default for all scrapers, but can be explicitly set for DynamicScrapers that need it.

+

Enabled by default. All new sources use Playwright browser rendering automatically. This ensures JavaScript-rendered content is properly loaded. Only disable for rare cases where httpx-only is specifically needed.

@@ -581,10 +581,10 @@

Troubleshooting

No jobs found

    -
  • Check if the page requires JavaScript - enable Playwright
  • -
  • Verify CSS selectors match actual page structure
  • +
  • Verify CSS selectors match actual page structure (use browser DevTools)
  • Check for robots.txt blocking in scrape history
  • Try "Analyze Page with AI" for selector suggestions
  • +
  • Playwright is enabled by default - if issues persist, check Playwright service logs
From 129fd56d26ec42c14376e96c2a8c109d122d193d Mon Sep 17 00:00:00 2001 From: Michael Buckingham Date: Mon, 1 Dec 2025 21:37:26 -0500 Subject: [PATCH 2/6] Fix use_playwright to actually default to True MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous commit only set DB default but ORM default was still False, and runner.py hardcoded True ignoring the database setting entirely. Fixes: - Change ORM default from False to True in scrape_source.py - Runner now reads source.use_playwright (with True fallback for NULL) - Update/add tests to verify the default behavior This ensures: 1. New sources created via admin/CSV have use_playwright=True 2. The admin toggle can actually disable Playwright for rare httpx-only cases 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- backend/app/models/scrape_source.py | 4 ++-- backend/scraper/runner.py | 4 ++-- backend/tests/test_models.py | 31 ++++++++++++++++++++++++++++- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/backend/app/models/scrape_source.py b/backend/app/models/scrape_source.py index 7184ac7..ffbfef6 100644 --- a/backend/app/models/scrape_source.py +++ b/backend/app/models/scrape_source.py @@ -41,8 +41,8 @@ class ScrapeSource(Base): max_pages = Column(Integer, nullable=True, default=10) # Use Playwright (headless browser) instead of httpx for fetching - # Useful for sites with bot protection or JavaScript-rendered content - use_playwright = Column(Boolean, default=False) + # Enabled by default - most modern job sites use JavaScript rendering + use_playwright = Column(Boolean, default=True) # Default location to use when scraper doesn't extract location from page # e.g., "Bethel" for City of Bethel jobs, "Kotzebue" for City of Kotzebue diff --git a/backend/scraper/runner.py b/backend/scraper/runner.py index 10d7de4..d7888ca 100644 --- a/backend/scraper/runner.py +++ b/backend/scraper/runner.py @@ -272,8 +272,8 @@ def 
get_source_config(source: ScrapeSource) -> dict: "url_attribute": source.url_attribute, "selector_next_page": source.selector_next_page, "max_pages": source.max_pages, - # Always use Playwright - overhead is minimal vs failing on JS sites - "use_playwright": True, + # Use Playwright by default (True), but respect database setting for rare httpx-only cases + "use_playwright": source.use_playwright if source.use_playwright is not None else True, "default_location": source.default_location, "default_state": source.default_state, # SitemapScraper configuration diff --git a/backend/tests/test_models.py b/backend/tests/test_models.py index fd3c3d2..c33d5f6 100644 --- a/backend/tests/test_models.py +++ b/backend/tests/test_models.py @@ -521,11 +521,40 @@ def test_source_default_values(self, db): assert source.scraper_class == "GenericScraper" assert source.is_active is True - assert source.use_playwright is False + assert source.use_playwright is True # Default to True for JS-rendered sites assert source.max_pages == 10 assert source.url_attribute == "href" assert source.created_at is not None + def test_source_playwright_default_is_true(self, db): + """New sources should have use_playwright=True by default. + + Most modern job sites use JavaScript rendering, so Playwright + should be enabled by default to avoid missing dynamically loaded content. 
+ """ + source = ScrapeSource( + name="Playwright Default Test", + base_url="https://example.com", + ) + db.add(source) + db.commit() + db.refresh(source) + + assert source.use_playwright is True, "New sources should default to use_playwright=True" + + def test_source_playwright_can_be_disabled(self, db): + """Sources can explicitly disable Playwright for rare httpx-only cases.""" + source = ScrapeSource( + name="No Playwright Source", + base_url="https://example.com", + use_playwright=False, + ) + db.add(source) + db.commit() + db.refresh(source) + + assert source.use_playwright is False, "Should be able to explicitly disable Playwright" + def test_source_jobs_relationship(self, db): """ScrapeSource has jobs relationship.""" source = ScrapeSource( From 88a3b4da250e098789d98888120d21a56f530f4d Mon Sep 17 00:00:00 2001 From: Michael Buckingham Date: Mon, 1 Dec 2025 21:47:44 -0500 Subject: [PATCH 3/6] Add use_playwright checkbox to configure source form MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevents the Configure Source form from silently resetting use_playwright to False on every save. The checkbox is checked by default for new sources and preserves the existing value for existing sources. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- backend/app/templates/admin/configure_source.html | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/backend/app/templates/admin/configure_source.html b/backend/app/templates/admin/configure_source.html index 0741478..757602e 100644 --- a/backend/app/templates/admin/configure_source.html +++ b/backend/app/templates/admin/configure_source.html @@ -194,6 +194,17 @@

Scraper Type

Dynamic: Uses AI-generated custom scraper code. Only use if others don't work.

+
+ +

+ Enabled by default. Uses a real browser to render JavaScript-heavy pages. Disable only for simple static HTML sites. +

+

From 7c7367c0fe8df7b33b6b2ff6a283376970df8f10 Mon Sep 17 00:00:00 2001 From: Michael Buckingham Date: Mon, 1 Dec 2025 21:52:11 -0500 Subject: [PATCH 4/6] Show Playwright status in scrape modal loading state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Display "Using Playwright (headless browser)" or "Using httpx (direct HTTP)" in the scrape modal while the scrape is running, so admins can confirm which fetch method is being used without checking logs. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- backend/app/templates/admin/configure_source.html | 14 +++++++++++--- .../admin/partials/generated_scraper.html | 2 +- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/backend/app/templates/admin/configure_source.html b/backend/app/templates/admin/configure_source.html index 757602e..de688e4 100644 --- a/backend/app/templates/admin/configure_source.html +++ b/backend/app/templates/admin/configure_source.html @@ -35,7 +35,7 @@

Configure Source + onclick="showScrapeModal('{{ source.name }}', {{ 'true' if source.use_playwright or source.use_playwright is none else 'false' }})"> @@ -76,7 +76,7 @@

+ onclick="showScrapeModal('{{ source.name }}', {{ 'true' if source.use_playwright or source.use_playwright is none else 'false' }})"> @@ -391,6 +391,12 @@

Pagination (Optional)

Running scraper...

+

+ + + + +

@@ -452,16 +458,18 @@

Example Selectors: const resultsDiv = document.getElementById('ai-results'); // Scrape modal functions - window.showScrapeModal = function(sourceName) { + window.showScrapeModal = function(sourceName, usePlaywright) { const modal = document.getElementById('scrape-modal'); const loadingDiv = document.getElementById('scrape-modal-loading'); const resultDiv = document.getElementById('scrape-modal-result'); const sourceText = document.getElementById('scrape-modal-source'); + const playwrightText = document.getElementById('scrape-modal-playwright-text'); // Reset state loadingDiv.classList.remove('hidden'); resultDiv.innerHTML = ''; sourceText.textContent = sourceName || ''; + playwrightText.textContent = usePlaywright ? 'Using Playwright (headless browser)' : 'Using httpx (direct HTTP)'; // Show modal modal.classList.remove('hidden'); diff --git a/backend/app/templates/admin/partials/generated_scraper.html b/backend/app/templates/admin/partials/generated_scraper.html index bfb0ee7..5e335e1 100644 --- a/backend/app/templates/admin/partials/generated_scraper.html +++ b/backend/app/templates/admin/partials/generated_scraper.html @@ -36,7 +36,7 @@

hx-post="/admin/sources/{{ source.id }}/scrape" hx-target="#scrape-modal-result" hx-swap="innerHTML" - onclick="showScrapeModal('{{ source.name }}')" + onclick="showScrapeModal('{{ source.name }}', {{ 'true' if source.use_playwright or source.use_playwright is none else 'false' }})" class="w-full px-4 py-3 bg-purple-600 hover:bg-purple-700 text-white font-medium rounded-lg transition-colors flex items-center justify-center gap-2"> From afffdfedb8ed6d0885d08c643d20a05e6ed205d2 Mon Sep 17 00:00:00 2001 From: Michael Buckingham Date: Mon, 1 Dec 2025 21:58:45 -0500 Subject: [PATCH 5/6] Add null check for Playwright text element in scrape modal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Defensive coding to handle case where the playwright text element might not be found in the DOM. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- backend/app/templates/admin/configure_source.html | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/app/templates/admin/configure_source.html b/backend/app/templates/admin/configure_source.html index de688e4..ebaccd3 100644 --- a/backend/app/templates/admin/configure_source.html +++ b/backend/app/templates/admin/configure_source.html @@ -469,7 +469,9 @@

Example Selectors: loadingDiv.classList.remove('hidden'); resultDiv.innerHTML = ''; sourceText.textContent = sourceName || ''; - playwrightText.textContent = usePlaywright ? 'Using Playwright (headless browser)' : 'Using httpx (direct HTTP)'; + if (playwrightText) { + playwrightText.textContent = usePlaywright ? 'Using Playwright (headless browser)' : 'Using httpx (direct HTTP)'; + } // Show modal modal.classList.remove('hidden'); From e8c1e47dae593adedd4225eb029cf1a1bf3e2aed Mon Sep 17 00:00:00 2001 From: Michael Buckingham Date: Mon, 1 Dec 2025 22:15:22 -0500 Subject: [PATCH 6/6] Fix scrape success/auto-enable logic to consider jobs found MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, a scrape was marked "Failed" if there were ANY errors, even if jobs were successfully found. This was too strict. Changes: - last_scrape_success is now True if jobs were found OR no errors - Auto-enable now triggers when jobs are found (ignores warnings) This fixes sources staying in "Needs Configuration" and showing "Failed" status even when they successfully scraped jobs. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- backend/app/routers/admin.py | 4 ++-- backend/scraper/runner.py | 18 ++++++++++++++---- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/backend/app/routers/admin.py b/backend/app/routers/admin.py index 59f44c8..b345748 100644 --- a/backend/app/routers/admin.py +++ b/backend/app/routers/admin.py @@ -1074,9 +1074,9 @@ async def trigger_single_source_scrape(source_id: int, request: Request, db: Ses duration = time.time() - start_time - # Auto-enable source if it was in needs_configuration and scrape was successful + # Auto-enable source if it was in needs_configuration and jobs were found auto_enabled = False - if source.needs_configuration and result.jobs_found > 0 and not result.errors: + if source.needs_configuration and result.jobs_found > 0: source.is_active = True source.needs_configuration = False auto_enabled = True diff --git a/backend/scraper/runner.py b/backend/scraper/runner.py index d7888ca..f73aae5 100644 --- a/backend/scraper/runner.py +++ b/backend/scraper/runner.py @@ -341,7 +341,9 @@ def _run_adp_scraper( source.last_scraped_at = datetime.now(timezone.utc) - source.last_scrape_success = len(all_errors) == 0 + # Success if jobs were found, even with warnings + jobs_found = jobs_new + jobs_updated + jobs_unchanged + source.last_scrape_success = jobs_found > 0 or len(all_errors) == 0 duration = time.time() - start_time logger.info( @@ -419,7 +421,10 @@ def _run_ultipro_scraper( logger.exception(f"UltiPro scraper failed for {source.name} URL: {listing_url}") source.last_scraped_at = datetime.now(timezone.utc) - source.last_scrape_success = len(all_errors) == 0 + + # Success if jobs were found, even with warnings + jobs_found = jobs_new + jobs_updated + jobs_unchanged + source.last_scrape_success = jobs_found > 0 or len(all_errors) == 0 duration = time.time() - start_time logger.info( @@ -497,7 +502,10 @@ def _run_workday_scraper( 
logger.exception(f"Workday scraper failed for {source.name} URL: {listing_url}") source.last_scraped_at = datetime.now(timezone.utc) - source.last_scrape_success = len(all_errors) == 0 + + # Success if jobs were found, even with warnings + jobs_found = jobs_new + jobs_updated + jobs_unchanged + source.last_scrape_success = jobs_found > 0 or len(all_errors) == 0 duration = time.time() - start_time logger.info( @@ -678,7 +686,9 @@ def run_scraper(db: Session, source: ScrapeSource, trigger_type: str = "manual") all_errors.append(f"Scraper execution failed: {e}") # Update source's last_scrape_success status - source.last_scrape_success = len(all_errors) == 0 + # Success if jobs were found, even with warnings + jobs_found = jobs_new + jobs_updated + jobs_unchanged + source.last_scrape_success = jobs_found > 0 or len(all_errors) == 0 duration = time.time() - start_time