Merged
10 changes: 9 additions & 1 deletion backend/btrixcloud/models.py
@@ -36,6 +36,8 @@

from .db import BaseMongoModel

from .utils import is_bool

# num browsers per crawler instance
NUM_BROWSERS = int(os.environ.get("NUM_BROWSERS", 2))

@@ -65,6 +67,12 @@
# Minimum part size for file uploads
MIN_UPLOAD_PART_SIZE = 10000000

# enable dedupe by default
DEDUPE_FEATURE_ENABLED_DEFAULT = is_bool(
os.environ.get("DEDUPE_FEATURE_ENABLED_DEFAULT")
)


# annotated types
# ============================================================================

@@ -2325,7 +2333,7 @@ class FeatureFlags(ValidatedFeatureFlags):

dedupeEnabled: bool = Field(
description="Enable deduplication options for an org. Intended for beta-testing dedupe.",
default=False,
default=DEDUPE_FEATURE_ENABLED_DEFAULT,
)
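The `is_bool` helper imported above lives in `.utils` and is not shown in this diff. A minimal sketch of what such an env-var parser might look like (the exact set of accepted truthy strings is an assumption, not taken from the source):

```python
import os


def is_bool(val):
    """Hypothetical sketch of the is_bool helper imported from .utils.

    The real implementation may differ. Here, an unset or empty value
    parses as False, and common truthy strings parse as True.
    """
    return (val or "").strip().lower() in ("1", "true", "yes", "on")


# Mirrors the pattern in models.py: the default is computed once, at
# import time, from the environment variable the configmap injects.
DEDUPE_FEATURE_ENABLED_DEFAULT = is_bool(
    os.environ.get("DEDUPE_FEATURE_ENABLED_DEFAULT")
)
```

Note that a module-level constant like this is evaluated once at import time, so changing the environment variable in a running process has no effect until restart.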


2 changes: 2 additions & 0 deletions chart/templates/configmap.yaml
@@ -114,6 +114,8 @@ data:

ENABLE_AUTO_RESIZE_INDEX_STORAGE: "{{ .Values.dedupe.enable_auto_resize }}"

DEDUPE_FEATURE_ENABLED_DEFAULT: "{{ .Values.dedupe.default_enabled }}"
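Because Helm renders `.Values.dedupe.default_enabled` into the configmap as a quoted string, the backend container always sees literal text such as `"true"` or `"false"`, never a real boolean. A short Python illustration of the pitfall this creates (the value is illustrative):

```python
# What os.environ.get("DEDUPE_FEATURE_ENABLED_DEFAULT") would return
# inside the pod after Helm renders `default_enabled: false`:
raw = "false"

# Any non-empty string is truthy, so a naive bool() check is wrong here.
assert bool(raw) is True

# The string must be parsed explicitly, which is what an is_bool-style
# helper on the backend side is for.
enabled = raw.strip().lower() in ("1", "true", "yes", "on")
assert enabled is False
```

This is why the backend parses the variable through a helper rather than reading it directly.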


{{- if .Values.available_plans }}
AVAILABLE_PLANS: {{ .Values.available_plans | toJson }}
2 changes: 2 additions & 0 deletions chart/test/test-nightly-addons.yaml
@@ -26,6 +26,8 @@ dedupe:

importer_channel: dedupe

default_enabled: false

memory: "1Gi"
cpu: "100m"
storage: "1Gi"
3 changes: 3 additions & 0 deletions chart/test/test.yaml
@@ -24,6 +24,9 @@ redis_storage: "100Mi"
profile_browser_workdir_size: "100Mi"
crawler_storage: "500Mi"

# dedupe
dedupe:
default_enabled: false

# for testing only
crawler_extra_cpu_per_browser: 300m
6 changes: 5 additions & 1 deletion chart/values.yaml
@@ -55,6 +55,7 @@ crawler_extra_args: ""
# max allowed browser windows per crawl
max_browser_windows: 8


# Cluster Settings
# =========================================
name: browsertrix-cloud
@@ -250,7 +251,7 @@ redis_memory: "200Mi"
redis_storage: "3Gi"


# Redis Dedup Index
# Dedupe Index
# =========================================
dedupe:
backend_type: kvrocks
@@ -261,6 +262,9 @@ dedupe:
# backend_type: redis
# dedupe_image: redis

# enable dedupe by default, without requiring the per-org feature flag
default_enabled: true

memory: "1Gi"
cpu: "100m"
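A deployment that wants to keep dedupe behind the per-org feature flag can override this chart default in its own values file. A sketch (only the `dedupe.default_enabled` key comes from this chart; the file name and override mechanism are standard Helm usage):

```yaml
# custom-values.yaml, passed via: helm upgrade -f custom-values.yaml ...
dedupe:
  default_enabled: false
```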

5 changes: 5 additions & 0 deletions frontend/docs/docs/user-guide/deduplication.md
@@ -1,5 +1,10 @@
# Deduplication

!!! info "Deduplication is in Beta"

As of the current release, the feature is still in beta and may not be available to all users.
If you don't see the options below, consult your admin or reach out to support to request access.

## Overview

Deduplication (or “dedupe”) is the process of preventing duplicate content from being stored during crawling. In Browsertrix, deduplication is facilitated through [collections](./collection.md), which allow arbitrary grouping of crawled content as needed.
5 changes: 5 additions & 0 deletions frontend/docs/docs/user-guide/org-settings.md
@@ -31,6 +31,11 @@ Set default suggested settings for all new crawl workflows. When creating a new

## Deduplication

!!! info "Deduplication is in Beta"

As of the current release, the feature is still in beta and may not be available to all users.
If you don't see the options below, consult your admin or reach out to support to request access.

View and manage deduplication indexes for all collections used as [deduplication sources](deduplication.md) in the org. Each entry includes information such as how many archived items and URLs are included in the index and how many deleted archived items are purgeable from the index. From the action menu, purge or delete the deduplication index for each collection.

<!-- ## Limits
5 changes: 5 additions & 0 deletions frontend/docs/docs/user-guide/workflow-setup.md
@@ -427,6 +427,11 @@ Cron schedules are always in [UTC](https://en.wikipedia.org/wiki/Coordinated_Uni

## Deduplication

!!! info "Deduplication is in Beta"

As of the current release, the feature is still in beta and may not be available to all users.
If you don't see the options below, consult your admin or reach out to support to request access.

Prevent duplicate content from being crawled and stored.

### Crawl Deduplication