diff --git a/docker/base/fs/etc/searxng/limiter.toml b/docker/base/fs/etc/searxng/limiter.toml index 855521bef0..6064b60b17 100644 --- a/docker/base/fs/etc/searxng/limiter.toml +++ b/docker/base/fs/etc/searxng/limiter.toml @@ -1,4 +1,4 @@ -[real_ip] +[botdetection.proxy] # Number of values to trust for X-Forwarded-For. x_for = 1 @@ -7,6 +7,11 @@ x_for = 1 ipv4_prefix = 32 ipv6_prefix = 48 +trusted_proxies = [ + "127.0.0.0/8", + "::1", +] + [botdetection.ip_limit] # To get unlimited access in a local network, by default link-local addresses # (networks) are not monitored by the ip_limit @@ -19,14 +24,8 @@ link_token = false # In the limiter, the ip_lists method has priority over all other methods. # If an IP is in the pass_ip list, it has unrestricted access and is not # checked if, for example, the "user agent" suggests a bot (e.g., curl). -block_ip = [ - # '93.184.216.34', # Example IPv4 address - # '257.1.1.1', # Invalid IP --> will be ignored, logged in ERROR class -] -pass_ip = [ - # '192.168.0.0/16', # IPv4 private network - # 'fe80::/10', # IPv6 link-local; overrides botdetection.ip_limit.filter_link_local -] +block_ip = [] +pass_ip = [] # Activate passlist of (hardcoded) IPs from the SearXNG organization, # e.g., `check.searx.space`. diff --git a/docker/base/fs/etc/searxng/settings.yml b/docker/base/fs/etc/searxng/settings.yml index 61ddae77f2..aaa36dff56 100644 --- a/docker/base/fs/etc/searxng/settings.yml +++ b/docker/base/fs/etc/searxng/settings.yml @@ -39,7 +39,7 @@ enabled_plugins: - 'Hash plugin' - 'Self Informations' - 'Tracker URL remover' - - 'Ahmia blacklist' + # - 'Hostnames plugin' # see 'hostnames' configuration below # - 'Open Access DOI rewrite' diff --git a/docker/run/fs/etc/searxng/limiter.toml b/docker/run/fs/etc/searxng/limiter.toml index 855521bef0..6064b60b17 100644 --- a/docker/run/fs/etc/searxng/limiter.toml +++ b/docker/run/fs/etc/searxng/limiter.toml @@ -1,4 +1,4 @@ -[real_ip] +[botdetection.proxy] # Number of values to trust for X-Forwarded-For. x_for = 1 @@ -7,6 +7,11 @@ x_for = 1 ipv4_prefix = 32 ipv6_prefix = 48 +trusted_proxies = [ + "127.0.0.0/8", + "::1", +] + [botdetection.ip_limit] # To get unlimited access in a local network, by default link-local addresses # (networks) are not monitored by the ip_limit @@ -19,14 +24,8 @@ link_token = false # In the limiter, the ip_lists method has priority over all other methods. # If an IP is in the pass_ip list, it has unrestricted access and is not # checked if, for example, the "user agent" suggests a bot (e.g., curl). -block_ip = [ - # '93.184.216.34', # Example IPv4 address - # '257.1.1.1', # Invalid IP --> will be ignored, logged in ERROR class -] -pass_ip = [ - # '192.168.0.0/16', # IPv4 private network - # 'fe80::/10', # IPv6 link-local; overrides botdetection.ip_limit.filter_link_local -] +block_ip = [] +pass_ip = [] # Activate passlist of (hardcoded) IPs from the SearXNG organization, # e.g., `check.searx.space`. diff --git a/docker/run/fs/etc/searxng/settings.yml b/docker/run/fs/etc/searxng/settings.yml index 147bee3f94..bb4423057c 100644 --- a/docker/run/fs/etc/searxng/settings.yml +++ b/docker/run/fs/etc/searxng/settings.yml @@ -44,7 +44,7 @@ enabled_plugins: - 'Hash plugin' - 'Self Informations' - 'Tracker URL remover' - - 'Ahmia blacklist' + # - 'Hostnames plugin' # see 'hostnames' configuration below # - 'Open Access DOI rewrite' diff --git a/python/extensions/agent_init/_15_load_profile_settings.py b/python/extensions/agent_init/_15_load_profile_settings.py index d4c9b5ab42..562f404b77 100644 --- a/python/extensions/agent_init/_15_load_profile_settings.py +++ b/python/extensions/agent_init/_15_load_profile_settings.py @@ -4,7 +4,7 @@ class LoadProfileSettings(Extension): - + async def execute(self, **kwargs) -> None: if not self.agent or not self.agent.config.profile: @@ -36,18 +36,18 @@ async def execute(self, **kwargs) -> None: if settings_override: # Preserve the original memory_subdir unless it's explicitly overridden current_memory_subdir = self.agent.config.memory_subdir + # FIX: Also preserve the original profile + original_profile = self.agent.config.profile + new_config = initialize_agent(override_settings=settings_override) + if ( "agent_memory_subdir" not in settings_override and current_memory_subdir != "default" ): new_config.memory_subdir = current_memory_subdir + + # FIX: Restore the original profile + new_config.profile = original_profile + self.agent.config = new_config - # self.agent.context.log.log( - # type="info", - # content=( - # "Loaded custom settings for agent " - # f"{self.agent.number} with profile '{self.agent.config.profile}'." - # ), - # ) - diff --git a/python/helpers/document_query.py b/python/helpers/document_query.py index 6ba38855c6..40edf48272 100644 --- a/python/helpers/document_query.py +++ b/python/helpers/document_query.py @@ -373,6 +373,27 @@ async def document_qa( *[self.document_get_content(uri, True) for uri in document_uris] ) await self.agent.handle_intervention() + + # Calculate adaptive threshold based on document sizes + # Count total chunks across all documents + total_chunks = 0 + indexed_uris = [] + for uri in document_uris: + norm_uri = self.store.normalize_uri(uri) + chunks = await self.store._get_document_chunks(norm_uri) + total_chunks += len(chunks) + indexed_uris.append(norm_uri) + + # For very small documents, use lower threshold to ensure retrieval + if total_chunks < 5: + adaptive_threshold = 0.0 # Accept any similarity for tiny docs + elif total_chunks < 10: + adaptive_threshold = 0.3 + else: + adaptive_threshold = DEFAULT_SEARCH_THRESHOLD + + self.progress_callback(f"Total document chunks: {total_chunks}, Using adaptive search threshold: {adaptive_threshold}") + selected_chunks = {} for question in questions: self.progress_callback(f"Optimizing query: {question}") @@ -399,14 +420,23 @@ async def document_qa( chunks = await self.store.search_documents( query=optimized_query, limit=100, - threshold=DEFAULT_SEARCH_THRESHOLD, + threshold=adaptive_threshold, filter=doc_filter, ) self.progress_callback(f"Found {len(chunks)} chunks") - for chunk in chunks: - selected_chunks[chunk.metadata["id"]] = chunk + # FALLBACK: For small documents, if no chunks found, include all chunks + if not chunks and total_chunks < 5: + self.progress_callback(f"No semantic matches for small document, including all chunks as fallback") + for uri in document_uris: + norm_uri = self.store.normalize_uri(uri) + doc_chunks = await self.store._get_document_chunks(norm_uri) + for chunk in doc_chunks: + selected_chunks[chunk.metadata["id"]] = chunk + else: + for chunk in chunks: + selected_chunks[chunk.metadata["id"]] = chunk if not selected_chunks: self.progress_callback("No relevant content found in the documents")