Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 8 additions & 9 deletions docker/base/fs/etc/searxng/limiter.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[real_ip]
[botdetection.proxy]
# Number of values to trust for X-Forwarded-For.
x_for = 1

Expand All @@ -7,6 +7,11 @@ x_for = 1
ipv4_prefix = 32
ipv6_prefix = 48

trusted_proxies = [
"127.0.0.0/8",
"::1",
]

[botdetection.ip_limit]
# To allow unlimited access within a local network, link-local addresses
# (networks) are, by default, not monitored by the ip_limit
Expand All @@ -19,14 +24,8 @@ link_token = false
# In the limiter, the ip_lists method has priority over all other methods.
# If an IP is in the pass_ip list, it has unrestricted access and is not
# checked if, for example, the "user agent" suggests a bot (e.g., curl).
block_ip = [
# '93.184.216.34', # Example IPv4 address
# '257.1.1.1', # Invalid IP --> will be ignored, logged in ERROR class
]
pass_ip = [
# '192.168.0.0/16', # IPv4 private network
# 'fe80::/10', # IPv6 link-local; overrides botdetection.ip_limit.filter_link_local
]
block_ip = []
pass_ip = []

# Activate passlist of (hardcoded) IPs from the SearXNG organization,
# e.g., `check.searx.space`.
Expand Down
2 changes: 1 addition & 1 deletion docker/base/fs/etc/searxng/settings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ enabled_plugins:
- 'Hash plugin'
- 'Self Informations'
- 'Tracker URL remover'
- 'Ahmia blacklist'

# - 'Hostnames plugin' # see 'hostnames' configuration below
# - 'Open Access DOI rewrite'

Expand Down
17 changes: 8 additions & 9 deletions docker/run/fs/etc/searxng/limiter.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[real_ip]
[botdetection.proxy]
# Number of values to trust for X-Forwarded-For.
x_for = 1

Expand All @@ -7,6 +7,11 @@ x_for = 1
ipv4_prefix = 32
ipv6_prefix = 48

trusted_proxies = [
"127.0.0.0/8",
"::1",
]

[botdetection.ip_limit]
# To allow unlimited access within a local network, link-local addresses
# (networks) are, by default, not monitored by the ip_limit
Expand All @@ -19,14 +24,8 @@ link_token = false
# In the limiter, the ip_lists method has priority over all other methods.
# If an IP is in the pass_ip list, it has unrestricted access and is not
# checked if, for example, the "user agent" suggests a bot (e.g., curl).
block_ip = [
# '93.184.216.34', # Example IPv4 address
# '257.1.1.1', # Invalid IP --> will be ignored, logged in ERROR class
]
pass_ip = [
# '192.168.0.0/16', # IPv4 private network
# 'fe80::/10', # IPv6 link-local; overrides botdetection.ip_limit.filter_link_local
]
block_ip = []
pass_ip = []

# Activate passlist of (hardcoded) IPs from the SearXNG organization,
# e.g., `check.searx.space`.
Expand Down
2 changes: 1 addition & 1 deletion docker/run/fs/etc/searxng/settings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ enabled_plugins:
- 'Hash plugin'
- 'Self Informations'
- 'Tracker URL remover'
- 'Ahmia blacklist'

# - 'Hostnames plugin' # see 'hostnames' configuration below
# - 'Open Access DOI rewrite'

Expand Down
18 changes: 9 additions & 9 deletions python/extensions/agent_init/_15_load_profile_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


class LoadProfileSettings(Extension):

async def execute(self, **kwargs) -> None:

if not self.agent or not self.agent.config.profile:
Expand Down Expand Up @@ -36,18 +36,18 @@ async def execute(self, **kwargs) -> None:
if settings_override:
# Preserve the original memory_subdir unless it's explicitly overridden
current_memory_subdir = self.agent.config.memory_subdir
# FIX: Also preserve the original profile
original_profile = self.agent.config.profile

new_config = initialize_agent(override_settings=settings_override)

if (
"agent_memory_subdir" not in settings_override
and current_memory_subdir != "default"
):
new_config.memory_subdir = current_memory_subdir

# FIX: Restore the original profile
new_config.profile = original_profile

self.agent.config = new_config
# self.agent.context.log.log(
# type="info",
# content=(
# "Loaded custom settings for agent "
# f"{self.agent.number} with profile '{self.agent.config.profile}'."
# ),
# )

36 changes: 33 additions & 3 deletions python/helpers/document_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,27 @@ async def document_qa(
*[self.document_get_content(uri, True) for uri in document_uris]
)
await self.agent.handle_intervention()

# Calculate adaptive threshold based on document sizes
# Count total chunks across all documents
total_chunks = 0
indexed_uris = []
for uri in document_uris:
norm_uri = self.store.normalize_uri(uri)
chunks = await self.store._get_document_chunks(norm_uri)
total_chunks += len(chunks)
indexed_uris.append(norm_uri)

# For very small documents, use a lower threshold to ensure retrieval
if total_chunks < 5:
adaptive_threshold = 0.0 # Accept any similarity for tiny docs
elif total_chunks < 10:
adaptive_threshold = 0.3
else:
adaptive_threshold = DEFAULT_SEARCH_THRESHOLD

self.progress_callback(f"Total document chunks: {total_chunks}, Using adaptive search threshold: {adaptive_threshold}")

selected_chunks = {}
for question in questions:
self.progress_callback(f"Optimizing query: {question}")
Expand All @@ -399,14 +420,23 @@ async def document_qa(
chunks = await self.store.search_documents(
query=optimized_query,
limit=100,
threshold=DEFAULT_SEARCH_THRESHOLD,
threshold=adaptive_threshold,
filter=doc_filter,
)

self.progress_callback(f"Found {len(chunks)} chunks")

for chunk in chunks:
selected_chunks[chunk.metadata["id"]] = chunk
# FALLBACK: For small documents, if no chunks found, include all chunks
if not chunks and total_chunks < 5:
self.progress_callback(f"No semantic matches for small document, including all chunks as fallback")
for uri in document_uris:
norm_uri = self.store.normalize_uri(uri)
doc_chunks = await self.store._get_document_chunks(norm_uri)
for chunk in doc_chunks:
selected_chunks[chunk.metadata["id"]] = chunk
else:
for chunk in chunks:
selected_chunks[chunk.metadata["id"]] = chunk

if not selected_chunks:
self.progress_callback("No relevant content found in the documents")
Expand Down