Skip to content

Commit 1bb0554

Browse files
Merge branch 'feature/default-field-optimization' into 'main'
Do not search in all fields by default

# 📌 Summary

This change removes searching in all fields by default. Use `*:` if searching in all fields is still required.

## 🔗 Issue Reference

See merge request swiss-armed-forces/cyber-command/cea/loom!223
2 parents e5fd6cf + 9253aae commit 1bb0554

File tree

5 files changed

+73
-12
lines changed

5 files changed

+73
-12
lines changed

.vscode/launch.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
"remoteRoot": "/code"
2323
}
2424
],
25-
"justMyCode": true
25+
"justMyCode": false
2626
},
2727
{
2828
"name": "Attach Worker",
@@ -82,7 +82,7 @@
8282
"remoteRoot": "/code"
8383
}
8484
],
85-
"justMyCode": true
85+
"justMyCode": false
8686
},
8787
{
8888
"name": "Attach Crawler",
@@ -98,7 +98,7 @@
9898
"remoteRoot": "/code"
9999
}
100100
],
101-
"justMyCode": true
101+
"justMyCode": false
102102
},
103103
{
104104
"name": "Attach by Port (Prompt)",
@@ -114,7 +114,7 @@
114114
"remoteRoot": "/code"
115115
}
116116
],
117-
"justMyCode": true
117+
"justMyCode": false
118118
}
119119
],
120120
"inputs": [

Frontend/src/features/search/container/ResultCard.tsx

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,12 @@ export const ResultCard: React.FC<ResultCardProps> = React.memo(
6868
inView: inView,
6969
}),
7070
);
71-
}, [inView, fileId, dispatch]);
71+
}, [
72+
inView,
73+
fileId,
74+
searchQuery, // this is required here as we want to re-run this every time the user changes query. The same file might be in the old and new query results
75+
dispatch,
76+
]);
7277

7378
const handleViewDetail = () => {
7479
dispatch(

backend/common/common/file/file_repository.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,6 @@ class Meta: # pylint: disable=too-few-public-methods
299299
)
300300

301301
class Index: # pylint: disable=too-few-public-methods
302-
"""The index."""
303302

304303
name = "file"
305304
settings = {
@@ -519,8 +518,12 @@ def get_embedding_generator_by_knn(
519518
filter_query = Q(
520519
"query_string",
521520
query=self._query_builder.build(query),
522-
default_field="*",
523521
default_operator="AND",
522+
# Be lenient because the index-level default fields are set dynamically
523+
# (index.query.default_field). Those fields can span multiple data types
524+
# (text, numeric, boolean, date), and query_string parsing would otherwise
525+
# fail when a term is incompatible with a field's type.
526+
lenient=True,
524527
)
525528

526529
for q in embedding_vectors:

backend/common/common/models/es_repository.py

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,51 @@ def to_es_dict(self) -> dict:
114114

115115
return document_dict
116116

117+
@classmethod
118+
def get_default_fields(cls) -> list[str]:
119+
"""Extract all field names from the document mapping, including nested fields
120+
and multi-fields.
121+
122+
Returns a list of field names that can be used for index.query.default_field
123+
setting to limit default search scope and improve performance.
124+
"""
125+
126+
def extract_fields(mapping: dict, prefix: str = "") -> list[str]:
127+
"""Recursively extract field names from mapping dictionary.
128+
129+
Args:
130+
mapping: The mapping dictionary (can contain 'properties' or 'fields')
131+
prefix: The current field path prefix (for nested fields)
132+
133+
Returns:
134+
List of field names including nested paths
135+
(e.g., 'parent.child', 'title.keyword')
136+
"""
137+
fields = []
138+
for field_name, field_config in mapping.items():
139+
# Build the full field path
140+
full_path = f"{prefix}.{field_name}" if prefix else field_name
141+
fields.append(full_path)
142+
143+
if isinstance(field_config, dict):
144+
# Recurse into nested object properties
145+
if "properties" in field_config:
146+
nested_fields = extract_fields(
147+
field_config["properties"], full_path
148+
)
149+
fields.extend(nested_fields)
150+
151+
# Recurse into multi-fields
152+
if "fields" in field_config:
153+
multi_fields = extract_fields(field_config["fields"], full_path)
154+
fields.extend(multi_fields)
155+
156+
return fields
157+
158+
mapping_dict = cls._doc_type.mapping.to_dict()
159+
field_names = extract_fields(mapping_dict.get("properties", {}))
160+
return field_names
161+
117162

118163
EsRepositoryObjectT = TypeVar("EsRepositoryObjectT", bound=EsRepositoryObject)
119164
EsRepositoryDocumentT = TypeVar("EsRepositoryDocumentT", bound=_EsRepositoryDocument)
@@ -307,8 +352,12 @@ def _get_search_by_query(
307352
search = search.query(
308353
"query_string",
309354
query=query_string,
310-
default_field="*",
311355
default_operator="AND",
356+
# Be lenient because the index-level default fields are set dynamically
357+
# (index.query.default_field). Those fields can span multiple data types
358+
# (text, numeric, boolean, date), and query_string parsing would otherwise
359+
# fail when a term is incompatible with a field's type.
360+
lenient=True,
312361
)
313362

314363
return search
@@ -522,6 +571,10 @@ def init(self):
522571
using=self._elasticsearch
523572
):
524573
self._index.close(using=self._elasticsearch)
574+
# set settings before initializing
575+
self._index.settings(
576+
query={"default_field": self._document_type.get_default_fields()}
577+
)
525578
# initialize
526579
self._document_type.init(using=self._elasticsearch)
527580
# after initialization: open the index

charts/templates/elasticsearch/statefulset.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,20 +43,20 @@ spec:
4343
httpGet:
4444
path: /_cluster/health?wait_for_status=yellow&timeout=10s
4545
port: {{ .Values.elasticsearch.service.port }}
46-
initialDelaySeconds: {{ default 0 .Values.elasticsearch.readinessProbe.initialDelaySeconds }}
46+
initialDelaySeconds: {{ default 5 .Values.elasticsearch.readinessProbe.initialDelaySeconds }}
4747
periodSeconds: {{ default 15 .Values.elasticsearch.readinessProbe.periodSeconds }}
4848
timeoutSeconds: {{ default 12 .Values.elasticsearch.readinessProbe.timeoutSeconds }}
49-
failureThreshold: {{ default 3 .Values.elasticsearch.readinessProbe.failureThreshold }}
49+
failureThreshold: {{ default 10 .Values.elasticsearch.readinessProbe.failureThreshold }}
5050
{{- end }}
5151
{{- if .Values.elasticsearch.livenessProbe.enabled }}
5252
livenessProbe:
5353
httpGet:
5454
path: /_cluster/health?wait_for_status=yellow&timeout=10s
5555
port: {{ .Values.elasticsearch.service.port }}
56-
initialDelaySeconds: {{ default 0 .Values.elasticsearch.livenessProbe.initialDelaySeconds }}
56+
initialDelaySeconds: {{ default 5 .Values.elasticsearch.livenessProbe.initialDelaySeconds }}
5757
periodSeconds: {{ default 15 .Values.elasticsearch.livenessProbe.periodSeconds }}
5858
timeoutSeconds: {{ default 12 .Values.elasticsearch.livenessProbe.timeoutSeconds }}
59-
failureThreshold: {{ default 6 .Values.elasticsearch.livenessProbe.failureThreshold }}
59+
failureThreshold: {{ default 15 .Values.elasticsearch.livenessProbe.failureThreshold }}
6060
{{- end }}
6161
{{- if .Values.elasticsearch.resources }}
6262
resources:

0 commit comments

Comments
 (0)