Skip to content

Commit 698cd42

Browse files
committed
[backend] Fix strict_dynamic_mapping_exception exceptions thrown in fileIndexManager (#89)
The issue arose because of missing index mappings in the attachment sub-document: we use an Elasticsearch ingest pipeline processor for attachments that extracts fields for us. By default, this processor extracts every field it can (see https://www.elastic.co/guide/en/elasticsearch/reference/8.19/attachment.html#attachment-fields). This commit specifies explicitly which fields to extract and, for those fields, enforces an index mapping definition.
1 parent 89cedb2 commit 698cd42

File tree

3 files changed

+87
-20
lines changed

3 files changed

+87
-20
lines changed

opencti-platform/opencti-graphql/src/database/engine.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ import type {
192192
StoreRelation,
193193
} from '../types/store';
194194
import type { BasicStoreSettings } from '../types/settings';
195+
import type { Mutable } from '../types/type-utils';
195196
import { completeSpecialFilterKeys } from '../utils/filtering/filtering-completeSpecialFilterKeys';
196197
import { IDS_ATTRIBUTES } from '../domain/attribute-utils';
197198
import { schemaRelationsRefDefinition } from '../schema/schema-relationsRef';
@@ -298,6 +299,42 @@ const oebp = (queryResult: any): any => {
298299
return queryResult.body;
299300
};
300301

302+
// List of fields extracted by the attachment ingest processor.
303+
// The full list is available in the Elasticsearch docs:
304+
// (https://www.elastic.co/guide/en/elasticsearch/reference/8.19/attachment.html#attachment-fields).
// Entries kept as comments and flagged "NOT EXTRACTED" are fields from that documentation
// that are deliberately left out of this list.
// The array is declared `as const` so that the literal field names can be reused as a type
// (see AttachmentProcessorExtractedProp); it is also passed as the `properties` option of the
// attachment processor in elConfigureAttachmentProcessor.
305+
const ATTACHMENT_PROCESSOR_EXTRACTED_PROPS = [
306+
'content',
307+
'title',
308+
'author',
309+
'keywords',
310+
'date',
311+
'content_type',
312+
'content_length',
313+
'language',
314+
'modified',
315+
'format',
316+
// identifier, NOT EXTRACTED
317+
// contributor, NOT EXTRACTED
318+
// coverage, NOT EXTRACTED
319+
'modifier',
320+
'creator_tool',
321+
// publisher, NOT EXTRACTED
322+
// relation, NOT EXTRACTED
323+
// rights, NOT EXTRACTED
324+
// source, NOT EXTRACTED
325+
// type, NOT EXTRACTED
326+
'description',
327+
'print_date',
328+
'metadata_date',
329+
// latitude, NOT EXTRACTED
330+
// longitude, NOT EXTRACTED
331+
// altitude, NOT EXTRACTED
332+
// rating, NOT EXTRACTED
333+
'comments',
334+
] as const;
335+
336+
/**
 * Union of the field names listed in ATTACHMENT_PROCESSOR_EXTRACTED_PROPS.
 * Indexing the readonly tuple type with `number` already yields that union,
 * so no mutable wrapper is needed around `typeof`.
 */
export type AttachmentProcessorExtractedProp = (typeof ATTACHMENT_PROCESSOR_EXTRACTED_PROPS)[number];
337+
301338
export const elConfigureAttachmentProcessor = async (): Promise<boolean> => {
302339
let success = true;
303340
if (engine instanceof ElkClient) {
@@ -309,6 +346,7 @@ export const elConfigureAttachmentProcessor = async (): Promise<boolean> => {
309346
attachment: {
310347
field: 'file_data',
311348
remove_binary: true,
349+
properties: ATTACHMENT_PROCESSOR_EXTRACTED_PROPS as Mutable<typeof ATTACHMENT_PROCESSOR_EXTRACTED_PROPS>,
312350
},
313351
},
314352
],

opencti-platform/opencti-graphql/src/modules/internal/document/document.ts

Lines changed: 43 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,51 @@
1+
import type { AttachmentProcessorExtractedProp } from '../../../database/engine';
12
import { ENTITY_TYPE_INTERNAL_FILE } from '../../../schema/internalObject';
23
import { schemaAttributesDefinition } from '../../../schema/schema-attributes';
3-
import { type AttributeDefinition, createdAt, creators, entityType, id, internalId, parentTypes, refreshedAt, standardId, updatedAt } from '../../../schema/attribute-definition';
4+
import {
5+
type AttributeDefinition,
6+
createdAt,
7+
creators,
8+
entityType,
9+
id,
10+
internalId,
11+
parentTypes,
12+
refreshedAt,
13+
standardId,
14+
updatedAt,
15+
type MappingDefinition,
16+
type BasicStoreAttribute,
17+
} from '../../../schema/attribute-definition';
418
import { ENTITY_TYPE_MARKING_DEFINITION } from '../../../schema/stixMetaObject';
519
import { ABSTRACT_STIX_CORE_OBJECT } from '../../../schema/general';
620
import { UPLOAD_STATUS_VALUES } from './document-domain';
721

8-
const attributes: Array<AttributeDefinition> = [
22+
// Mapping definitions for the fields extracted by the attachment processor.
// The trailing `satisfies` clause checks that every entry is a MappingDefinition<BasicStoreAttribute>
// whose `name` is one of the AttachmentProcessorExtractedProp literals, without widening the
// inferred type of the array (the literal `name` values stay available to the type system).
export const ATTACHMENT_MAPPINGS = [
23+
{ name: 'author', label: 'Author', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
24+
{ name: 'comments', label: 'Comments', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
25+
{ name: 'content', label: 'Content', type: 'string', format: 'text', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
26+
{ name: 'content_length', label: 'Content length', type: 'numeric', precision: 'integer', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
27+
{ name: 'content_type', label: 'Content type', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
28+
{ name: 'creator_tool', label: 'Creator tool', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
29+
{ name: 'date', label: 'Created date', type: 'date', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
30+
{ name: 'description', label: 'Description', type: 'string', format: 'text', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
31+
{ name: 'format', label: 'Format', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
32+
{ name: 'keywords', label: 'Keywords', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
33+
{ name: 'language', label: 'Language', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
34+
{ name: 'metadata_date', label: 'Metadata date', type: 'date', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
35+
{ name: 'modified', label: 'Modified date', type: 'date', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
36+
{ name: 'modifier', label: 'Modifier', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
37+
{ name: 'print_date', label: 'Print date', type: 'date', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
38+
{ name: 'title', label: 'Title', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
39+
] satisfies ({
40+
name: AttachmentProcessorExtractedProp;
41+
} & MappingDefinition<BasicStoreAttribute>)[];
42+
43+
// Union of the `name` values declared in ATTACHMENT_MAPPINGS.
type KeysInArray = typeof ATTACHMENT_MAPPINGS[number]['name'];
44+
// Compile-time exhaustiveness check: if every AttachmentProcessorExtractedProp has an entry in
// ATTACHMENT_MAPPINGS, the Exclude<> below is `never` and this resolves to Array<AttributeDefinition>.
// Otherwise it resolves to the string literal type below, so annotating an array with this type
// fails to compile and the literal text shows up in the compiler error.
type AttributesDefinitionWithCheck = Exclude<AttachmentProcessorExtractedProp, KeysInArray> extends never
45+
? Array<AttributeDefinition>
46+
: 'Make sure ATTACHMENT_MAPPINGS defines one mapping for each AttachmentProcessorExtractedProp';
47+
48+
const attributes: AttributesDefinitionWithCheck = [
949
id,
1050
internalId,
1151
standardId,
@@ -70,24 +110,7 @@ const attributes: Array<AttributeDefinition> = [
70110
multiple: false,
71111
upsert: false,
72112
isFilterable: false,
73-
mappings: [
74-
{ name: 'author', label: 'Author', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
75-
{ name: 'comments', label: 'Comments', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
76-
{ name: 'content', label: 'Content', type: 'string', format: 'text', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
77-
{ name: 'content_length', label: 'Content length', type: 'numeric', precision: 'integer', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
78-
{ name: 'content_type', label: 'Content type', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
79-
{ name: 'creator_tool', label: 'Creator tool', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
80-
{ name: 'date', label: 'Created date', type: 'date', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
81-
{ name: 'description', label: 'Description', type: 'string', format: 'text', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
82-
{ name: 'format', label: 'Format', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
83-
{ name: 'keywords', label: 'Keywords', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
84-
{ name: 'language', label: 'Language', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
85-
{ name: 'metadata_date', label: 'Metadata date', type: 'date', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
86-
{ name: 'modified', label: 'Modified date', type: 'date', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
87-
{ name: 'modifier', label: 'Modifier', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
88-
{ name: 'print_date', label: 'Print date', type: 'date', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
89-
{ name: 'title', label: 'Title', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: true },
90-
],
113+
mappings: ATTACHMENT_MAPPINGS,
91114
},
92115
{ name: 'uploaded_at', label: 'Upload date', type: 'date', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: false },
93116
{ name: 'file_id', label: 'File identifier', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: false },

opencti-platform/opencti-graphql/src/types/type-utils.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,9 @@ import { isNotEmptyField } from '../database/utils';
33
/**
 * Type guard that narrows `T | null | undefined` to `T`.
 * The decision is delegated entirely to `isNotEmptyField`.
 *
 * @param data - value to test
 * @returns the result of `isNotEmptyField(data)`; when true, `data` is typed as `T`
 */
export const filterEmpty = <T>(data: T | null | undefined): data is T => {
44
return isNotEmptyField(data);
55
};
6+
7+
/**
8+
* Inverse operation of the built-in Readonly<T> utility type:
9+
* removes the `readonly` modifier from every property of T.
* The change is shallow: property types (T[P]) are left untouched.
10+
*/
11+
export type Mutable<T> = { -readonly [P in keyof T]: T[P]; };

0 commit comments

Comments
 (0)