Skip to content

Commit c4b70ac

Browse files
committed
[backend] Refactor to extract attachment props from engine.ts
1 parent a3d8203 commit c4b70ac

File tree

3 files changed

+69
-59
lines changed

3 files changed

+69
-59
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import type { Mutable } from '../types/type-utils';
2+
3+
// List of fields extracted by the attachment ingest processor, for Elasticsearch.
4+
// The full list is available in the Elasticsearch docs:
5+
// (https://www.elastic.co/guide/en/elasticsearch/reference/8.19/attachment.html#attachment-fields).
6+
export const ATTACHMENT_PROCESSOR_EXTRACTED_PROPS_ELASTICSEARCH = [
7+
'content',
8+
'title',
9+
'author',
10+
'keywords',
11+
'date',
12+
'content_type',
13+
'content_length',
14+
'language',
15+
'modified',
16+
'format',
17+
// identifier, NOT EXTRACTED
18+
// contributor, NOT EXTRACTED
19+
// coverage, NOT EXTRACTED
20+
'modifier',
21+
'creator_tool',
22+
// publisher, NOT EXTRACTED
23+
// relation, NOT EXTRACTED
24+
// rights, NOT EXTRACTED
25+
// source, NOT EXTRACTED
26+
// type, NOT EXTRACTED
27+
'description',
28+
'print_date',
29+
'metadata_date',
30+
// latitude, NOT EXTRACTED
31+
// longitude, NOT EXTRACTED
32+
// altitude, NOT EXTRACTED
33+
// rating, NOT EXTRACTED
34+
'comments',
35+
] as const;
36+
37+
// List of fields extracted by the attachment ingest processor, for OpenSearch.
38+
// The full list is available in the OS docs:
39+
// (https://docs.opensearch.org/latest/install-and-configure/additional-plugins/ingest-attachment-plugin/#extracted-information),
40+
// and the plugin source shows that unknown fields are rejected with an exception:
41+
// https://github.com/opensearch-project/OpenSearch/blob/315481148edaa43410e2e9f1801ec903fd62ec20/plugins/ingest-attachment/src/main/java/org/opensearch/ingest/attachment/AttachmentProcessor.java#L277
42+
export const ATTACHMENT_PROCESSOR_EXTRACTED_PROPS_OPENSEARCH = [
43+
'content',
44+
'title',
45+
'author',
46+
'keywords',
47+
'date',
48+
'content_type',
49+
'content_length',
50+
'language',
51+
] as const;
52+
53+
// Union type of all properties extracted by the ES or OS attachment processor
54+
export type AttachmentProcessorExtractedProp = Mutable<typeof ATTACHMENT_PROCESSOR_EXTRACTED_PROPS_ELASTICSEARCH>[number]
55+
| Mutable<typeof ATTACHMENT_PROCESSOR_EXTRACTED_PROPS_OPENSEARCH>[number];

opencti-platform/opencti-graphql/src/database/engine.ts

Lines changed: 1 addition & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ import { IDS_ATTRIBUTES } from '../domain/attribute-utils';
198198
import { schemaRelationsRefDefinition } from '../schema/schema-relationsRef';
199199
import type { FiltersWithNested } from './middleware-loader';
200200
import { pushAll, unshiftAll } from '../utils/arrayUtil';
201+
import { ATTACHMENT_PROCESSOR_EXTRACTED_PROPS_ELASTICSEARCH, ATTACHMENT_PROCESSOR_EXTRACTED_PROPS_OPENSEARCH } from './attachment-processor-props';
201202

202203
const ELK_ENGINE = 'elk';
203204
const OPENSEARCH_ENGINE = 'opensearch';
@@ -299,59 +300,6 @@ const oebp = (queryResult: any): any => {
299300
return queryResult.body;
300301
};
301302

302-
// List of fields extracted by the attachment ingest processor.
303-
// The full list is available in the Elasticsearch docs:
304-
// (https://www.elastic.co/guide/en/elasticsearch/reference/8.19/attachment.html#attachment-fields).
305-
const ATTACHMENT_PROCESSOR_EXTRACTED_PROPS_ELASTICSEARCH = [
306-
'content',
307-
'title',
308-
'author',
309-
'keywords',
310-
'date',
311-
'content_type',
312-
'content_length',
313-
'language',
314-
'modified',
315-
'format',
316-
// identifier, NOT EXTRACTED
317-
// contributor, NOT EXTRACTED
318-
// coverage, NOT EXTRACTED
319-
'modifier',
320-
'creator_tool',
321-
// publisher, NOT EXTRACTED
322-
// relation, NOT EXTRACTED
323-
// rights, NOT EXTRACTED
324-
// source, NOT EXTRACTED
325-
// type, NOT EXTRACTED
326-
'description',
327-
'print_date',
328-
'metadata_date',
329-
// latitude, NOT EXTRACTED
330-
// longitude, NOT EXTRACTED
331-
// altitude, NOT EXTRACTED
332-
// rating, NOT EXTRACTED
333-
'comments',
334-
] as const;
335-
336-
// List of fields extracted by the attachment ingest processor, for OpenSearch.
337-
// The full list is available in the OS docs:
338-
// (https://docs.opensearch.org/latest/install-and-configure/additional-plugins/ingest-attachment-plugin/#extracted-information),
339-
// and code shows the check rejects unknown fields with an exception:
340-
// https://github.com/opensearch-project/OpenSearch/blob/315481148edaa43410e2e9f1801ec903fd62ec20/plugins/ingest-attachment/src/main/java/org/opensearch/ingest/attachment/AttachmentProcessor.java#L277
341-
const ATTACHMENT_PROCESSOR_EXTRACTED_PROPS_OPENSEARCH = [
342-
'content',
343-
'title',
344-
'author',
345-
'keywords',
346-
'date',
347-
'content_type',
348-
'content_length',
349-
'language',
350-
] as const;
351-
352-
export type AttachmentProcessorExtractedProp = Mutable<typeof ATTACHMENT_PROCESSOR_EXTRACTED_PROPS_ELASTICSEARCH>[number]
353-
& Mutable<typeof ATTACHMENT_PROCESSOR_EXTRACTED_PROPS_OPENSEARCH>[number];
354-
355303
export const elConfigureAttachmentProcessor = async (): Promise<boolean> => {
356304
let success = true;
357305
if (engine instanceof ElkClient) {

opencti-platform/opencti-graphql/src/modules/internal/document/document.ts

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import type { AttachmentProcessorExtractedProp } from '../../../database/engine';
1+
import type { AttachmentProcessorExtractedProp } from '../../../database/attachment-processor-props';
22
import { ENTITY_TYPE_INTERNAL_FILE } from '../../../schema/internalObject';
33
import { schemaAttributesDefinition } from '../../../schema/schema-attributes';
44
import {
@@ -40,12 +40,19 @@ export const ATTACHMENT_MAPPINGS = [
4040
name: AttachmentProcessorExtractedProp;
4141
} & MappingDefinition<BasicStoreAttribute>)[];
4242

43-
type KeysInArray = typeof ATTACHMENT_MAPPINGS[number]['name'];
44-
type AttributesDefinitionWithCheck = Exclude<AttachmentProcessorExtractedProp, KeysInArray> extends never
45-
? Array<AttributeDefinition>
43+
// Compile-time exhaustiveness check to make sure we don't forget to update
44+
// ATTACHMENT_MAPPINGS when/if we start extracting new fields
45+
// via the ES/OS attachment ingest pipeline.
46+
type AttachmentMappingsWithCheck = Exclude<
47+
AttachmentProcessorExtractedProp,
48+
typeof ATTACHMENT_MAPPINGS[number]['name']
49+
> extends never
50+
? MappingDefinition<BasicStoreAttribute>[]
4651
: 'Make sure ATTACHMENT_MAPPINGS defines one mapping for each AttachmentProcessorExtractedProp';
4752

48-
const attributes: AttributesDefinitionWithCheck = [
53+
const TYPE_CHECKED_ATTACHMENT_MAPPINGS: AttachmentMappingsWithCheck = ATTACHMENT_MAPPINGS;
54+
55+
const attributes: Array<AttributeDefinition> = [
4956
id,
5057
internalId,
5158
standardId,
@@ -110,7 +117,7 @@ const attributes: AttributesDefinitionWithCheck = [
110117
multiple: false,
111118
upsert: false,
112119
isFilterable: false,
113-
mappings: ATTACHMENT_MAPPINGS,
120+
mappings: TYPE_CHECKED_ATTACHMENT_MAPPINGS,
114121
},
115122
{ name: 'uploaded_at', label: 'Upload date', type: 'date', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: false },
116123
{ name: 'file_id', label: 'File identifier', type: 'string', format: 'short', mandatoryType: 'internal', editDefault: false, multiple: false, upsert: false, isFilterable: false },

0 commit comments

Comments
 (0)