Skip to content

Commit b973766

Browse files
[CYB-208] OCSF Support for Flink Indexing + JSON TableAPI (#88)
* JSON TableAPI support
* OCSF support for Flink indexing
1 parent 8748f24 commit b973766

File tree

13 files changed

+439
-119
lines changed

13 files changed

+439
-119
lines changed

flink-cyber/flink-cyber-api/src/main/java/com/cloudera/cyber/indexing/MappingColumnDto.java

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
import com.fasterxml.jackson.annotation.JsonIgnore;
44
import com.fasterxml.jackson.annotation.JsonProperty;
5+
import java.util.Arrays;
6+
import java.util.Collections;
7+
import java.util.List;
8+
import java.util.stream.Collectors;
59
import lombok.AllArgsConstructor;
610
import lombok.Data;
711
import lombok.NoArgsConstructor;
@@ -28,23 +32,35 @@ public class MappingColumnDto {
2832
private Boolean isMap;
2933

3034
@JsonIgnore
31-
public String getKafkaName() {
35+
public List<String> getKafkaNameList() {
3236
final String properName = kafkaName == null ? name : kafkaName;
3337
if (getIsMap()) {
34-
return String.format("['%s']", properName);
35-
} else {
36-
if (getPath().equals("..")) {
37-
return String.format("%s", properName);
38-
}
39-
return String.format(".%s", properName);
38+
return Collections.singletonList(String.format("['%s']", properName));
4039
}
40+
41+
String[] kafkaNamesSplit = properName.split(",");
42+
43+
return Arrays.stream(kafkaNamesSplit)
44+
.map(singleKafkaName -> {
45+
if (getPath().equals("..")) {
46+
return String.format("%s", singleKafkaName);
47+
}
48+
return String.format(".%s", singleKafkaName);
49+
})
50+
.collect(Collectors.toList());
4151
}
4252

4353
    /**
     * Returns the configured Kafka name exactly as set, without the fallback
     * and formatting applied by {@code getKafkaNameList()}; may be {@code null}
     * when no explicit Kafka name was configured.
     */
    @JsonIgnore
    public String getRawKafkaName() {
        return kafkaName;
    }
4757

58+
@JsonProperty("path")
59+
public String getRawPath() {
60+
return this.path;
61+
}
62+
63+
@JsonIgnore
4864
public String getPath() {
4965
if (StringUtils.isEmpty(path)) {
5066
return "extensions";

flink-cyber/flink-indexing/flink-indexing-hive/src/main/java/com/cloudera/cyber/indexing/hive/tableapi/TableApiAbstractJob.java

Lines changed: 44 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,31 @@
1010
import com.fasterxml.jackson.core.type.TypeReference;
1111
import com.google.common.collect.Streams;
1212
import java.io.IOException;
13-
import java.util.*;
13+
import java.util.ArrayList;
14+
import java.util.Arrays;
15+
import java.util.Collection;
16+
import java.util.Collections;
17+
import java.util.HashMap;
18+
import java.util.HashSet;
19+
import java.util.LinkedHashMap;
20+
import java.util.List;
21+
import java.util.Map;
1422
import java.util.Map.Entry;
23+
import java.util.Objects;
24+
import java.util.Optional;
25+
import java.util.Set;
1526
import java.util.function.Function;
1627
import java.util.stream.Collectors;
1728
import org.apache.flink.api.java.utils.ParameterTool;
1829
import org.apache.flink.configuration.Configuration;
1930
import org.apache.flink.streaming.api.datastream.DataStream;
2031
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
2132
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
22-
import org.apache.flink.table.api.*;
33+
import org.apache.flink.table.api.DataTypes;
34+
import org.apache.flink.table.api.FormatDescriptor;
35+
import org.apache.flink.table.api.Schema;
36+
import org.apache.flink.table.api.SqlDialect;
37+
import org.apache.flink.table.api.TableDescriptor;
2338
import org.apache.flink.table.api.bridge.java.StreamStatementSet;
2439
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
2540
import org.apache.flink.table.catalog.Column;
@@ -355,30 +370,42 @@ protected String getInsertColumns(MappingDto mappingDto) {
355370
}
356371

357372
private String getFromColumns(MappingDto mappingDto, ResolvedSchema tableSchema) {
373+
System.out.println("Building from columns");
358374
return mappingDto.getColumnMapping().stream()
359375
.map(mappingColumnDto -> {
360-
final String kafkaName = mappingColumnDto.getKafkaName();
376+
final List<String> kafkaNameList = mappingColumnDto.getKafkaNameList();
361377
final String path = mappingColumnDto.getPath();
362378

363-
String fullPath;
364-
if (path.startsWith("..")) {
365-
fullPath = path.substring(2);
366-
} else {
367-
fullPath = String.format("message.%s", path);
368-
}
369-
if (StringUtils.hasText(fullPath)) {
370-
fullPath = String.join(".", fullPath.split("\\."));
371-
}
372-
373-
fullPath = fullPath + kafkaName;
379+
List<String> fullPathList = kafkaNameList.stream()
380+
.map(kafkaName -> {
381+
String fullPath;
382+
if (path.startsWith("..")) {
383+
fullPath = path.substring(2);
384+
} else {
385+
fullPath = String.format("message.%s", path);
386+
}
387+
if (StringUtils.hasText(fullPath)) {
388+
fullPath = String.join(".", fullPath.split("\\."));
389+
}
390+
391+
return "(" + fullPath + kafkaName + ")";
392+
}).collect(Collectors.toList());
374393

375394
Optional<Column> column = tableSchema.getColumn(mappingColumnDto.getName());
376395
final String transformation = column.map(value -> getTransformation(value.getDataType(), mappingColumnDto)).orElse("");
377396

397+
if (!CollectionUtils.isEmpty(mappingDto.getIgnoreFields())) {
398+
String fieldsToIgnore = mappingDto.getIgnoreFields().stream()
399+
.filter(StringUtils::hasText)
400+
.collect(Collectors.joining("','", "'", "'"));
401+
if (StringUtils.hasText(transformation)) {
402+
fullPathList.add(fieldsToIgnore);
403+
}
404+
}
405+
378406
return StringUtils.hasText(transformation)
379-
? String.format(transformation, "(" + fullPath + ")", mappingDto.getIgnoreFields().stream()
380-
.collect(Collectors.joining("','", "'", "'")))
381-
: fullPath;
407+
? String.format(transformation, fullPathList.toArray(new Object[0]))
408+
: String.join(", ", fullPathList);
382409
})
383410
.collect(Collectors.joining(", ", " ", " "));
384411
}

flink-cyber/flink-indexing/flink-indexing-hive/src/main/java/com/cloudera/cyber/indexing/hive/tableapi/TableApiJobFactory.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
package com.cloudera.cyber.indexing.hive.tableapi;
22

3+
import com.cloudera.cyber.indexing.hive.tableapi.impl.TableApiFilesystemJob;
34
import com.cloudera.cyber.indexing.hive.tableapi.impl.TableApiHiveJob;
45
import com.cloudera.cyber.indexing.hive.tableapi.impl.TableApiKafkaJob;
56
import com.cloudera.cyber.scoring.ScoredMessage;
7+
import java.io.IOException;
68
import org.apache.flink.api.java.utils.ParameterTool;
79
import org.apache.flink.streaming.api.datastream.DataStream;
810
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
911

10-
import java.io.IOException;
11-
1212
public class TableApiJobFactory {
1313

1414
public static TableApiAbstractJob getJobByConnectorName(String typeName, ParameterTool params, StreamExecutionEnvironment env, DataStream<ScoredMessage> source) throws IOException {
@@ -20,6 +20,8 @@ public static TableApiAbstractJob getJobByConnectorName(String typeName, Paramet
2020
return new TableApiHiveJob(params, env, source);
2121
case "kafka":
2222
return new TableApiKafkaJob(params, env, source);
23+
case "filesystem":
24+
return new TableApiFilesystemJob(params, env, source);
2325
default:
2426
throw new RuntimeException(String.format("Unknown job type name [%s] provided while the Flink writer is selected as TableAPI", typeName));
2527
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
package com.cloudera.cyber.indexing.hive.tableapi.impl;
2+
3+
import com.cloudera.cyber.indexing.hive.tableapi.TableApiAbstractJob;
4+
import com.cloudera.cyber.scoring.ScoredMessage;
5+
import java.io.IOException;
6+
import org.apache.flink.api.java.utils.ParameterTool;
7+
import org.apache.flink.streaming.api.datastream.DataStream;
8+
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
9+
import org.apache.flink.table.api.FormatDescriptor;
10+
import org.apache.flink.table.api.TableDescriptor;
11+
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
12+
13+
14+
/**
 * TableAPI indexing job that writes scored messages to a filesystem sink via
 * Flink's "filesystem" table connector.
 *
 * <p>Configured through job parameters: {@code flink.files.format} (output
 * format, defaults to "json") and {@code flink.files.path} (required target
 * directory).
 */
public class TableApiFilesystemJob extends TableApiAbstractJob {

    // Reuses the hive base-table column definitions for the filesystem sink.
    private static final String BASE_TABLE_JSON = "base-hive-table.json";
    // Sink file format, e.g. json/csv/parquet; defaults to "json".
    private final String format;
    // Target directory for the filesystem sink; job fails fast if absent.
    private final String path;

    public TableApiFilesystemJob(ParameterTool params, StreamExecutionEnvironment env, DataStream<ScoredMessage> source)
            throws IOException {
        super(params, env, source, "Filesystem", BASE_TABLE_JSON);
        format = params.get("flink.files.format", "json");
        path = params.getRequired("flink.files.path");
    }

    @Override
    protected StreamExecutionEnvironment jobReturnValue() {
        // No environment is returned for the filesystem variant; the statement
        // set submitted by the base job drives execution.
        return null;
    }

    @Override
    protected String getTableConnector() {
        return "filesystem";
    }

    @Override
    protected FormatDescriptor getFormatDescriptor() {
        return FormatDescriptor.forFormat(format).build();
    }

    @Override
    protected void registerCatalog(StreamTableEnvironment tableEnv) {
        // Filesystem tables are created inline via TableDescriptor; no external
        // catalog registration is required.
    }

    @Override
    protected TableDescriptor.Builder fillTableOptions(TableDescriptor.Builder builder) {
        // "success-file" commit policy writes a _SUCCESS marker when a
        // partition is complete, signalling downstream readers.
        return super.fillTableOptions(builder)
                .option("path", path)
                .option("format", format)
                .option("sink.partition-commit.policy.kind", "success-file");
    }

}

flink-cyber/flink-indexing/flink-indexing-hive/src/test/java/com/cloudera/cyber/indexing/hive/tableapi/TableApiAbstractJobTest.java

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package com.cloudera.cyber.indexing.hive.tableapi;
22

3+
import static org.assertj.core.api.Assertions.assertThat;
34
import static org.assertj.core.api.Assertions.assertThatThrownBy;
5+
46
import com.cloudera.cyber.indexing.MappingColumnDto;
57
import com.cloudera.cyber.indexing.MappingDto;
68
import com.cloudera.cyber.indexing.hive.tableapi.impl.TableApiKafkaJob;
@@ -51,7 +53,40 @@ public static Stream<Arguments> mappingsData() {
5153
new MappingColumnDto("column6", null, null, null, false),
5254
new MappingColumnDto("column7", null, null, null, false),
5355
new MappingColumnDto("column8", null, null, null, false),
54-
new MappingColumnDto("column9", null, null, null, false))))));
56+
new MappingColumnDto("column9", null, null, null, false))))),
57+
58+
Arguments.of(Collections.singletonMap(GIVEN_TABLE_NAME, ResolvedSchema.of(
59+
Column.physical("column1", DataTypes.STRING()),
60+
Column.physical("column2", DataTypes.STRING()),
61+
Column.physical("column3", DataTypes.STRING()))),
62+
Collections.singletonMap(GIVEN_SOURCE,
63+
new MappingDto(GIVEN_TABLE_NAME, new ArrayList<>(), Arrays.asList(
64+
new MappingColumnDto("column1", "column1,column2", null, null, false),
65+
new MappingColumnDto("column2", "column3", null, null, false))))));
66+
}
67+
68+
    /**
     * Test fixture for insert-SQL generation: (topic, mapping config, resolved
     * table schema, expected CREATE TEMPORARY VIEW statement).
     *
     * <p>Case 1: empty mapping produces an empty column/select list.
     * Case 2: a comma-separated kafka_name ("column1,column2") with a ROW(%s, %s)
     * transformation expands into two path expressions substituted into the
     * template, alongside a plain single-field mapping.
     */
    public static Stream<Arguments> insertSqlData() {
        return Stream.of(
                Arguments.of("topic",
                        new MappingDto("tableName", Collections.emptyList(), Collections.emptyList()),
                        ResolvedSchema.of(),
                        "CREATE TEMPORARY VIEW topic_tmpview( ) AS \n" +
                                " SELECT \n" +
                                " from KafkaTempView\n" +
                                " where `source`='topic'"),
                Arguments.of("topic",
                        new MappingDto("tableName", Collections.emptyList(), Arrays.asList(
                                new MappingColumnDto("column1", "column1,column2", null, "ROW(%s, %s)", false),
                                new MappingColumnDto("column2", "column3", null, null, false))),
                        ResolvedSchema.of(
                                Column.physical("column1", DataTypes.STRING()),
                                Column.physical("column2", DataTypes.STRING()),
                                Column.physical("column3", DataTypes.STRING())),
                        "CREATE TEMPORARY VIEW topic_tmpview( column1, column2 ) AS \n" +
                                " SELECT ROW((message.extensions.column1), (message.extensions.column2)), (message.extensions.column3) \n" +
                                " from KafkaTempView\n" +
                                " where `source`='topic'")
        );
    }
5691

5792
public static Stream<Arguments> mappingsExceptionData() {
@@ -92,13 +127,21 @@ public static Stream<Arguments> mappingsExceptionData() {
92127
"Found column mappings of non-string type without transformations for source [%s]: %s",
93128
GIVEN_SOURCE, "[column1]")));
94129
}
130+
95131
    // Validation should complete without throwing for every well-formed
    // schema/mapping combination supplied by mappingsData().
    @ParameterizedTest
    @MethodSource("mappingsData")
    void shouldValidateMappings(Map<String, ResolvedSchema> givenTableSchemaMap,
                                Map<String, MappingDto> givenTopicMapping) {
        job.validateMappings(givenTableSchemaMap, givenTopicMapping);
    }
101137

138+
    // Pins the exact SQL text produced by buildInsertSql for each fixture in
    // insertSqlData(), including whitespace and the generated view name.
    @ParameterizedTest
    @MethodSource("insertSqlData")
    void shouldGenerateInsertSql(String topic, MappingDto mappingDto, ResolvedSchema tableSchema, String expectedSql) {
        String actualSql = job.buildInsertSql(topic, mappingDto, tableSchema);
        assertThat(actualSql).isEqualTo(expectedSql);
    }
144+
102145
@ParameterizedTest
103146
@MethodSource("mappingsExceptionData")
104147
void shouldThrowExceptionWhenValidateMappings(Map<String, ResolvedSchema> givenTableSchemaMap,

flink-cyber/metron-parser-chain/parser-chains-config-service/frontend/parser-chains-client/src/app/chain-page/chain-page.models.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,12 @@ export interface IndexTableMapping {
6666
column_mapping: IndexingColumnMapping[]
6767
}
6868

69+
/**
 * Describes one column of an indexing table as exchanged with the backend:
 * its name, SQL type string, and whether it accepts nulls.
 */
export interface TableColumnDto {
  name: string;
  type: string;
  nullable: boolean;
}
74+
6975
export interface IndexingColumnMapping {
7076
name: string;
7177
kafka_name?: string;

flink-cyber/metron-parser-chain/parser-chains-config-service/frontend/parser-chains-client/src/app/chain-page/components/ocsf-form/ocsf-form.component.html

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,23 @@
6363
nzText="Import/Export"
6464
nzOrientation="center">
6565
</nz-divider>
66+
<nz-form-item>
67+
<nz-form-label>Table Config path</nz-form-label>
68+
<nz-form-control>
69+
<nz-input-group nzPrefixIcon="file">
70+
<input formControlName="_tableFilePath"
71+
nz-input
72+
placeholder="/path/to/tableConfig.json"/>
73+
</nz-input-group>
74+
</nz-form-control>
75+
</nz-form-item>
6676
<nz-form-item>
6777
<nz-form-label>Mapping path</nz-form-label>
6878
<nz-form-control>
6979
<nz-input-group nzPrefixIcon="file">
70-
<input formControlName="_filePath"
80+
<input formControlName="_mappingFilePath"
7181
nz-input
72-
placeholder="/path/to/file.json"/>
82+
placeholder="/path/to/mappingConfig.json"/>
7383
</nz-input-group>
7484
</nz-form-control>
7585
</nz-form-item>

0 commit comments

Comments
 (0)