Skip to content

Commit 8c1f42c

Browse files
Backport to branch(3.11) : Add Control file module files and validation (#2528)
Co-authored-by: inv-jishnu <[email protected]>
1 parent 2fa1e36 commit 8c1f42c

File tree

5 files changed

+868
-0
lines changed

5 files changed

+868
-0
lines changed

data-loader/core/src/main/java/com/scalar/db/dataloader/core/ErrorMessage.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,18 @@ public class ErrorMessage {
2424
public static final String MISSING_NAMESPACE_OR_TABLE = "Missing namespace or table: %s, %s";
2525
public static final String TABLE_METADATA_RETRIEVAL_FAILED =
2626
"Failed to retrieve table metadata. Details: %s";
27+
public static final String DUPLICATE_DATA_MAPPINGS =
28+
"Duplicate data mappings found for table '%s' in the control file";
29+
public static final String MISSING_COLUMN_MAPPING =
30+
"No mapping found for column '%s' in table '%s' in the control file. Control file validation set at 'FULL'. All columns need to be mapped.";
31+
public static final String CONTROL_FILE_MISSING_DATA_MAPPINGS =
32+
"The control file is missing data mappings";
33+
public static final String TARGET_COLUMN_NOT_FOUND =
34+
"The target column '%s' for source field '%s' could not be found in table '%s'";
35+
public static final String MISSING_PARTITION_KEY =
36+
"The required partition key '%s' is missing in the control file mapping for table '%s'";
37+
public static final String MISSING_CLUSTERING_KEY =
38+
"The required clustering key '%s' is missing in the control file mapping for table '%s'";
39+
public static final String MULTIPLE_MAPPINGS_FOR_COLUMN_FOUND =
40+
"Duplicated data mappings found for column '%s' in table '%s'";
2741
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package com.scalar.db.dataloader.core.dataimport.controlfile;
2+
3+
import com.fasterxml.jackson.annotation.JsonCreator;
4+
import com.fasterxml.jackson.annotation.JsonProperty;
5+
import java.util.ArrayList;
6+
import java.util.List;
7+
import lombok.Getter;
8+
import lombok.Setter;
9+
10+
/**
11+
* Represents a control file that holds control file tables which contains the column mappings that
12+
* maps a source file column to the actual database table column.
13+
*/
14+
@Getter
15+
@Setter
16+
public class ControlFile {
17+
18+
/**
19+
* A list of {@link ControlFileTable} objects representing the tables defined in the control file.
20+
*/
21+
@JsonProperty("tables")
22+
private final List<ControlFileTable> tables;
23+
24+
/** Default constructor that initializes an empty list of tables. */
25+
public ControlFile() {
26+
this.tables = new ArrayList<>();
27+
}
28+
29+
/**
30+
* Constructs a {@code ControlFile} with the specified list of tables.
31+
*
32+
* @param tables the list of {@link ControlFileTable} objects to initialize the control file with
33+
*/
34+
@JsonCreator
35+
public ControlFile(@JsonProperty("tables") List<ControlFileTable> tables) {
36+
this.tables = tables;
37+
}
38+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
package com.scalar.db.dataloader.core.dataimport.controlfile;
2+
3+
/** Checked exception thrown when a control file fails validation. */
public class ControlFileValidationException extends Exception {

  /**
   * Creates an exception describing why the control file is invalid.
   *
   * @param message error message
   */
  public ControlFileValidationException(String message) {
    super(message);
  }

  /**
   * Creates an exception describing why the control file is invalid, keeping the underlying cause
   * so that it is not lost when the failure is re-thrown.
   *
   * @param message error message
   * @param cause the exception that triggered the validation failure; may be {@code null}
   */
  public ControlFileValidationException(String message, Throwable cause) {
    super(message, cause);
  }
}
Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,243 @@
1+
package com.scalar.db.dataloader.core.dataimport.controlfile;
2+
3+
import static com.scalar.db.dataloader.core.ErrorMessage.CONTROL_FILE_MISSING_DATA_MAPPINGS;
4+
import static com.scalar.db.dataloader.core.ErrorMessage.DUPLICATE_DATA_MAPPINGS;
5+
import static com.scalar.db.dataloader.core.ErrorMessage.MISSING_CLUSTERING_KEY;
6+
import static com.scalar.db.dataloader.core.ErrorMessage.MISSING_COLUMN_MAPPING;
7+
import static com.scalar.db.dataloader.core.ErrorMessage.MISSING_NAMESPACE_OR_TABLE;
8+
import static com.scalar.db.dataloader.core.ErrorMessage.MISSING_PARTITION_KEY;
9+
import static com.scalar.db.dataloader.core.ErrorMessage.MULTIPLE_MAPPINGS_FOR_COLUMN_FOUND;
10+
import static com.scalar.db.dataloader.core.ErrorMessage.TARGET_COLUMN_NOT_FOUND;
11+
12+
import com.scalar.db.api.TableMetadata;
13+
import com.scalar.db.dataloader.core.util.RuntimeUtil;
14+
import com.scalar.db.dataloader.core.util.TableMetadataUtil;
15+
import java.util.HashSet;
16+
import java.util.LinkedHashSet;
17+
import java.util.Map;
18+
import java.util.Set;
19+
20+
/** Class to validate a control file */
21+
public class ControlFileValidator {
22+
23+
/**
24+
* Validate a control file
25+
*
26+
* @param controlFile Control file instance
27+
* @param controlFileValidationMode Defines the strictness of the control file validation
28+
* @param tableMetadataMap Metadata for one or more ScalarDB tables
29+
* @throws ControlFileValidationException when the control file is invalid
30+
*/
31+
public static void validate(
32+
ControlFile controlFile,
33+
ControlFileValidationLevel controlFileValidationMode,
34+
Map<String, TableMetadata> tableMetadataMap)
35+
throws ControlFileValidationException {
36+
37+
// Method argument null check
38+
RuntimeUtil.checkNotNull(controlFile, controlFileValidationMode, tableMetadataMap);
39+
40+
// Make sure the control file is not empty
41+
checkEmptyMappings(controlFile);
42+
43+
// Table metadata existence and target column validation
44+
Set<String> uniqueTables = new HashSet<>();
45+
for (ControlFileTable controlFileTable : controlFile.getTables()) {
46+
String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable);
47+
48+
// Make sure that multiple table mappings for one table do not exist
49+
if (uniqueTables.contains(lookupKey)) {
50+
throw new ControlFileValidationException(String.format(DUPLICATE_DATA_MAPPINGS, lookupKey));
51+
}
52+
uniqueTables.add(lookupKey);
53+
54+
// Make sure no column is mapped multiple times
55+
Set<String> mappedTargetColumns = getTargetColumnSet(controlFileTable);
56+
57+
// Make sure table metadata is provided for each table mentioned in the data mappings
58+
checkMultiTableMetadata(tableMetadataMap, controlFileTable);
59+
60+
TableMetadata tableMetadata = tableMetadataMap.get(lookupKey);
61+
62+
// Make sure the specified target columns in the mappings actually exist
63+
checkIfTargetColumnExist(tableMetadata, controlFileTable);
64+
65+
// Make sure all table columns are mapped
66+
if (controlFileValidationMode == ControlFileValidationLevel.FULL) {
67+
checkIfAllColumnsAreMapped(tableMetadata, mappedTargetColumns, controlFileTable);
68+
continue;
69+
}
70+
71+
// Make sure all keys (partition keys and clustering keys) are mapped
72+
if (controlFileValidationMode == ControlFileValidationLevel.KEYS) {
73+
checkPartitionKeys(tableMetadata, mappedTargetColumns, controlFileTable);
74+
checkClusteringKeys(tableMetadata, mappedTargetColumns, controlFileTable);
75+
}
76+
}
77+
}
78+
79+
/**
80+
* Check that all table columns are mapped in the control file. Ran only when the control file
81+
* validation mode is set to 'FULL'
82+
*
83+
* @param tableMetadata Metadata for one ScalarDB table
84+
* @param mappedTargetColumns All target columns that are mapped in the control file
85+
* @param controlFileTable Control file entry for one ScalarDB table
86+
* @throws ControlFileValidationException when there is a column that is not mapped in the control
87+
* file
88+
*/
89+
private static void checkIfAllColumnsAreMapped(
90+
TableMetadata tableMetadata,
91+
Set<String> mappedTargetColumns,
92+
ControlFileTable controlFileTable)
93+
throws ControlFileValidationException {
94+
LinkedHashSet<String> columnNames = tableMetadata.getColumnNames();
95+
for (String columnName : columnNames) {
96+
if (!mappedTargetColumns.contains(columnName)) {
97+
throw new ControlFileValidationException(
98+
String.format(
99+
MISSING_COLUMN_MAPPING,
100+
columnName,
101+
TableMetadataUtil.getTableLookupKey(controlFileTable)));
102+
}
103+
}
104+
}
105+
106+
/**
107+
* Check that the control file has mappings for at least one table
108+
*
109+
* @param controlFile Control file instance
110+
* @throws ControlFileValidationException when the control file has no mappings for any table
111+
*/
112+
private static void checkEmptyMappings(ControlFile controlFile)
113+
throws ControlFileValidationException {
114+
// Make sure data mapping for at least one table is provided
115+
if (controlFile.getTables().isEmpty()) {
116+
throw new ControlFileValidationException(CONTROL_FILE_MISSING_DATA_MAPPINGS);
117+
}
118+
}
119+
120+
/**
121+
* Check that metadata is provided for each table that is mapped in the control file. If the table
122+
* metadata is missing this probably means the namespace and table combination does not exist.
123+
*
124+
* @param tableMetadataMap Metadata for one or more ScalarDB tables
125+
* @param controlFileTable Control file entry for one ScalarDB table
126+
* @throws ControlFileValidationException when metadata for a mapped table is missing
127+
*/
128+
private static void checkMultiTableMetadata(
129+
Map<String, TableMetadata> tableMetadataMap, ControlFileTable controlFileTable)
130+
throws ControlFileValidationException {
131+
// Make sure table metadata is available for each table data mapping
132+
String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable);
133+
if (!tableMetadataMap.containsKey(lookupKey)) {
134+
throw new ControlFileValidationException(
135+
String.format(
136+
MISSING_NAMESPACE_OR_TABLE,
137+
controlFileTable.getNamespace(),
138+
controlFileTable.getTable()));
139+
}
140+
}
141+
142+
/**
143+
* Check that the mapped target column exists in the provided table metadata.
144+
*
145+
* @param tableMetadata Metadata for the table
146+
* @param controlFileTable Control file entry for one ScalarDB table
147+
* @throws ControlFileValidationException when the target column does not exist
148+
*/
149+
private static void checkIfTargetColumnExist(
150+
TableMetadata tableMetadata, ControlFileTable controlFileTable)
151+
throws ControlFileValidationException {
152+
153+
String lookupKey = TableMetadataUtil.getTableLookupKey(controlFileTable);
154+
LinkedHashSet<String> columnNames = tableMetadata.getColumnNames();
155+
156+
for (ControlFileTableFieldMapping mapping : controlFileTable.getMappings()) {
157+
// Make sure the target fields are found in the table metadata
158+
if (!columnNames.contains(mapping.getTargetColumn())) {
159+
throw new ControlFileValidationException(
160+
String.format(
161+
TARGET_COLUMN_NOT_FOUND,
162+
mapping.getTargetColumn(),
163+
mapping.getSourceField(),
164+
lookupKey));
165+
}
166+
}
167+
}
168+
169+
/**
170+
* Check that the required partition keys are mapped in the control file. Ran only for control
171+
* file validation mode KEYS and FULL.
172+
*
173+
* @param tableMetadata Metadata for one ScalarDB table
174+
* @param mappedTargetColumns Set of target columns that are mapped in the control file
175+
* @param controlFileTable Control file entry for one ScalarDB table
176+
* @throws ControlFileValidationException when a partition key is not mapped
177+
*/
178+
private static void checkPartitionKeys(
179+
TableMetadata tableMetadata,
180+
Set<String> mappedTargetColumns,
181+
ControlFileTable controlFileTable)
182+
throws ControlFileValidationException {
183+
LinkedHashSet<String> partitionKeyNames = tableMetadata.getPartitionKeyNames();
184+
for (String partitionKeyName : partitionKeyNames) {
185+
if (!mappedTargetColumns.contains(partitionKeyName)) {
186+
throw new ControlFileValidationException(
187+
String.format(
188+
MISSING_PARTITION_KEY,
189+
partitionKeyName,
190+
TableMetadataUtil.getTableLookupKey(controlFileTable)));
191+
}
192+
}
193+
}
194+
195+
/**
196+
* Check that the required clustering keys are mapped in the control file. Ran only for control
197+
* file validation mode KEYS and FULL.
198+
*
199+
* @param tableMetadata Metadata for one ScalarDB table
200+
* @param mappedTargetColumns Set of target columns that are mapped in the control file
201+
* @param controlFileTable Control file entry for one ScalarDB table
202+
* @throws ControlFileValidationException when a clustering key is not mapped
203+
*/
204+
private static void checkClusteringKeys(
205+
TableMetadata tableMetadata,
206+
Set<String> mappedTargetColumns,
207+
ControlFileTable controlFileTable)
208+
throws ControlFileValidationException {
209+
LinkedHashSet<String> clusteringKeyNames = tableMetadata.getClusteringKeyNames();
210+
for (String clusteringKeyName : clusteringKeyNames) {
211+
if (!mappedTargetColumns.contains(clusteringKeyName)) {
212+
throw new ControlFileValidationException(
213+
String.format(
214+
MISSING_CLUSTERING_KEY,
215+
clusteringKeyName,
216+
TableMetadataUtil.getTableLookupKey(controlFileTable)));
217+
}
218+
}
219+
}
220+
221+
/**
222+
* Check that a control file table mapping does not contain duplicate mappings for the same target
223+
* column and creates a set of unique mappings
224+
*
225+
* @param controlFileTable Control file entry for one ScalarDB table
226+
* @return Set of uniquely mapped target columns
227+
* @throws ControlFileValidationException when a duplicate mapping is found
228+
*/
229+
private static Set<String> getTargetColumnSet(ControlFileTable controlFileTable)
230+
throws ControlFileValidationException {
231+
Set<String> mappedTargetColumns = new HashSet<>();
232+
for (ControlFileTableFieldMapping mapping : controlFileTable.getMappings()) {
233+
if (!mappedTargetColumns.add(mapping.getTargetColumn())) {
234+
throw new ControlFileValidationException(
235+
String.format(
236+
MULTIPLE_MAPPINGS_FOR_COLUMN_FOUND,
237+
mapping.getTargetColumn(),
238+
TableMetadataUtil.getTableLookupKey(controlFileTable)));
239+
}
240+
}
241+
return mappedTargetColumns;
242+
}
243+
}

0 commit comments

Comments
 (0)