2626import org .apache .paimon .format .FileFormat ;
2727import org .apache .paimon .mergetree .compact .aggregate .FieldAggregator ;
2828import org .apache .paimon .mergetree .compact .aggregate .factory .FieldAggregatorFactory ;
29- import org .apache .paimon .mergetree .compact .aggregate .factory .FieldLastNonNullValueAggFactory ;
3029import org .apache .paimon .options .ConfigOption ;
3130import org .apache .paimon .options .Options ;
3231import org .apache .paimon .table .BucketMode ;
4443import org .apache .paimon .utils .Preconditions ;
4544import org .apache .paimon .utils .StringUtils ;
4645
47- import java .util .ArrayList ;
4846import java .util .Arrays ;
49- import java .util .Collection ;
5047import java .util .Collections ;
5148import java .util .HashMap ;
52- import java .util .HashSet ;
5349import java .util .List ;
5450import java .util .Map ;
5551import java .util .Optional ;
56- import java .util .Set ;
5752import java .util .stream .Collectors ;
5853
5954import static org .apache .paimon .CoreOptions .BUCKET_KEY ;
7772import static org .apache .paimon .CoreOptions .SNAPSHOT_NUM_RETAINED_MAX ;
7873import static org .apache .paimon .CoreOptions .SNAPSHOT_NUM_RETAINED_MIN ;
7974import static org .apache .paimon .CoreOptions .STREAMING_READ_OVERWRITE ;
80- import static org .apache .paimon .mergetree . compact . PartialUpdateMergeFunction . SEQUENCE_GROUP ;
75+ import static org .apache .paimon .table . PrimaryKeyTableUtils . createMergeFunctionFactory ;
8176import static org .apache .paimon .table .SpecialFields .KEY_FIELD_PREFIX ;
8277import static org .apache .paimon .table .SpecialFields .SYSTEM_FIELD_NAMES ;
8378import static org .apache .paimon .types .DataTypeRoot .ARRAY ;
@@ -96,8 +91,6 @@ public class SchemaValidation {
9691 /**
9792 * Validate the {@link TableSchema} and {@link CoreOptions}.
9893 *
99- * <p>TODO validate all items in schema and all keys in options.
100- *
10194 * @param schema the schema to be validated
10295 */
10396 public static void validateTableSchema (TableSchema schema ) {
@@ -122,7 +115,7 @@ public static void validateTableSchema(TableSchema schema) {
122115
123116 validateSequenceField (schema , options );
124117
125- validateSequenceGroup (schema , options );
118+ validateMergeFunction (schema );
126119
127120 ChangelogProducer changelogProducer = options .changelogProducer ();
128121 if (schema .primaryKeys ().isEmpty () && changelogProducer != ChangelogProducer .NONE ) {
@@ -449,90 +442,12 @@ private static void validateFieldsPrefix(TableSchema schema, CoreOptions options
449442 });
450443 }
451444
452- private static void validateSequenceGroup (TableSchema schema , CoreOptions options ) {
453- Map <String , Set <String >> fields2Group = new HashMap <>();
454- Set <Integer > sequenceGroupFieldIndexs = new HashSet <>();
455- List <String > fieldNames = schema .fieldNames ();
456- for (Map .Entry <String , String > entry : options .toMap ().entrySet ()) {
457- String k = entry .getKey ();
458- String v = entry .getValue ();
459- if (k .startsWith (FIELDS_PREFIX ) && k .endsWith (SEQUENCE_GROUP )) {
460- Arrays .stream (v .split (FIELDS_SEPARATOR ))
461- .map (fieldName -> requireField (fieldName , fieldNames ))
462- .forEach (sequenceGroupFieldIndexs ::add );
463- String [] sequenceFieldNames =
464- k .substring (
465- FIELDS_PREFIX .length () + 1 ,
466- k .length () - SEQUENCE_GROUP .length () - 1 )
467- .split (FIELDS_SEPARATOR );
468-
469- for (String field : v .split (FIELDS_SEPARATOR )) {
470- if (!fieldNames .contains (field )) {
471- throw new IllegalArgumentException (
472- String .format ("Field %s can not be found in table schema." , field ));
473- }
474-
475- List <String > sequenceFieldsList = new ArrayList <>();
476- for (String sequenceFieldName : sequenceFieldNames ) {
477- if (!fieldNames .contains (sequenceFieldName )) {
478- throw new IllegalArgumentException (
479- String .format (
480- "The sequence field group: %s can not be found in table schema." ,
481- sequenceFieldName ));
482- }
483- sequenceFieldsList .add (sequenceFieldName );
484- }
485-
486- if (fields2Group .containsKey (field )) {
487- List <List <String >> sequenceGroups = new ArrayList <>();
488- sequenceGroups .add (new ArrayList <>(fields2Group .get (field )));
489- sequenceGroups .add (sequenceFieldsList );
490-
491- throw new IllegalArgumentException (
492- String .format (
493- "Field %s is defined repeatedly by multiple groups: %s." ,
494- field , sequenceGroups ));
495- }
496-
497- Set <String > group = fields2Group .computeIfAbsent (field , p -> new HashSet <>());
498- group .addAll (sequenceFieldsList );
499- }
500-
501- // add self
502- Arrays .stream (sequenceFieldNames )
503- .mapToInt (fieldName -> requireField (fieldName , fieldNames ))
504- .forEach (sequenceGroupFieldIndexs ::add );
505- }
506- }
507-
508- if (options .mergeEngine () == MergeEngine .PARTIAL_UPDATE ) {
509- for (String fieldName : fieldNames ) {
510- String aggFunc = options .fieldAggFunc (fieldName );
511- String aggFuncName = aggFunc == null ? options .fieldsDefaultFunc () : aggFunc ;
512- if (schema .primaryKeys ().contains (fieldName )) {
513- continue ;
514- }
515- if (aggFuncName != null ) {
516- // last_non_null_value doesn't require sequence group
517- checkArgument (
518- aggFuncName .equals (FieldLastNonNullValueAggFactory .NAME )
519- || sequenceGroupFieldIndexs .contains (
520- fieldNames .indexOf (fieldName )),
521- "Must use sequence group for aggregation functions but not found for field %s." ,
522- fieldName );
523- }
524- }
445+ private static void validateMergeFunction (TableSchema schema ) {
446+ if (schema .primaryKeys ().isEmpty ()) {
447+ return ;
525448 }
526449
527- Set <String > illegalGroup =
528- fields2Group .values ().stream ()
529- .flatMap (Collection ::stream )
530- .filter (g -> options .fieldAggFunc (g ) != null )
531- .collect (Collectors .toSet ());
532- if (!illegalGroup .isEmpty ()) {
533- throw new IllegalArgumentException (
534- "Should not defined aggregation function on sequence group: " + illegalGroup );
535- }
450+ createMergeFunctionFactory (schema );
536451 }
537452
538453 private static void validateForDeletionVectors (CoreOptions options ) {
0 commit comments