6565
6666import java .io .File ;
6767import java .util .Collections ;
68+ import java .util .EnumSet ;
6869import java .util .List ;
6970import java .util .Map ;
7071
7576 * IncrementalIndex (realtime) and QueryableIndex (historical) segments that were created with various sketches,
7677 * ensuring that both segment types report the same column type (COMPLEX<HLLSketch>) by using the serde's normal type.
7778 */
78- public abstract class SketchBuildSegmentMetadataQueryTestBase extends InitializedNullHandlingTest
79+ public abstract class BaseSketchBuildSegmentMetadataQueryTest extends InitializedNullHandlingTest
7980{
8081 protected static final String DATA_SOURCE = "test_datasource" ;
8182 protected static final String SKETCH_COLUMN = "sketch" ;
@@ -117,14 +118,18 @@ public void setUp()
117118
118119 /**
119120 * @return an aggregator that builds a sketch from raw input (its intermediate type may be a non-canonical
120- * "build" type name).
121+ * "build" type name).
121122 */
122123 protected abstract AggregatorFactory buildSketchAggregatorFactory (String sketchColumn , String inputFieldName );
123124
124- /** @return the canonical complex type that should be reported by SegmentMetadataQuery. */
125+ /**
126+ * @return the canonical complex type that should be reported by SegmentMetadataQuery.
127+ */
125128 protected abstract ColumnType expectedCanonicalColumnType ();
126129
127- /** Validate the combining/merged aggregator returned by SegmentMetadataQuery. */
130+ /**
131+ * Validate the combining/merged aggregator returned by SegmentMetadataQuery.
132+ */
128133 protected abstract void assertMergedSketchAggregator (AggregatorFactory aggregator , String sketchColumn );
129134
130135 @ Test
@@ -190,15 +195,17 @@ public void testSegmentMetadataQueryWithBuildAggregatorAcrossMixedSegments() thr
190195 }
191196
192197 // Make sure the merge went through ok
193- Assert .assertEquals (ColumnType .LONG , mergedAnalysis .getColumns ().get ("__time" ).getTypeSignature ());
194- Assert .assertEquals (ColumnType .STRING , mergedAnalysis .getColumns ().get (DIM_COLUMN ).getTypeSignature ());
195- Assert .assertEquals (ColumnType .LONG , mergedAnalysis .getColumns ().get ("count" ).getTypeSignature ());
198+ ColumnAnalysis timeColumnAnalysis = mergedAnalysis .getColumns ().get ("__time" );
199+ Assert .assertEquals (ColumnType .LONG , timeColumnAnalysis .getTypeSignature ());
200+ ColumnAnalysis dimColumnAnalysis = mergedAnalysis .getColumns ().get (DIM_COLUMN );
201+ Assert .assertEquals (ColumnType .STRING , dimColumnAnalysis .getTypeSignature ());
202+ ColumnAnalysis countColumnAnalysis = mergedAnalysis .getColumns ().get ("count" );
203+ Assert .assertEquals (ColumnType .LONG , countColumnAnalysis .getTypeSignature ());
204+ ColumnAnalysis sketchColumnAnalysis = mergedAnalysis .getColumns ().get (SKETCH_COLUMN );
196205 Assert .assertEquals (
197206 expectedCanonicalColumnType (),
198- mergedAnalysis . getColumns (). get ( SKETCH_COLUMN ) .getTypeSignature ()
207+ sketchColumnAnalysis .getTypeSignature ()
199208 );
200-
201- ColumnAnalysis sketchColumnAnalysis = mergedAnalysis .getColumns ().get (SKETCH_COLUMN );
202209 Assert .assertFalse ("Sketch column should not have multiple values" , sketchColumnAnalysis .isHasMultipleValues ());
203210
204211 Assert .assertNotNull ("Aggregators should be present" , mergedAnalysis .getAggregators ());
@@ -259,6 +266,10 @@ public void testSegmentMetadataQueryWithOnlyPersistedSegments() throws Exception
259266 .dataSource (DATA_SOURCE )
260267 .intervals (Collections .singletonList (Intervals .ETERNITY ))
261268 .merge (true )
269+ .analysisTypes (EnumSet .of (
270+ SegmentMetadataQuery .AnalysisType .MINMAX ,
271+ SegmentMetadataQuery .AnalysisType .CARDINALITY
272+ ))
262273 .build ();
263274 QueryToolChest <SegmentAnalysis , SegmentMetadataQuery > toolChest = queryRunnerFactory .getToolchest ();
264275 QueryRunner <SegmentAnalysis > mergedRunner = toolChest .mergeResults (
@@ -277,13 +288,38 @@ public void testSegmentMetadataQueryWithOnlyPersistedSegments() throws Exception
277288 Assert .assertEquals (1 , resultList .size ());
278289 SegmentAnalysis mergedAnalysis = resultList .get (0 );
279290
280- ColumnAnalysis sketchColumnAnalysis = mergedAnalysis .getColumns ().get (SKETCH_COLUMN );
281- Assert .assertNotNull ("Sketch column should be present" , sketchColumnAnalysis );
282- Assert .assertFalse (
283- "No error expected when both segments are persisted: " + sketchColumnAnalysis .getErrorMessage (),
284- sketchColumnAnalysis .isError ()
285- );
291+ // Verify all columns exist
292+ Assert .assertEquals ("Should have 4 columns" , 4 , mergedAnalysis .getColumns ().size ());
293+ Assert .assertTrue ("Should contain __time column" , mergedAnalysis .getColumns ().containsKey ("__time" ));
294+ Assert .assertTrue ("Should contain dim column" , mergedAnalysis .getColumns ().containsKey (DIM_COLUMN ));
295+ Assert .assertTrue ("Should contain count column" , mergedAnalysis .getColumns ().containsKey ("count" ));
296+ Assert .assertTrue ("Should contain sketch column" , mergedAnalysis .getColumns ().containsKey (SKETCH_COLUMN ));
297+
298+ // Verify no column has merge errors
299+ for (Map .Entry <String , ColumnAnalysis > entry : mergedAnalysis .getColumns ().entrySet ()) {
300+ Assert .assertFalse (
301+ "Column '" + entry .getKey () + "' should not have error: " + entry .getValue ().getErrorMessage (),
302+ entry .getValue ().isError ()
303+ );
304+ }
286305
306+ // Verify time
307+ ColumnAnalysis timeColumnAnalysis = mergedAnalysis .getColumns ().get ("__time" );
308+ Assert .assertEquals (ColumnType .LONG , timeColumnAnalysis .getTypeSignature ());
309+
310+ // Verify dim column: type STRING, min/max/cardinality
311+ ColumnAnalysis dimColumnAnalysis = mergedAnalysis .getColumns ().get (DIM_COLUMN );
312+ Assert .assertEquals (ColumnType .STRING , dimColumnAnalysis .getTypeSignature ());
313+ Assert .assertEquals ("dim min value" , "dim_value_0" , dimColumnAnalysis .getMinValue ());
314+ Assert .assertEquals ("dim max value" , "dim_value_9" , dimColumnAnalysis .getMaxValue ());
315+ Assert .assertEquals ("dim cardinality should be 10" , 10 , dimColumnAnalysis .getCardinality ().intValue ());
316+
317+ // Verify count column: type LONG (min/max not computed for metric columns by default)
318+ ColumnAnalysis countColumnAnalysis = mergedAnalysis .getColumns ().get ("count" );
319+ Assert .assertEquals (ColumnType .LONG , countColumnAnalysis .getTypeSignature ());
320+
321+ // Verify sketch column: ensure type signature matches
322+ ColumnAnalysis sketchColumnAnalysis = mergedAnalysis .getColumns ().get (SKETCH_COLUMN );
287323 Assert .assertEquals (
288324 expectedCanonicalColumnType (),
289325 sketchColumnAnalysis .getTypeSignature ()
@@ -325,6 +361,10 @@ public void testSegmentMetadataQueryWithOnlyRealtimeSegments()
325361 .dataSource (DATA_SOURCE )
326362 .intervals (Collections .singletonList (Intervals .ETERNITY ))
327363 .merge (true )
364+ .analysisTypes (EnumSet .of (
365+ SegmentMetadataQuery .AnalysisType .MINMAX ,
366+ SegmentMetadataQuery .AnalysisType .CARDINALITY
367+ ))
328368 .build ();
329369 QueryToolChest <SegmentAnalysis , SegmentMetadataQuery > toolChest = queryRunnerFactory .getToolchest ();
330370 QueryRunner <SegmentAnalysis > mergedRunner = toolChest .mergeResults (
@@ -343,13 +383,38 @@ public void testSegmentMetadataQueryWithOnlyRealtimeSegments()
343383 Assert .assertEquals (1 , resultList .size ());
344384 SegmentAnalysis mergedAnalysis = resultList .get (0 );
345385
346- ColumnAnalysis sketchColumnAnalysis = mergedAnalysis . getColumns (). get ( SKETCH_COLUMN );
347- Assert .assertNotNull ( "Sketch column should be present " , sketchColumnAnalysis );
348- Assert .assertFalse (
349- "No error expected when both segments are realtime: " + sketchColumnAnalysis . getErrorMessage (),
350- sketchColumnAnalysis . isError ()
351- );
386+ // Verify all columns exist
387+ Assert .assertEquals ( "Should have 4 columns " , 4 , mergedAnalysis . getColumns (). size () );
388+ Assert .assertTrue ( "Should contain __time column" , mergedAnalysis . getColumns (). containsKey ( "__time" ));
389+ Assert . assertTrue ( "Should contain dim column" , mergedAnalysis . getColumns (). containsKey ( DIM_COLUMN ));
390+ Assert . assertTrue ( "Should contain count column" , mergedAnalysis . getColumns (). containsKey ( "count" ));
391+ Assert . assertTrue ( "Should contain sketch column" , mergedAnalysis . getColumns (). containsKey ( SKETCH_COLUMN ) );
352392
393+ // Verify no column has merge errors
394+ for (Map .Entry <String , ColumnAnalysis > entry : mergedAnalysis .getColumns ().entrySet ()) {
395+ Assert .assertFalse (
396+ "Column '" + entry .getKey () + "' should not have error: " + entry .getValue ().getErrorMessage (),
397+ entry .getValue ().isError ()
398+ );
399+ }
400+
401+ // Verify time
402+ ColumnAnalysis timeColumnAnalysis = mergedAnalysis .getColumns ().get ("__time" );
403+ Assert .assertEquals (ColumnType .LONG , timeColumnAnalysis .getTypeSignature ());
404+
405+ // Verify dim column: type STRING, min/max/cardinality
406+ ColumnAnalysis dimColumnAnalysis = mergedAnalysis .getColumns ().get (DIM_COLUMN );
407+ Assert .assertEquals (ColumnType .STRING , dimColumnAnalysis .getTypeSignature ());
408+ Assert .assertEquals ("dim min value" , "dim_value_0" , dimColumnAnalysis .getMinValue ());
409+ Assert .assertEquals ("dim max value" , "dim_value_9" , dimColumnAnalysis .getMaxValue ());
410+ Assert .assertEquals ("dim cardinality should be 10" , 10 , dimColumnAnalysis .getCardinality ().intValue ());
411+
412+ // Verify count column: type LONG (min/max not computed for metric columns by default)
413+ ColumnAnalysis countColumnAnalysis = mergedAnalysis .getColumns ().get ("count" );
414+ Assert .assertEquals (ColumnType .LONG , countColumnAnalysis .getTypeSignature ());
415+
416+ // Verify sketch column: ensure type signature matches
417+ ColumnAnalysis sketchColumnAnalysis = mergedAnalysis .getColumns ().get (SKETCH_COLUMN );
353418 Assert .assertEquals (
354419 expectedCanonicalColumnType (),
355420 sketchColumnAnalysis .getTypeSignature ()
0 commit comments