Skip to content

Commit cf9d57a

Browse files
committed
no-op
PiperOrigin-RevId: 325308481
1 parent e38a87a commit cf9d57a

File tree

2 files changed

+36
-60
lines changed

2 files changed

+36
-60
lines changed

tensorflow_data_validation/statistics/generators/top_k_uniques_sketch_stats_generator.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -179,16 +179,16 @@ def merge_accumulators(
179179
self,
180180
accumulators: Iterable[Dict[tfdv_types.FeaturePath, _CombinedSketch]]
181181
) -> Dict[tfdv_types.FeaturePath, _CombinedSketch]:
182-
new_acc = {}
183-
for acc in accumulators:
184-
for feature_name, combined_sketch in acc.items():
185-
existing_sketch = new_acc.get(feature_name, None)
182+
result = {}
183+
for accumulator in accumulators:
184+
for feature_name, combined_sketch in accumulator.items():
185+
existing_sketch = result.get(feature_name, None)
186186
if existing_sketch is None:
187-
new_acc[feature_name] = combined_sketch
187+
result[feature_name] = combined_sketch
188188
else:
189189
existing_sketch.merge(combined_sketch)
190-
new_acc[feature_name] = existing_sketch
191-
return new_acc
190+
result[feature_name] = existing_sketch
191+
return result
192192

193193
def extract_output(
194194
self, accumulator: Dict[tfdv_types.FeaturePath, _CombinedSketch]

tensorflow_data_validation/statistics/generators/top_k_uniques_sketch_stats_generator_test.py

Lines changed: 29 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -93,10 +93,8 @@ def test_topk_uniques_sketch_with_single_bytes_feature(self):
9393
}
9494
}""", statistics_pb2.FeatureNameStatistics())
9595
}
96-
generator = (
97-
sketch_generator
98-
.TopKUniquesSketchStatsGenerator(
99-
num_top_values=4, num_rank_histogram_buckets=3))
96+
generator = sketch_generator.TopKUniquesSketchStatsGenerator(
97+
num_top_values=4, num_rank_histogram_buckets=3)
10098

10199
self.assertCombinerOutputEqual(batches, generator, expected_result)
102100

@@ -201,10 +199,8 @@ def test_topk_uniques_sketch_with_weights(self):
201199
}
202200
}""", statistics_pb2.FeatureNameStatistics())
203201
}
204-
generator = (
205-
sketch_generator
206-
.TopKUniquesSketchStatsGenerator(
207-
weight_feature='w', num_top_values=4, num_rank_histogram_buckets=3))
202+
generator = sketch_generator.TopKUniquesSketchStatsGenerator(
203+
weight_feature='w', num_top_values=4, num_rank_histogram_buckets=3)
208204
self.assertCombinerOutputEqual(batches, generator, expected_result)
209205

210206
def test_topk_uniques_sketch_with_single_unicode_feature(self):
@@ -264,10 +260,8 @@ def test_topk_uniques_sketch_with_single_unicode_feature(self):
264260
}
265261
}""", statistics_pb2.FeatureNameStatistics())
266262
}
267-
generator = (
268-
sketch_generator
269-
.TopKUniquesSketchStatsGenerator(
270-
num_top_values=4, num_rank_histogram_buckets=3))
263+
generator = sketch_generator.TopKUniquesSketchStatsGenerator(
264+
num_top_values=4, num_rank_histogram_buckets=3)
271265
self.assertCombinerOutputEqual(batches, generator, expected_result)
272266

273267
def test_topk_uniques_sketch_with_multiple_features(self):
@@ -373,10 +367,8 @@ def test_topk_uniques_sketch_with_multiple_features(self):
373367
}
374368
}""", statistics_pb2.FeatureNameStatistics())
375369
}
376-
generator = (
377-
sketch_generator
378-
.TopKUniquesSketchStatsGenerator(
379-
num_top_values=4, num_rank_histogram_buckets=3))
370+
generator = sketch_generator.TopKUniquesSketchStatsGenerator(
371+
num_top_values=4, num_rank_histogram_buckets=3)
380372
self.assertCombinerOutputEqual(batches, generator, expected_result)
381373

382374
def test_topk_uniques_sketch_zero_row(self):
@@ -385,19 +377,15 @@ def test_topk_uniques_sketch_zero_row(self):
385377
['f1'])
386378
]
387379
expected_result = {}
388-
generator = (
389-
sketch_generator
390-
.TopKUniquesSketchStatsGenerator(
391-
num_top_values=4, num_rank_histogram_buckets=3))
380+
generator = sketch_generator.TopKUniquesSketchStatsGenerator(
381+
num_top_values=4, num_rank_histogram_buckets=3)
392382
self.assertCombinerOutputEqual(batches, generator, expected_result)
393383

394384
def test_topk_uniques_sketch_empty_record_batch(self):
395385
batches = [pa.RecordBatch.from_arrays([], [])]
396386
expected_result = {}
397-
generator = (
398-
sketch_generator
399-
.TopKUniquesSketchStatsGenerator(
400-
num_top_values=4, num_rank_histogram_buckets=3))
387+
generator = sketch_generator.TopKUniquesSketchStatsGenerator(
388+
num_top_values=4, num_rank_histogram_buckets=3)
401389
self.assertCombinerOutputEqual(batches, generator, expected_result)
402390

403391
def test_topk_uniques_sketch_with_missing_feature(self):
@@ -502,10 +490,8 @@ def test_topk_uniques_sketch_with_missing_feature(self):
502490
}
503491
}""", statistics_pb2.FeatureNameStatistics())
504492
}
505-
generator = (
506-
sketch_generator
507-
.TopKUniquesSketchStatsGenerator(
508-
num_top_values=4, num_rank_histogram_buckets=3))
493+
generator = sketch_generator.TopKUniquesSketchStatsGenerator(
494+
num_top_values=4, num_rank_histogram_buckets=3)
509495
self.assertCombinerOutputEqual(batches, generator, expected_result)
510496

511497
def test_topk_uniques_sketch_with_numeric_feature(self):
@@ -568,10 +554,8 @@ def test_topk_uniques_sketch_with_numeric_feature(self):
568554
}
569555
}""", statistics_pb2.FeatureNameStatistics())
570556
}
571-
generator = (
572-
sketch_generator
573-
.TopKUniquesSketchStatsGenerator(
574-
num_top_values=4, num_rank_histogram_buckets=3))
557+
generator = sketch_generator.TopKUniquesSketchStatsGenerator(
558+
num_top_values=4, num_rank_histogram_buckets=3)
575559
self.assertCombinerOutputEqual(batches, generator, expected_result)
576560

577561
def test_topk_uniques_sketch_with_categorical_feature(self):
@@ -639,10 +623,8 @@ def test_topk_uniques_sketch_with_categorical_feature(self):
639623
}
640624
}
641625
""", schema_pb2.Schema())
642-
generator = (
643-
sketch_generator
644-
.TopKUniquesSketchStatsGenerator(
645-
schema=schema, num_top_values=4, num_rank_histogram_buckets=3))
626+
generator = sketch_generator.TopKUniquesSketchStatsGenerator(
627+
schema=schema, num_top_values=4, num_rank_histogram_buckets=3)
646628
self.assertCombinerOutputEqual(batches, generator, expected_result)
647629

648630
def test_topk_with_frequency_threshold(self):
@@ -723,12 +705,10 @@ def test_topk_with_frequency_threshold(self):
723705
}""", statistics_pb2.FeatureNameStatistics())
724706
}
725707

726-
generator = (
727-
sketch_generator
728-
.TopKUniquesSketchStatsGenerator(
729-
weight_feature='w',
730-
num_top_values=5, frequency_threshold=2,
731-
weighted_frequency_threshold=15, num_rank_histogram_buckets=3))
708+
generator = sketch_generator.TopKUniquesSketchStatsGenerator(
709+
weight_feature='w',
710+
num_top_values=5, frequency_threshold=2,
711+
weighted_frequency_threshold=15, num_rank_histogram_buckets=3)
732712
self.assertCombinerOutputEqual(batches, generator, expected_result)
733713

734714
def test_topk_struct_leaves(self):
@@ -916,13 +896,11 @@ def test_topk_struct_leaves(self):
916896
step: "f2"
917897
}""", statistics_pb2.FeatureNameStatistics()),
918898
}
919-
generator = (
920-
sketch_generator
921-
.TopKUniquesSketchStatsGenerator(
922-
schema=schema,
923-
weight_feature='w',
924-
num_top_values=3,
925-
num_rank_histogram_buckets=3))
899+
generator = sketch_generator.TopKUniquesSketchStatsGenerator(
900+
schema=schema,
901+
weight_feature='w',
902+
num_top_values=3,
903+
num_rank_histogram_buckets=3)
926904

927905
self.assertCombinerOutputEqual(batches, generator, expected_result)
928906

@@ -1028,10 +1006,8 @@ def test_topk_uniques_sketch_with_int_weights(self):
10281006
}
10291007
}""", statistics_pb2.FeatureNameStatistics())
10301008
}
1031-
generator = (
1032-
sketch_generator
1033-
.TopKUniquesSketchStatsGenerator(
1034-
weight_feature='w', num_top_values=4, num_rank_histogram_buckets=3))
1009+
generator = sketch_generator.TopKUniquesSketchStatsGenerator(
1010+
weight_feature='w', num_top_values=4, num_rank_histogram_buckets=3)
10351011
self.assertCombinerOutputEqual(batches, generator, expected_result)
10361012

10371013
if __name__ == '__main__':

0 commit comments

Comments
 (0)