Skip to content
This repository was archived by the owner on Jul 27, 2024. It is now read-only.

Commit 5a42fe3

Browse files
authored
Merge pull request #184 from PAIR-code/images
Fix feature stats generation when examples contain encoded images
2 parents 0293abc + e40ec4c commit 5a42fe3

File tree

1 file changed

+8
-6
lines changed

1 file changed

+8
-6
lines changed

facets_overview/python/base_generic_feature_statistics_generator.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919

2020
import numpy as np
2121
import pandas as pd
22+
import sys
2223

2324

2425
class BaseGenericFeatureStatisticsGenerator(object):
@@ -269,13 +270,14 @@ def GetDatasetsProto(self, datasets, features=None,
269270
sorted_vals = sorted(zip(counts, vals), reverse=True)
270271
sorted_vals = sorted_vals[:histogram_categorical_levels_count]
271272
for val_index, val in enumerate(sorted_vals):
272-
if val[1].dtype.type is np.str_:
273-
printable_val = val[1]
274-
else:
275-
try:
273+
try:
274+
if (sys.version_info.major < 3 or
275+
isinstance(val[1], (bytes, bytearray))):
276276
printable_val = val[1].decode('UTF-8', 'strict')
277-
except (UnicodeDecodeError, UnicodeEncodeError):
278-
printable_val = '__BYTES_VALUE__'
277+
else:
278+
printable_val = val[1]
279+
except (UnicodeDecodeError, UnicodeEncodeError):
280+
printable_val = '__BYTES_VALUE__'
279281
bucket = featstats.rank_histogram.buckets.add(
280282
low_rank=val_index,
281283
high_rank=val_index,

0 commit comments

Comments (0)