@@ -88,23 +88,18 @@ def load(self) -> TabularData:
8888
8989 def lazy_sheet_loader (xls_file : pd .ExcelFile , xls_sheet_name : str ):
def sheet_loader():
    """Parse one worksheet and return it after the loader's clean-up passes.

    ``xls_file`` and ``xls_sheet_name`` come from the enclosing
    ``lazy_sheet_loader`` call; ``self`` comes from the enclosing method.

    Columns labelled ``"Name"`` (or, when labels are tuples, whose first
    element is ``"Name"``) are read with ``dtype=str``.
    NOTE(review): presumably this keeps large numeric names from being
    loaded as floats -- confirm against the loader's requirements.

    Returns:
        The parsed sheet after placeholder removal, duplicate-column
        handling, UUID-column processing and column renaming, in that order.
    """

    def is_name_column(label):
        # Tuple labels are matched on their first element only.
        if isinstance(label, tuple):
            return label[0] == "Name"
        return label == "Name"

    # Header-only read (nrows=0) to learn the column labels, so the dtype
    # mapping can be keyed by the exact labels the full parse will produce.
    header_only = xls_file.parse(xls_sheet_name, header=self._header_rows, nrows=0)
    text_columns = {label: str for label in header_only.columns if is_name_column(label)}

    frame = xls_file.parse(xls_sheet_name, header=self._header_rows, dtype=text_columns)

    # The clean-up steps run in a fixed order; each receives the previous result.
    frame = self._remove_unnamed_column_placeholders(data=frame)
    frame = self._handle_duplicate_columns(data=frame, sheet_name=xls_sheet_name)
    frame = self._process_uuid_columns(data=frame, sheet_name=xls_sheet_name)
    return self._update_column_names(data=frame)
109104
110105 data : Dict [str , LazyDataFrame ] = {}
0 commit comments