@@ -127,9 +127,8 @@ def convert_to_csv(self, tenant_id, client_id, client_secret, file_id, sheet="Sh
127127 sheet = "Sheet1"
128128
129129 #wb = Workbook(basename)
130- wb = load_workbook (basename )
131- print ("Sheets: %s" % wb .sheetnames )
132-
130+ wb = load_workbook (basename , read_only = True )
131+
133132 # grab the active worksheet
134133 ws = wb .active
135134 #for item in ws.iter_rows():
@@ -149,19 +148,25 @@ def convert_to_csv(self, tenant_id, client_id, client_secret, file_id, sheet="Sh
149148 csvdata = csvdata [:- 1 ]+ "\n "
150149 csvdata = csvdata [:- 1 ]
151150
152- print ()
153- print ("Data length:\n (%s)\n " % len (csvdata ))
151+ print ("Data length: (%s)" % len (csvdata ))
154152
155153 return csvdata
156154
157- def get_excel_file_data (self , file_id ):
155+ def get_excel_file_data (self , file_id , to_list = True , sheets = "" , max_rows = 100000 , skip_rows = 0 ):
158156 filedata = self .get_file (file_id )
159157 if filedata ["success" ] != True :
160- print (f"Bad info from file: { filedata } " )
158+ print (f"[ERROR] Bad info from file: { filedata } " )
161159 return filedata
162160
161+ if not sheets :
162+ sheets = ""
163+
164+ sheets = sheets .lower ()
165+ max_rows = int (max_rows )
166+ skip_rows = int (skip_rows )
167+
163168 try :
164- print ("Filename: %s" % filedata ["filename" ])
169+ # print("Filename: %s" % filedata["filename"])
165170 if "csv" in filedata ["filename" ]:
166171 try :
167172 filedata ["data" ] = filedata ["data" ].decode ("utf-8" )
@@ -183,23 +188,41 @@ def get_excel_file_data(self, file_id):
183188
184189 #wb = Workbook(basename)
185190 try :
186- wb = load_workbook (basename )
191+ wb = load_workbook (basename , read_only = True )
187192 except Exception as e :
188193 return {
189194 "success" : False ,
190195 "reason" : "The file is invalid. Are you sure it's a valid excel file? CSV files are not supported." ,
191196 "exception" : "Error: %s" % e ,
192197 }
193-
194- print ("Sheets: %s" % wb .sheetnames )
195198
199+ # Default
200+ #max_count = 25000
201+ #if os.getenv("SHUFFLE_APP_SDK_TIMEOUT") > 240:
202+ # Limits are ~no longer relevant if to_list=True
203+
204+ cnt = 0
205+ skipped_cnt = 0
196206 output_data = []
197207 for ws in wb .worksheets :
198- print (f"Title: { ws .title } " )
208+ if ws .title .lower () not in sheets and sheets != "" :
209+ continue
199210
200211 # grab the active worksheet
201212 csvdata = ""
213+ if cnt - skipped_cnt > skip_rows :
214+ break
215+
216+ list_data = []
202217 for row in ws .values :
218+ cnt += 1
219+ if cnt < skip_rows :
220+ skipped_cnt += 1
221+ continue
222+
223+ if cnt - skipped_cnt > max_rows :
224+ break
225+
203226 for value in row :
204227 #print(value)
205228 if value == None :
@@ -209,15 +232,30 @@ def get_excel_file_data(self, file_id):
209232 else :
210233 csvdata += str (value )+ ","
211234
212- csvdata = csvdata [:- 1 ]+ "\n "
213- csvdata = csvdata [:- 1 ]
214-
215- print ()
216- print ("Data:\n %s\n " % csvdata )
217- output_data .append ({
235+ list_data .append (csvdata )
236+ if to_list == False :
237+ csvdata = csvdata [:- 1 ]+ "\n "
238+ else :
239+ csvdata = ""
240+
241+ #csvdata = csvdata[:-1]
242+
243+ output = {
218244 "sheet" : ws .title ,
219245 "data" : csvdata ,
220- })
246+ }
247+
248+ if to_list == False :
249+ print ("Data len (%s): %d" % (ws .title , len (csvdata )))
250+ output_data .append (output )
251+ else :
252+ print ("Data len (%s): %d" % (ws .title , len (list_data )))
253+ output_data .append ({
254+ "sheet" : ws .title ,
255+ "data" : list_data ,
256+ })
257+
258+ print ("Done! Returning data of length: %d" % len (output_data ))
221259
222260 return output_data
223261
0 commit comments