@@ -1,18 +1,21 @@
-import pyodbc
-import struct
 import csv
 import json
 import os
+import struct
 from datetime import datetime
-from azure.identity import AzureCliCredential, get_bearer_token_provider
+
+import pyodbc
+from azure.identity import AzureCliCredential
+from azure.search.documents import SearchClient
+from azure.search.documents.indexes import SearchIndexClient
 # SQL Server setup
 
 SQL_SERVER = '<YOUR-SQL-SERVER-NAME>.database.windows.net'
 SQL_DATABASE = '<YOUR-DATABASE-NAME>'
 
 credential = AzureCliCredential(process_timeout=30)
 
-try:
+try:
     driver = "{ODBC Driver 18 for SQL Server}"
     token_bytes = credential.get_token("https://database.windows.net/.default").token.encode("utf-16-LE")
     token_struct = struct.pack(f"<I{len(token_bytes)}s", len(token_bytes), token_bytes)
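
The pyodbc.connect call itself sits in this diff's collapsed context. As a minimal sketch, assuming the hidden lines follow the standard Microsoft Entra token pattern for pyodbc, the packed token is passed through the pre-connect attribute SQL_COPT_SS_ACCESS_TOKEN (1256):

    # Assumed shape of the elided connection step; not part of this diff.
    SQL_COPT_SS_ACCESS_TOKEN = 1256  # ODBC pre-connect attribute for access tokens
    conn_str = f"DRIVER={driver};SERVER={SQL_SERVER};DATABASE={SQL_DATABASE}"
    conn = pyodbc.connect(conn_str, attrs_before={SQL_COPT_SS_ACCESS_TOKEN: token_struct})
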
@@ -31,15 +34,16 @@
     cursor = conn.cursor()
     print("SQL Server connection established.")
 
+
 def export_table_to_csv(table_name, output_dir=".", cursor=cursor):
     """
     Export a SQL table to a CSV file.
-
+
     Args:
         table_name: Name of the table to export
         output_dir: Directory to save the CSV file (default: current directory)
         cursor: Database cursor to use (default: uses the global cursor)
-
+
     Returns:
         Path to the created CSV file
     """
@@ -51,42 +55,42 @@ def export_table_to_csv(table_name, output_dir=".", cursor=cursor):
     csv_filename = f"sample_{table_name}.csv"
     # csv_filename = f"{table_name}_{timestamp}.csv"
     csv_path = os.path.join(output_dir, csv_filename)
-
+
     try:
         # Query all data from the table
         query = f"SELECT * FROM {table_name}"
         print(f"Executing query: {query}")
         cursor.execute(query)
-
+
         # Get column names
         columns = [column[0] for column in cursor.description]
-
+
         # Write to CSV
         print(f"Writing data to '{csv_path}'...")
         with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
             writer = csv.writer(csvfile)
-
+
             # Write header
             writer.writerow(columns)
-
+
             # Write data rows
             row_count = 0
             while True:
                 rows = cursor.fetchmany(1000)  # Fetch in batches for better performance
                 if not rows:
                     break
-
+
                 for row in rows:
                     # Convert each value to string, handling None values
                     writer.writerow([str(val) if val is not None else '' for val in row])
                     row_count += 1
-
+
                     if row_count % 10000 == 0:
                         print(f"  Exported {row_count} rows...")
-
+
         print(f"✓ Successfully exported {row_count} rows to '{csv_path}'")
         return csv_path
-
+
     except Exception as e:
         print(f"Error exporting table '{table_name}': {e}")
         raise
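
A usage sketch for the function above; the table name is hypothetical, substitute any table in SQL_DATABASE:

    # Hypothetical table name, for illustration only
    csv_path = export_table_to_csv("call_transcripts", output_dir="exports")
    print(f"CSV written to {csv_path}")
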
@@ -95,46 +99,45 @@ def export_table_to_csv(table_name, output_dir=".", cursor=cursor):
 def export_table_to_json(table_name, output_dir=".", cursor=cursor, format="json"):
     """
     Export a SQL table to a JSON or JSON Lines file.
-
+
     Args:
         table_name: Name of the table to export
         output_dir: Directory to save the file (default: current directory)
         cursor: Database cursor to use (default: uses the global cursor)
         format: Output format - "json" for JSON array or "jsonl" for JSON Lines (default: "json")
-
+
     Returns:
         Path to the created file
     """
     # Create output directory if it doesn't exist
     os.makedirs(output_dir, exist_ok=True)
-
+
     # Generate output filename
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
     file_extension = "json" if format == "json" else "jsonl"
     filename = f"sample_{table_name}.{file_extension}"
     # filename = f"{table_name}_{timestamp}.{file_extension}"
     file_path = os.path.join(output_dir, filename)
-
+
     try:
         # Query all data from the table
         query = f"SELECT * FROM {table_name}"
         print(f"Executing query: {query}")
         cursor.execute(query)
-
+
         # Get column names
         columns = [column[0] for column in cursor.description]
-
+
         if format == "json":
             # Collect all rows for JSON array format
-            print(f"Collecting data from table...")
+            print("Collecting data from table...")
             rows_data = []
             row_count = 0
-
+
             while True:
                 rows = cursor.fetchmany(1000)  # Fetch in batches for better performance
                 if not rows:
                     break
-
+
                 for row in rows:
                     # Convert row to dictionary
                     row_dict = {}
@@ -147,26 +150,26 @@ def export_table_to_json(table_name, output_dir=".", cursor=cursor, format="json
                             row_dict[col] = value
                     rows_data.append(row_dict)
                     row_count += 1
-
+
                     if row_count % 10000 == 0:
                         print(f"  Collected {row_count} rows...")
-
+
             # Write as JSON array
             print(f"Writing {row_count} rows to '{file_path}'...")
             with open(file_path, 'w', encoding='utf-8') as json_file:
                 json.dump(rows_data, json_file, ensure_ascii=False, indent=2)
-
+
         else:  # jsonl format
             # Write to JSON Lines format (one JSON object per line)
             print(f"Writing data to '{file_path}'...")
             row_count = 0
-
+
             with open(file_path, 'w', encoding='utf-8') as jsonl_file:
                 while True:
                     rows = cursor.fetchmany(1000)  # Fetch in batches for better performance
                     if not rows:
                         break
-
+
                     for row in rows:
                         # Convert row to dictionary
                         row_dict = {}
@@ -179,13 +182,13 @@ def export_table_to_json(table_name, output_dir=".", cursor=cursor, format="json
                                 row_dict[col] = value
                         jsonl_file.write(json.dumps(row_dict, ensure_ascii=False) + '\n')
                         row_count += 1
-
+
                         if row_count % 10000 == 0:
                             print(f"  Exported {row_count} rows...")
-
+
         print(f"✓ Successfully exported {row_count} rows to '{file_path}'")
         return file_path
-
+
     except Exception as e:
         print(f"Error exporting table '{table_name}': {e}")
         raise
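
A usage sketch, again with a hypothetical table name. The "jsonl" branch streams each batch to disk as it is fetched, while the "json" branch accumulates every row in rows_data before writing, so JSON Lines is the gentler option for large tables:

    # Hypothetical: one JSON object per line, written incrementally
    jsonl_path = export_table_to_json("call_transcripts", output_dir="exports", format="jsonl")
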
@@ -202,10 +205,6 @@ def export_table_to_json(table_name, output_dir=".", cursor=cursor, format="json
 cursor.close()
 conn.close()
 print("Database connection closed.")
-
-
-from azure.search.documents import SearchClient
-from azure.search.documents.indexes import SearchIndexClient
 SEARCH_ENDPOINT = "https://<YOUR-SEARCH-SERVICE-NAME>.search.windows.net"
 INDEX_NAME = "call_transcripts_index"
 
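
The construction of search_client also sits in collapsed context. A minimal sketch of the assumed setup, reusing the AzureCliCredential created for SQL above:

    # Assumed shape of the elided client setup; not part of this diff.
    search_client = SearchClient(endpoint=SEARCH_ENDPOINT, index_name=INDEX_NAME, credential=credential)
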
@@ -219,26 +218,25 @@ def export_table_to_json(table_name, output_dir=".", cursor=cursor, format="json
 def export_search_index_to_json(index_name, output_dir=".", search_client=search_client, format="json"):
     """
     Export all documents from an Azure AI Search index to a JSON or JSON Lines file.
-
+
     Args:
         index_name: Name of the search index
         output_dir: Directory to save the file (default: current directory)
         search_client: Azure Search client to use (default: uses the global search_client)
         format: Output format - "json" for JSON array or "jsonl" for JSON Lines (default: "json")
-
+
     Returns:
         Path to the created file
     """
     # Create output directory if it doesn't exist
     os.makedirs(output_dir, exist_ok=True)
-
+
     # Generate output filename
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
     file_extension = "json" if format == "json" else "jsonl"
     filename = f"sample_{index_name}.{file_extension}"
     # filename = f"{index_name}_{timestamp}.{file_extension}"
     file_path = os.path.join(output_dir, filename)
-
+
     try:
         # Search for all documents (empty search returns everything)
         print(f"Retrieving documents from search index '{index_name}'...")
@@ -247,31 +245,31 @@ def export_search_index_to_json(index_name, output_dir=".", search_client=search
             include_total_count=True,
             top=1000  # Adjust batch size as needed
         )
-
+
         # Collect documents
         documents = []
         doc_count = 0
-
+
         if format == "json":
             # Collect all documents for JSON array format
-            print(f"Collecting documents...")
+            print("Collecting documents...")
             for result in results:
                 doc = dict(result)
                 documents.append(doc)
                 doc_count += 1
-
+
                 if doc_count % 1000 == 0:
                     print(f"  Collected {doc_count} documents...")
-
+
             if doc_count == 0:
                 print(f"No documents found in index '{index_name}'")
                 return None
-
+
             # Write as JSON array
             print(f"Writing {doc_count} documents to '{file_path}'...")
             with open(file_path, 'w', encoding='utf-8') as json_file:
                 json.dump(documents, json_file, ensure_ascii=False, indent=2)
-
+
         else:  # jsonl format
             # Write to JSON Lines format (one JSON object per line)
             print(f"Writing documents to '{file_path}'...")
@@ -280,17 +278,17 @@ def export_search_index_to_json(index_name, output_dir=".", search_client=search
                 doc = dict(result)
                 jsonl_file.write(json.dumps(doc, ensure_ascii=False) + '\n')
                 doc_count += 1
-
+
                 if doc_count % 1000 == 0:
                     print(f"  Exported {doc_count} documents...")
-
+
             if doc_count == 0:
                 print(f"No documents found in index '{index_name}'")
                 return None
-
+
         print(f"✓ Successfully exported {doc_count} documents to '{file_path}'")
         return file_path
-
+
     except Exception as e:
         print(f"Error exporting search index '{index_name}': {e}")
         raise
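
A usage sketch for the index export. One caveat: in the Search REST API, $top bounds the total result count rather than the page size, so top=1000 in the search call above likely caps the export at 1,000 documents; dropping top, or paging explicitly with skip, may be needed for larger indexes:

    # Export the index configured above to ./exports/sample_call_transcripts_index.json
    path = export_search_index_to_json(INDEX_NAME, output_dir="exports")
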