Skip to content

Commit 4538055

Browse files
committed
various fixes
1 parent 16c2ea9 commit 4538055

11 files changed

+10
-55
lines changed

py-src/data_formulator/agents/agent_code_explanation.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,7 @@
11
# Copyright (c) Microsoft Corporation.
22
# Licensed under the MIT License.
33

4-
import pandas as pd
5-
from data_formulator.agents.agent_utils import generate_data_summary, extract_code_from_gpt_response
4+
from data_formulator.agents.agent_utils import generate_data_summary
65

76
import logging
87

py-src/data_formulator/agents/agent_py_concept_derive.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
# Copyright (c) Microsoft Corporation.
22
# Licensed under the MIT License.
33

4-
import json
54
import time
65

76
from data_formulator.agents.agent_utils import generate_data_summary, extract_code_from_gpt_response
@@ -10,7 +9,6 @@
109
import traceback
1110

1211
import logging
13-
import datetime
1412

1513
logger = logging.getLogger(__name__)
1614

py-src/data_formulator/agents/agent_py_data_transform.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22
# Licensed under the MIT License.
33

44
import json
5-
import sys
65

76
from data_formulator.agents.agent_utils import extract_json_objects, generate_data_summary, extract_code_from_gpt_response
87
import data_formulator.py_sandbox as py_sandbox

py-src/data_formulator/agents/agent_query_completion.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,9 @@
11
# Copyright (c) Microsoft Corporation.
22
# Licensed under the MIT License.
33

4-
import pandas as pd
54
import json
65

7-
from data_formulator.agents.agent_utils import extract_code_from_gpt_response, extract_json_objects
6+
from data_formulator.agents.agent_utils import extract_json_objects
87
import re
98
import logging
109

py-src/data_formulator/agents/agent_utils.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6,10 +6,6 @@
66
import pandas as pd
77
import numpy as np
88

9-
import base64
10-
11-
from pprint import pprint
12-
139
import re
1410

1511
def string_to_py_varname(var_str):

py-src/data_formulator/agents/client_utils.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
import os
21
import litellm
32
import openai
43
from azure.identity import DefaultAzureCredential, get_bearer_token_provider

py-src/data_formulator/data_loader/kusto_data_loader.py

Lines changed: 4 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -51,41 +51,6 @@ def query(self, kql: str) -> pd.DataFrame:
5151
return dataframe_from_result_table(result.primary_results[0])
5252

5353
def list_tables(self) -> List[Dict[str, Any]]:
54-
# first list functions (views)
55-
query = ".show functions"
56-
function_result_df = self.query(query)
57-
58-
functions = []
59-
for func in function_result_df.to_dict(orient="records"):
60-
func_name = func['Name']
61-
result = self.query(f".show function ['{func_name}'] schema as json").to_dict(orient="records")
62-
schema = json.loads(result[0]['Schema'])
63-
parameters = schema['InputParameters']
64-
columns = [{
65-
'name': r["Name"],
66-
'type': r["Type"]
67-
} for r in schema['OutputColumns']]
68-
69-
# skip functions with parameters at the moment
70-
if len(parameters) > 0:
71-
continue
72-
73-
sample_query = f"['{func_name}'] | take {10}"
74-
sample_result = self.query(sample_query).to_dict(orient="records")
75-
76-
function_metadata = {
77-
"row_count": 0,
78-
"columns": columns,
79-
"parameters": parameters,
80-
"sample_rows": sample_result
81-
}
82-
functions.append({
83-
"type": "function",
84-
"name": func_name,
85-
"metadata": function_metadata
86-
})
87-
88-
# then list tables
8954
query = ".show tables"
9055
tables_df = self.query(query)
9156

@@ -101,8 +66,8 @@ def list_tables(self) -> List[Dict[str, Any]]:
10166
row_count_result = self.query(f".show table ['{table_name}'] details").to_dict(orient="records")
10267
row_count = row_count_result[0]["TotalRowCount"]
10368

104-
sample_query = f"['{table_name}'] | take {10}"
105-
sample_result = self.query(sample_query).to_dict(orient="records")
69+
sample_query = f"['{table_name}'] | take {5}"
70+
sample_result = json.loads(self.query(sample_query).to_json(orient="records"))
10671

10772
table_metadata = {
10873
"row_count": row_count,
@@ -116,7 +81,7 @@ def list_tables(self) -> List[Dict[str, Any]]:
11681
"metadata": table_metadata
11782
})
11883

119-
return functions + tables
84+
return tables
12085

12186
def ingest_data(self, table_name: str, name_as: str = None, size: int = 5000000) -> pd.DataFrame:
12287
if name_as is None:
@@ -167,7 +132,7 @@ def ingest_data(self, table_name: str, name_as: str = None, size: int = 5000000)
167132
total_rows_ingested += len(chunk_df)
168133

169134
def view_query_sample(self, query: str) -> str:
170-
return self.query(query).head(10).to_dict(orient="records")
135+
return json.loads(self.query(query).head(10).to_json(orient="records"))
171136

172137
def ingest_data_from_query(self, query: str, name_as: str) -> pd.DataFrame:
173138
# Sanitize the table name for SQL compatibility

py-src/data_formulator/data_loader/mysql_data_loader.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,7 @@ def ingest_data(self, table_name: str, name_as: str | None = None, size: int = 1
9393
""")
9494

9595
def view_query_sample(self, query: str) -> str:
96-
return self.duck_db_conn.execute(query).df().head(10).to_dict(orient="records")
96+
return json.loads(self.duck_db_conn.execute(query).df().head(10).to_json(orient="records"))
9797

9898
def ingest_data_from_query(self, query: str, name_as: str) -> pd.DataFrame:
9999
# Execute the query and get results as a DataFrame

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,6 @@ classifiers = [
2121
]
2222

2323
dependencies = [
24-
"autopep8",
2524
"jupyter",
2625
"pandas",
2726
"docker",
@@ -31,6 +30,7 @@ dependencies = [
3130
"flask-cors",
3231
"openai",
3332
"azure-identity",
33+
"azure-kusto-data",
3434
"azure-keyvault-secrets",
3535
"python-dotenv",
3636
"vega_datasets",

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
autopep8
21
jupyter
32
pandas
43
docker
@@ -7,6 +6,7 @@ matplotlib
76
flask
87
openai
98
azure-identity
9+
azure-kusto-data
1010
azure-keyvault-secrets
1111
python-dotenv
1212
vega_datasets

0 commit comments

Comments
 (0)