Skip to content

Commit 3c5b58d

Browse files
authored
Merge pull request #81 from microsoft/dev
Support custom models
2 parents e529772 + 76abff7 commit 3c5b58d

27 files changed

+552
-365
lines changed

.gitignore

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11

2-
3-
*openai-keys.env
2+
*api-keys.env
43
**/*.ipynb_checkpoints/
54

65
.DS_Store

DEVELOPMENT.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,21 @@ How to set up your local machine.
1818
```bash
1919
pip install -r requirements.txt
2020
```
21+
- **Configure environment variables (optional)**
22+
- copy `api-keys.env.template` to `api-keys.env` and add your API keys.
23+
- different providers require different fields; please refer to the [LiteLLM setup](https://docs.litellm.ai/docs#litellm-python-sdk) guide for more details.
24+
- currently only endpoint, model, api_key, api_base, api_version are supported.
25+
- this helps data formulator to automatically load the API keys when you run the app, so you don't need to set the API keys in the app UI.
2126
22-
- **Run**
27+
- **Run the app**
2328
- **Windows**
2429
```bash
2530
.\local_server.bat
2631
```
2732
2833
- **Unix-based**
2934
```bash
30-
.\local_server.sh
35+
./local_server.sh
3136
```
3237
3338
## Frontend (TypeScript)

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ Transform data and create rich visualizations iteratively with AI 🪄. Try Data
2222

2323
## News 🔥🔥🔥
2424

25+
- [02-12-2025] More models supported now! Powered by [LiteLLM](https://github.com/BerriAI/litellm)!
26+
- Now supports OpenAI, Azure, Ollama, and Anthropic models (and more based on LiteLLM);
27+
- Models with strong code generation capabilities are recommended (gpt-4o, claude-3-5-sonnet etc.);
28+
- You can store API keys in `api-keys.env` to avoid typing them every time (see template `api-keys.env.template`).
29+
- Let us know which models you have good/bad experiences with, and what models you would like to see supported! [[comment here]](https://github.com/microsoft/data-formulator/issues/49)
30+
2531
- [11-07-2024] Minor fun update: data visualization challenges!
2632
- We added a few visualization challenges with the sample datasets. Can you complete them all? [[try them out!]](https://github.com/microsoft/data-formulator/issues/53#issue-2641841252)
2733
- Comment in the issue when you did, or share your results/questions with others! [[comment here]](https://github.com/microsoft/data-formulator/issues/53)
@@ -77,7 +83,7 @@ Play with Data Formulator with one of the following options:
7783

7884
## Using Data Formulator
7985

80-
Once youve completed the setup using either option, follow these steps to start using Data Formulator:
86+
Once you've completed the setup using either option, follow these steps to start using Data Formulator:
8187

8288
### The basics of data visualization
8389
* Provide OpenAI keys and select a model (GPT-4o suggested) and choose a dataset.

api-keys.env.template

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# OpenAI Configuration
2+
OPENAI_ENABLED=true
3+
OPENAI_API_KEY=#your-openai-api-key
4+
OPENAI_MODELS=gpt-4o,gpt-4o-mini # comma separated list of models
5+
6+
# Azure OpenAI Configuration
7+
AZURE_ENABLED=true
8+
AZURE_API_KEY=#your-azure-openai-api-key
9+
AZURE_API_BASE=https://your-azure-openai-endpoint.openai.azure.com/
10+
AZURE_API_VERSION=2024-02-15-preview
11+
AZURE_MODELS=gpt-4o
12+
13+
# Anthropic Configuration
14+
ANTHROPIC_ENABLED=true
15+
ANTHROPIC_API_KEY=#your-anthropic-api-key
16+
ANTHROPIC_MODELS=claude-3-5-sonnet-20241022,claude-3-5-haiku-20241022
17+
18+
# Ollama Configuration
19+
OLLAMA_ENABLED=true
20+
OLLAMA_API_BASE=http://localhost:11434
21+
OLLAMA_MODELS=codellama:7b # models with good code generation capabilities recommended
22+
23+
# if you want to add other models, you can add them with PROVIDER_API_KEY=your-api-key, PROVIDER_MODELS=model1,model2 etc
24+
# (replacing PROVIDER with the provider name like GEMINI, ANTHROPIC, AZURE, OPENAI, OLLAMA etc. as long as they are supported by LiteLLM)

py-src/data_formulator/agents/agent_code_explanation.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,8 @@ def transform_data(df_0):
6666

6767
class CodeExplanationAgent(object):
6868

69-
def __init__(self, client, model):
69+
def __init__(self, client):
7070
self.client = client
71-
self.model = model
7271

7372
def run(self, input_tables, code):
7473

@@ -82,11 +81,8 @@ def run(self, input_tables, code):
8281
{"role":"user","content": user_query}]
8382

8483
###### the part that calls open_ai
85-
response = self.client.chat.completions.create(
86-
model=self.model, messages = messages, temperature=0.7, max_tokens=1200,
87-
top_p=0.95, n=1, frequency_penalty=0, presence_penalty=0, stop=None)
84+
response = self.client.get_completion(messages = messages)
8885

89-
logger.info('\n=== explanation output ===>\n')
90-
logger.info(response.choices[0].message.content)
86+
logger.info(f"=== explanation output ===>\n{response.choices[0].message.content}\n")
9187

9288
return response.choices[0].message.content

py-src/data_formulator/agents/agent_concept_derive.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -167,9 +167,8 @@
167167

168168
class ConceptDeriveAgent(object):
169169

170-
def __init__(self, client, model):
170+
def __init__(self, client):
171171
self.client = client
172-
self.model = model
173172

174173
def run(self, input_table, input_fields, output_field, description, n=1):
175174
"""derive a new concept based on input table, input fields, and output field name, (and description)
@@ -190,9 +189,7 @@ def run(self, input_table, input_fields, output_field, description, n=1):
190189
{"role":"user","content": user_query}]
191190

192191
###### the part that calls open_ai
193-
response = self.client.chat.completions.create(
194-
model=self.model, messages = messages, temperature=0.7, max_tokens=1200,
195-
top_p=0.95, n=n, frequency_penalty=0, presence_penalty=0, stop=None)
192+
response = self.client.get_completion(messages = messages)
196193

197194
#log = {'messages': messages, 'response': response.model_dump(mode='json')}
198195

py-src/data_formulator/agents/agent_data_clean.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,7 @@
7878

7979
class DataCleanAgent(object):
8080

81-
def __init__(self, client, model):
82-
self.model = model
81+
def __init__(self, client):
8382
self.client = client
8483

8584
def run(self, content_type, raw_data, image_cleaning_instruction):
@@ -129,9 +128,7 @@ def run(self, content_type, raw_data, image_cleaning_instruction):
129128
messages = [system_message, user_prompt]
130129

131130
###### the part that calls open_ai
132-
response = self.client.chat.completions.create(
133-
model=self.model, messages = messages, temperature=0.7, max_tokens=1200,
134-
top_p=0.95, n=1, frequency_penalty=0, presence_penalty=0, stop=None)
131+
response = self.client.get_completion(messages = messages)
135132

136133
candidates = []
137134
for choice in response.choices:

py-src/data_formulator/agents/agent_data_filter.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,8 @@ def filter_row(row, df):
125125

126126
class DataFilterAgent(object):
127127

128-
def __init__(self, client, model):
128+
def __init__(self, client):
129129
self.client = client
130-
self.model = model
131130

132131
def process_gpt_result(self, input_table, response, messages):
133132
#log = {'messages': messages, 'response': response.model_dump(mode='json')}
@@ -177,9 +176,7 @@ def run(self, input_table, description):
177176
{"role":"user","content": user_query}]
178177

179178
###### the part that calls open_ai
180-
response = self.client.chat.completions.create(
181-
model=self.model, messages = messages, temperature=0.7, max_tokens=1200,
182-
top_p=0.95, n=1, frequency_penalty=0, presence_penalty=0, stop=None)
179+
response = self.client.get_completion(messages = messages)
183180

184181
return self.process_gpt_result(input_table, response, messages)
185182

@@ -190,8 +187,6 @@ def followup(self, input_table, dialog, new_instruction: str, n=1):
190187
"content": new_instruction + '\nupdate the filter function accordingly'}]
191188

192189
##### the part that calls open_ai
193-
response = self.client.chat.completions.create(
194-
model=self.model, messages=messages, temperature=0.7, max_tokens=1200,
195-
top_p=0.95, n=n, frequency_penalty=0, presence_penalty=0, stop=None)
190+
response = self.client.get_completion(messages = messages)
196191

197192
return self.process_gpt_result(input_table, response, messages)

py-src/data_formulator/agents/agent_data_load.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,8 @@
124124

125125
class DataLoadAgent(object):
126126

127-
def __init__(self, client, model):
127+
def __init__(self, client):
128128
self.client = client
129-
self.model = model
130129

131130
def run(self, input_data, n=1):
132131

@@ -140,9 +139,7 @@ def run(self, input_data, n=1):
140139
{"role":"user","content": user_query}]
141140

142141
###### the part that calls open_ai
143-
response = self.client.chat.completions.create(
144-
model=self.model, messages=messages, temperature=0.2, max_tokens=4096,
145-
top_p=0.95, n=n, frequency_penalty=0, presence_penalty=0, stop=None)
142+
response = self.client.get_completion(messages = messages)
146143

147144
#log = {'messages': messages, 'response': response.model_dump(mode='json')}
148145

py-src/data_formulator/agents/agent_data_rec.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,8 @@ def transform_data(df):
126126

127127
class DataRecAgent(object):
128128

129-
def __init__(self, client, model, system_prompt=None):
129+
def __init__(self, client, system_prompt=None):
130130
self.client = client
131-
self.model = model
132131
self.system_prompt = system_prompt if system_prompt is not None else SYSTEM_PROMPT
133132

134133
def process_gpt_response(self, input_tables, messages, response):
@@ -171,7 +170,7 @@ def process_gpt_response(self, input_tables, messages, response):
171170
logger.warning(error_message)
172171
result = {'status': 'other error', 'code': code_str, 'content': f"Unexpected error: {error_message}"}
173172
else:
174-
result = {'status': 'no transformation', 'code': "", 'content': input_tables[0]['rows']}
173+
result = {'status': 'error', 'code': "", 'content': "No code block found in the response. The model is unable to generate code to complete the task."}
175174

176175
result['dialog'] = [*messages, {"role": choice.message.role, "content": choice.message.content}]
177176
result['agent'] = 'DataRecAgent'
@@ -192,7 +191,7 @@ def run(self, input_tables, description, n=1):
192191
messages = [{"role":"system", "content": self.system_prompt},
193192
{"role":"user","content": user_query}]
194193

195-
response = completion_response_wrapper(self.client, self.model, messages, n)
194+
response = completion_response_wrapper(self.client, messages, n)
196195

197196
return self.process_gpt_response(input_tables, messages, response)
198197

@@ -204,7 +203,6 @@ def followup(self, input_tables, dialog, new_instruction: str, n=1):
204203

205204
messages = [*dialog, {"role":"user", "content": f"Update: \n\n{new_instruction}"}]
206205

207-
##### the part that calls open_ai
208-
response = completion_response_wrapper(self.client, self.model, messages, n)
206+
response = completion_response_wrapper(self.client, messages, n)
209207

210208
return self.process_gpt_response(input_tables, messages, response)

0 commit comments

Comments
 (0)