Skip to content

Commit 3c5b58d

Browse files
authored
Merge pull request #81 from microsoft/dev
Support custom models
2 parents e529772 + 76abff7 commit 3c5b58d

27 files changed

+552
-365
lines changed

.gitignore

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11

2-
3-
*openai-keys.env
2+
*api-keys.env
43
**/*.ipynb_checkpoints/
54

65
.DS_Store

DEVELOPMENT.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,21 @@ How to set up your local machine.
1818
```bash
1919
pip install -r requirements.txt
2020
```
21+
- **Configure environment variables (optional)**
22+
- copy `api-keys.env.template` to `api-keys.env` and add your API keys.
23+
- different providers require different fields; please refer to the [LiteLLM setup](https://docs.litellm.ai/docs#litellm-python-sdk) guide for more details.
24+
- currently only endpoint, model, api_key, api_base, api_version are supported.
25+
- this helps data formulator to automatically load the API keys when you run the app, so you don't need to set the API keys in the app UI.
2126
22-
- **Run**
27+
- **Run the app**
2328
- **Windows**
2429
```bash
2530
.\local_server.bat
2631
```
2732
2833
- **Unix-based**
2934
```bash
30-
.\local_server.sh
35+
./local_server.sh
3136
```
3237
3338
## Frontend (TypeScript)

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,12 @@ Transform data and create rich visualizations iteratively with AI 🪄. Try Data
2222

2323
## News 🔥🔥🔥
2424

25+
- [02-12-2025] More models supported now! Powered by [LiteLLM](https://github.com/BerriAI/litellm)!
26+
- Now supports OpenAI, Azure, Ollama, and Anthropic models (and more based on LiteLLM);
27+
- Models with strong code generation capabilities are recommended (gpt-4o, claude-3-5-sonnet etc.);
28+
- You can store API keys in `api-keys.env` to avoid typing them every time (see template `api-keys.env.template`).
29+
- Let us know which models you have good/bad experiences with, and what models you would like to see supported! [[comment here]](https://github.com/microsoft/data-formulator/issues/49)
30+
2531
- [11-07-2024] Minor fun update: data visualization challenges!
2632
- We added a few visualization challenges with the sample datasets. Can you complete them all? [[try them out!]](https://github.com/microsoft/data-formulator/issues/53#issue-2641841252)
2733
- Comment in the issue when you did, or share your results/questions with others! [[comment here]](https://github.com/microsoft/data-formulator/issues/53)
@@ -77,7 +83,7 @@ Play with Data Formulator with one of the following options:
7783

7884
## Using Data Formulator
7985

80-
Once youve completed the setup using either option, follow these steps to start using Data Formulator:
86+
Once you've completed the setup using either option, follow these steps to start using Data Formulator:
8187

8288
### The basics of data visualization
8389
* Provide OpenAI keys and select a model (GPT-4o suggested) and choose a dataset.

api-keys.env.template

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# OpenAI Configuration
2+
OPENAI_ENABLED=true
3+
OPENAI_API_KEY=#your-openai-api-key
4+
OPENAI_MODELS=gpt-4o,gpt-4o-mini # comma separated list of models
5+
6+
# Azure OpenAI Configuration
7+
AZURE_ENABLED=true
8+
AZURE_API_KEY=#your-azure-openai-api-key
9+
AZURE_API_BASE=https://your-azure-openai-endpoint.openai.azure.com/
10+
AZURE_API_VERSION=2024-02-15-preview
11+
AZURE_MODELS=gpt-4o
12+
13+
# Anthropic Configuration
14+
ANTHROPIC_ENABLED=true
15+
ANTHROPIC_API_KEY=#your-anthropic-api-key
16+
ANTHROPIC_MODELS=claude-3-5-sonnet-20241022,claude-3-5-haiku-20241022
17+
18+
# Ollama Configuration
19+
OLLAMA_ENABLED=true
20+
OLLAMA_API_BASE=http://localhost:11434
21+
OLLAMA_MODELS=codellama:7b # models with good code generation capabilities recommended
22+
23+
# if you want to add other models, you can add them with PROVIDER_API_KEY=your-api-key, PROVIDER_MODELS=model1,model2 etc
24+
# (replacing PROVIDER with the provider name like GEMINI, ANTHROPIC, AZURE, OPENAI, OLLAMA etc. as long as they are supported by LiteLLM)

py-src/data_formulator/agents/agent_code_explanation.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,8 @@ def transform_data(df_0):
6666

6767
class CodeExplanationAgent(object):
6868

69-
def __init__(self, client, model):
69+
def __init__(self, client):
7070
self.client = client
71-
self.model = model
7271

7372
def run(self, input_tables, code):
7473

@@ -82,11 +81,8 @@ def run(self, input_tables, code):
8281
{"role":"user","content": user_query}]
8382

8483
###### the part that calls open_ai
85-
response = self.client.chat.completions.create(
86-
model=self.model, messages = messages, temperature=0.7, max_tokens=1200,
87-
top_p=0.95, n=1, frequency_penalty=0, presence_penalty=0, stop=None)
84+
response = self.client.get_completion(messages = messages)
8885

89-
logger.info('\n=== explanation output ===>\n')
90-
logger.info(response.choices[0].message.content)
86+
logger.info(f"=== explanation output ===>\n{response.choices[0].message.content}\n")
9187

9288
return response.choices[0].message.content

py-src/data_formulator/agents/agent_concept_derive.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -167,9 +167,8 @@
167167

168168
class ConceptDeriveAgent(object):
169169

170-
def __init__(self, client, model):
170+
def __init__(self, client):
171171
self.client = client
172-
self.model = model
173172

174173
def run(self, input_table, input_fields, output_field, description, n=1):
175174
"""derive a new concept based on input table, input fields, and output field name, (and description)
@@ -190,9 +189,7 @@ def run(self, input_table, input_fields, output_field, description, n=1):
190189
{"role":"user","content": user_query}]
191190

192191
###### the part that calls open_ai
193-
response = self.client.chat.completions.create(
194-
model=self.model, messages = messages, temperature=0.7, max_tokens=1200,
195-
top_p=0.95, n=n, frequency_penalty=0, presence_penalty=0, stop=None)
192+
response = self.client.get_completion(messages = messages)
196193

197194
#log = {'messages': messages, 'response': response.model_dump(mode='json')}
198195

py-src/data_formulator/agents/agent_data_clean.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,7 @@
7878

7979
class DataCleanAgent(object):
8080

81-
def __init__(self, client, model):
82-
self.model = model
81+
def __init__(self, client):
8382
self.client = client
8483

8584
def run(self, content_type, raw_data, image_cleaning_instruction):
@@ -129,9 +128,7 @@ def run(self, content_type, raw_data, image_cleaning_instruction):
129128
messages = [system_message, user_prompt]
130129

131130
###### the part that calls open_ai
132-
response = self.client.chat.completions.create(
133-
model=self.model, messages = messages, temperature=0.7, max_tokens=1200,
134-
top_p=0.95, n=1, frequency_penalty=0, presence_penalty=0, stop=None)
131+
response = self.client.get_completion(messages = messages)
135132

136133
candidates = []
137134
for choice in response.choices:

py-src/data_formulator/agents/agent_data_filter.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,8 @@ def filter_row(row, df):
125125

126126
class DataFilterAgent(object):
127127

128-
def __init__(self, client, model):
128+
def __init__(self, client):
129129
self.client = client
130-
self.model = model
131130

132131
def process_gpt_result(self, input_table, response, messages):
133132
#log = {'messages': messages, 'response': response.model_dump(mode='json')}
@@ -177,9 +176,7 @@ def run(self, input_table, description):
177176
{"role":"user","content": user_query}]
178177

179178
###### the part that calls open_ai
180-
response = self.client.chat.completions.create(
181-
model=self.model, messages = messages, temperature=0.7, max_tokens=1200,
182-
top_p=0.95, n=1, frequency_penalty=0, presence_penalty=0, stop=None)
179+
response = self.client.get_completion(messages = messages)
183180

184181
return self.process_gpt_result(input_table, response, messages)
185182

@@ -190,8 +187,6 @@ def followup(self, input_table, dialog, new_instruction: str, n=1):
190187
"content": new_instruction + '\nupdate the filter function accordingly'}]
191188

192189
##### the part that calls open_ai
193-
response = self.client.chat.completions.create(
194-
model=self.model, messages=messages, temperature=0.7, max_tokens=1200,
195-
top_p=0.95, n=n, frequency_penalty=0, presence_penalty=0, stop=None)
190+
response = self.client.get_completion(messages = messages)
196191

197192
return self.process_gpt_result(input_table, response, messages)

py-src/data_formulator/agents/agent_data_load.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -124,9 +124,8 @@
124124

125125
class DataLoadAgent(object):
126126

127-
def __init__(self, client, model):
127+
def __init__(self, client):
128128
self.client = client
129-
self.model = model
130129

131130
def run(self, input_data, n=1):
132131

@@ -140,9 +139,7 @@ def run(self, input_data, n=1):
140139
{"role":"user","content": user_query}]
141140

142141
###### the part that calls open_ai
143-
response = self.client.chat.completions.create(
144-
model=self.model, messages=messages, temperature=0.2, max_tokens=4096,
145-
top_p=0.95, n=n, frequency_penalty=0, presence_penalty=0, stop=None)
142+
response = self.client.get_completion(messages = messages)
146143

147144
#log = {'messages': messages, 'response': response.model_dump(mode='json')}
148145

py-src/data_formulator/agents/agent_data_rec.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,8 @@ def transform_data(df):
126126

127127
class DataRecAgent(object):
128128

129-
def __init__(self, client, model, system_prompt=None):
129+
def __init__(self, client, system_prompt=None):
130130
self.client = client
131-
self.model = model
132131
self.system_prompt = system_prompt if system_prompt is not None else SYSTEM_PROMPT
133132

134133
def process_gpt_response(self, input_tables, messages, response):
@@ -171,7 +170,7 @@ def process_gpt_response(self, input_tables, messages, response):
171170
logger.warning(error_message)
172171
result = {'status': 'other error', 'code': code_str, 'content': f"Unexpected error: {error_message}"}
173172
else:
174-
result = {'status': 'no transformation', 'code': "", 'content': input_tables[0]['rows']}
173+
result = {'status': 'error', 'code': "", 'content': "No code block found in the response. The model is unable to generate code to complete the task."}
175174

176175
result['dialog'] = [*messages, {"role": choice.message.role, "content": choice.message.content}]
177176
result['agent'] = 'DataRecAgent'
@@ -192,7 +191,7 @@ def run(self, input_tables, description, n=1):
192191
messages = [{"role":"system", "content": self.system_prompt},
193192
{"role":"user","content": user_query}]
194193

195-
response = completion_response_wrapper(self.client, self.model, messages, n)
194+
response = completion_response_wrapper(self.client, messages, n)
196195

197196
return self.process_gpt_response(input_tables, messages, response)
198197

@@ -204,7 +203,6 @@ def followup(self, input_tables, dialog, new_instruction: str, n=1):
204203

205204
messages = [*dialog, {"role":"user", "content": f"Update: \n\n{new_instruction}"}]
206205

207-
##### the part that calls open_ai
208-
response = completion_response_wrapper(self.client, self.model, messages, n)
206+
response = completion_response_wrapper(self.client, messages, n)
209207

210208
return self.process_gpt_response(input_tables, messages, response)

0 commit comments

Comments
 (0)