Merge pull request #36 from microsoft/dev

Chenglong-MS · web-flow · commit c5f86f0c3a5d · 2024-10-18T15:27:21.000-07:00
Dev
diff --git a/py-src/data_formulator/agents/agent_data_rec.py b/py-src/data_formulator/agents/agent_data_rec.py
@@ -137,7 +137,7 @@ def process_gpt_response(self, input_tables, messages, response):
         #log = {'messages': messages, 'response': response.model_dump(mode='json')}
 
         if isinstance(response, Exception):
-            result = {'status': 'other error', 'content': response.body}
+            result = {'status': 'other error', 'content': str(response.body)}
             return [result]
         
         candidates = []
@@ -156,22 +156,22 @@ def process_gpt_response(self, input_tables, messages, response):
 
             if len(code_blocks) > 0:
                 code_str = code_blocks[-1]
+
                 try:
                     result = py_sandbox.run_transform_in_sandbox2020(code_str, [t['rows'] for t in input_tables])
+                    result['code'] = code_str
 
                     if result['status'] == 'ok':
-                        new_data = json.loads(result['content'])
-                        result['content'] = new_data
+                        result['content'] = json.loads(result['content'])
                     else:
                         logger.info(result['content'])
-                    result['code'] = code_str
                 except Exception as e:
                     logger.warning('other error:')
                     error_message = traceback.format_exc()
                     logger.warning(error_message)
-                    result = {'status': 'other error', 'content': error_message}
+                    result = {'status': 'other error', 'code': code_str, 'content': f"Unexpected error: {error_message}"}
             else:
-                result = {'status': 'no transformation', 'content': input_tables[0]['rows']}
+                result = {'status': 'no transformation', 'code': "", 'content': input_tables[0]['rows']}
             
             result['dialog'] = [*messages, {"role": choice.message.role, "content": choice.message.content}]
             result['agent'] = 'DataRecAgent'
diff --git a/py-src/data_formulator/agents/agent_data_transform_v2.py b/py-src/data_formulator/agents/agent_data_transform_v2.py
@@ -205,7 +205,7 @@ def process_gpt_response(self, input_tables, messages, response):
         #logger.info(response.prompt_filter_results)
 
         if isinstance(response, Exception):
-            result = {'status': 'other error', 'content': response.body}
+            result = {'status': 'other error', 'content': str(response.body)}
             return [result]
         
         candidates = []
@@ -223,22 +223,23 @@ def process_gpt_response(self, input_tables, messages, response):
 
             if len(code_blocks) > 0:
                 code_str = code_blocks[-1]
+
                 try:
                     result = py_sandbox.run_transform_in_sandbox2020(code_str, [t['rows'] for t in input_tables])
+                    result['code'] = code_str
 
                     if result['status'] == 'ok':
-                        new_data = json.loads(result['content'])
-                        result['content'] = new_data
+                        # decode the sandbox's JSON string output into Python objects
+                        result['content'] = json.loads(result['content'])
                     else:
                         logger.info(result['content'])
-                    result['code'] = code_str
                 except Exception as e:
-                    logger.warning('other error:')
-                    error_message = traceback.format_exc()
+                    logger.warning('Error occurred during code execution:')
+                    error_message = f"An error occurred during code execution. Error type: {type(e).__name__}"
                     logger.warning(error_message)
-                    result = {'status': 'other error', 'content': error_message}
+                    result = {'status': 'other error', 'code': code_str, 'content': error_message}
             else:
-                result = {'status': 'no transformation', 'content': input_tables[0]['rows']}
+                result = {'status': 'no transformation', 'code': "", 'content': input_tables[0]['rows']}
             
             result['dialog'] = [*messages, {"role": choice.message.role, "content": choice.message.content}]
             result['agent'] = 'DataTransformationAgent'
@@ -264,11 +265,6 @@ def run(self, input_tables, description, expected_fields: list[str], n=1):
         messages = [{"role":"system", "content": self.system_prompt},
                     {"role":"user","content": user_query}]
         
-        ###### the part that calls open_ai
-        # response = self.client.chat.completions.create(
-        #     model=self.model, messages = messages, temperature=0.7, max_tokens=1200,
-        #     top_p=0.95, n=n, frequency_penalty=0, presence_penalty=0, stop=None)
-
         response = completion_response_wrapper(self.client, self.model, messages, n)
 
         return self.process_gpt_response(input_tables, messages, response)
diff --git a/py-src/data_formulator/app.py b/py-src/data_formulator/app.py
@@ -147,15 +147,18 @@ def test_model():
                     "endpoint": endpoint,
                     "key": key,
                     "model": model,
-                    "status": 'ok'
+                    "status": 'ok',
+                    "message": ""
                 }
         except Exception as e:
-            print(e)
+            print(f"Error: {e}")
+            error_message = str(e)
             result = {
                 "endpoint": endpoint,
                 "key": key,
                 "model": model,
-                "status": 'error'
+                "status": 'error',
+                "message": error_message,
             }
     else:
         {'status': 'error'}
@@ -362,7 +365,7 @@ def derive_data():
             results = agent.run(input_tables, instruction, [field['name'] for field in new_fields])
 
         repair_attempts = 0
-        while results[0]['status'] == 'error' and repair_attempts < 2:
+        while results[0]['status'] == 'error' and repair_attempts == 0: # only try once
             error_message = results[0]['content']
             new_instruction = f"We run into the following problem executing the code, please fix it:\n\n{error_message}\n\nPlease think step by step, reflect why the error happens and fix the code so that no more errors would occur."
 
@@ -375,35 +378,13 @@ def derive_data():
 
             repair_attempts += 1
         
-        response = flask.jsonify({ "status": "ok", "token": token, "results": results })
+        response = flask.jsonify({ "token": token, "status": "ok", "results": results })
     else:
         response = flask.jsonify({ "token": "", "status": "error", "results": [] })
 
     response.headers.add('Access-Control-Allow-Origin', '*')
     return response
 
-
-@app.route('/code-expl', methods=['GET', 'POST'])
-def request_code_expl():
-    if request.is_json:
-        app.logger.info("# request data: ")
-        content = request.get_json()        
-        token = content["token"]
-
-        client = get_client(content['model']['endpoint'], content['model']['key'])
-        model = content['model']['model']
-        app.logger.info(f" model: {content['model']}")
-
-        # each table is a dict with {"name": xxx, "rows": [...]}
-        input_tables = content["input_tables"]
-        code = content["code"]
-        
-        code_expl_agent = CodeExplanationAgent(client=client, model=model)
-        expl = code_expl_agent.run(input_tables, code)
-    else:
-        expl = ""
-    return expl
-
 @app.route('/refine-data', methods=['GET', 'POST'])
 def refine_data():
 
@@ -423,30 +404,48 @@ def refine_data():
         new_instruction = content["new_instruction"]
         
         print("previous dialog")
-        print(dialog[0]['content'])
+        print(dialog)
 
         # always resort to the data transform agent       
         agent = DataTransformationAgentV2(client, model=model)
         results = agent.followup(input_tables, dialog, [field['name'] for field in output_fields], new_instruction)
 
         repair_attempts = 0
-        while results[0]['status'] == 'error' and repair_attempts < 2:
+        while results[0]['status'] == 'error' and repair_attempts == 0: # only try once
             error_message = results[0]['content']
             new_instruction = f"We run into the following problem executing the code, please fix it:\n\n{error_message}\n\nPlease think step by step, reflect why the error happens and fix the code so that no more errors would occur."
-
-            response_message = dialog['response']['choices'][0]['message']
-            prev_dialog = [*dialog['messages'], {"role": response_message['role'], 'content': response_message['content']}]
+            prev_dialog = results[0]['dialog']
 
             results = agent.followup(input_tables, prev_dialog, [field['name'] for field in output_fields], new_instruction)
             repair_attempts += 1
 
-        response = flask.jsonify({ "status": "ok", "token": token, "results": results})
+        response = flask.jsonify({ "token": token, "status": "ok", "results": results})
     else:
         response = flask.jsonify({ "token": "", "status": "error", "results": []})
 
     response.headers.add('Access-Control-Allow-Origin', '*')
     return response
 
+@app.route('/code-expl', methods=['GET', 'POST'])
+def request_code_expl():
+    if request.is_json:
+        app.logger.info("# request data: ")
+        content = request.get_json()        
+        token = content["token"]
+
+        client = get_client(content['model']['endpoint'], content['model']['key'])
+        model = content['model']['model']
+        app.logger.info(f" model: {content['model']}")
+
+        # each table is a dict with {"name": xxx, "rows": [...]}
+        input_tables = content["input_tables"]
+        code = content["code"]
+        
+        code_expl_agent = CodeExplanationAgent(client=client, model=model)
+        expl = code_expl_agent.run(input_tables, code)
+    else:
+        expl = ""
+    return expl
 def run_app():
     port = 5000 #+ random.randint(0, 999)
     url = "http://localhost:{0}".format(port)
diff --git a/py-src/data_formulator/py_sandbox.py b/py-src/data_formulator/py_sandbox.py
@@ -38,7 +38,7 @@ def block_mischief(event,arg):
     try:
         exec(code, allowed_objects)
     except Exception as err:
-        error_message = traceback.format_exc()
+        error_message = f"Error: {type(err).__name__} - {str(err)}"
         conn.send({'status': 'error', 'content': error_message})
         conn.close()
         return allowed_objects
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "data_formulator"
-version = "0.1.2"
+version = "0.1.3"
 
 requires-python = ">=3.9"
 authors = [
diff --git a/src/app/dfSlice.tsx b/src/app/dfSlice.tsx
@@ -30,7 +30,7 @@ export interface DataFormulatorState {
 
     oaiModels: {endpoint: string, key: string, model: string }[];
     selectedModel: {endpoint: string, model: string}  | undefined;
-    testedModels: {endpoint: string, model: string, status: 'ok' | 'error' | 'testing' | 'unknown'}[];
+    testedModels: {endpoint: string, model: string, status: 'ok' | 'error' | 'testing' | 'unknown', message: string}[];
 
     tables : DictTable[];
     charts: Chart[];
@@ -278,12 +278,13 @@ export const dataFormulatorSlice = createSlice({
             state.oaiModels = state.oaiModels.filter(oaiModel => oaiModel.model != model || oaiModel.endpoint != endpoint );
             state.testedModels = state.testedModels.filter(m => !(m.model == model && m.endpoint == endpoint));
         },
-        updateModelStatus: (state, action: PayloadAction<{model: string, endpoint: string, status: 'ok' | 'error' | 'testing' | 'unknown'}>) => {
+        updateModelStatus: (state, action: PayloadAction<{model: string, endpoint: string, status: 'ok' | 'error' | 'testing' | 'unknown', message: string}>) => {
             let model = action.payload.model;
             let endpoint = action.payload.endpoint;
             let status = action.payload.status;
-
-            state.testedModels = [...state.testedModels.filter(t => !(t.model == model && t.endpoint == endpoint)), {model, endpoint, status} ]
+            let message = action.payload.message;
+            
+            state.testedModels = [...state.testedModels.filter(t => !(t.model == model && t.endpoint == endpoint)), {model, endpoint, status, message} ]
         },
         addTable: (state, action: PayloadAction<DictTable>) => {
             let table = action.payload;
diff --git a/src/views/DataFormulator.tsx b/src/views/DataFormulator.tsx
@@ -18,6 +18,7 @@ import {
     Typography,
     Box,
     Tooltip,
+    Button,
 } from '@mui/material';
 
 
@@ -149,6 +150,10 @@ Totals (7 entries)	5	5	5	15
                 <Tooltip title={<Box>Example of a table in image format: <Box component="img" sx={{ width: '100%',  marginTop: '6px' }} alt="" src={exampleImageTable} /></Box>}><Typography color="secondary"  display="inline" sx={{cursor: 'help', "&:hover": {textDecoration: 'underline'}}}>an image</Typography></Tooltip> that contain data into clipboard to get started.
             </Typography>
         </Box>
+        <Button size="small" color="inherit" 
+                sx={{position: "absolute", color:'darkgray', bottom: 0, right: 0, textTransform: 'none'}} 
+                target="_blank" rel="noopener noreferrer" 
+                href="https://privacy.microsoft.com/en-US/data-privacy-notice">view data privacy notice</Button>
     </Box>;
 
     let modelSelectionDialogBox = <Box sx={{width: '100vw'}}>
@@ -162,6 +167,10 @@ Totals (7 entries)	5	5	5	15
             </Typography>
             <Typography variant="body1">Specify an OpenAI or Azure OpenAI endpoint to run {toolName}.</Typography>
         </Box>
+        <Button size="small" color="inherit" 
+                sx={{position: "absolute", color:'darkgray', bottom: 0, right: 0, textTransform: 'none'}} 
+                target="_blank" rel="noopener noreferrer" 
+                href="https://privacy.microsoft.com/en-US/data-privacy-notice">view data privacy notice</Button>
     </Box>;
 
     console.log("selected model?")
@@ -172,5 +181,7 @@ Totals (7 entries)	5	5	5	15
             <DndProvider backend={HTML5Backend}>
                 {selectedModel == undefined ? modelSelectionDialogBox : (tables.length > 0 ? fixedSplitPane : dataUploadRequestBox)} 
             </DndProvider>
+            
+
         </Box>);
 }
diff --git a/src/views/EncodingShelfCard.tsx b/src/views/EncodingShelfCard.tsx
@@ -311,16 +311,22 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
                 dispatch(dfActions.changeChartRunningStatus({chartId, status: false}))
                 console.log(data);
                 console.log(token);
-                if (data["status"] == "ok") {
+                if (data.results.length > 0) {
                     if (data["token"] == token) {
                         let candidates = data["results"].filter((item: any) => {
-                            return item["content"].length > 0 
+                            return item["status"] == "ok" && item["content"].length > 0 
                         });
+
                         if (candidates.length == 0) {
+                            let errorMessage = data.results[0].content;
+                            let code = data.results[0].code;
+
                             dispatch(dfActions.addMessages({
                                 "timestamp": Date.now(),
                                 "type": "error",
-                                "value": "Unable to find a data transformation for the chart, please check concepts, encodings and clarification questions."
+                                "value": `Data formulation failed, please retry.`,
+                                "code": code,
+                                "detail": errorMessage
                             }));
                         } else {
 
@@ -444,7 +450,7 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
                             dispatch(dfActions.addMessages({
                                 "timestamp": Date.now(),
                                 "type": "success",
-                                "value": `Data formulation for ${fieldNamesStr} complete, found ${candidates.length} candidates.`
+                                "value": `Data formulation for ${fieldNamesStr} succeeded.`
                             }));
                         }
                     }
@@ -453,7 +459,7 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
                     dispatch(dfActions.addMessages({
                         "timestamp": Date.now(),
                         "type": "error",
-                        "value": "unable to perform data formulation."
+                        "value": "No result is returned from the data formulation agent. Please try again."
                     }));
                 }
             }).catch((error) => {
@@ -462,7 +468,8 @@ export const EncodingShelfCard: FC<EncodingShelfCardProps> = function ({ chartId
                 dispatch(dfActions.addMessages({
                     "timestamp": Date.now(),
                     "type": "error",
-                    "value": `Data formulation for ${fieldNamesStr} fails, maybe try something differently?`
+                    "value": `Data formulation failed, please try again.`,
+                    "detail": error.message
                 }));
             });
     }
diff --git a/src/views/EncodingShelfThread.tsx b/src/views/EncodingShelfThread.tsx
diff --git a/src/views/MessageSnackbar.tsx b/src/views/MessageSnackbar.tsx
diff --git a/src/views/ModelSelectionDialog.tsx b/src/views/ModelSelectionDialog.tsx