Skip to content

Commit 8973435

Browse files
authored
[deploy] Merge pull request #171 from microsoft/dev
[deploy] 0.2.4 - Dev: performance update & data loader improvement
2 parents 612dc33 + b895f39 commit 8973435

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

42 files changed

+2514
-1567
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -26,9 +26,10 @@ Any questions? Ask on the Discord channel! [![Discord](https://img.shields.io/ba
2626

2727
## News 🔥🔥🔥
2828

29-
- [05-13-2025] Data Formulator 0.2.3: External Data Loader
29+
- [05-13-2025] Data Formulator 0.2.3 / 0.2.4: External Data Loader
3030
- We introduced external data loader class to make import data easier. [Readme](https://github.com/microsoft/data-formulator/tree/main/py-src/data_formulator/data_loader) and [Demo](https://github.com/microsoft/data-formulator/pull/155)
3131
- Current data loaders: MySQL, Azure Data Explorer (Kusto), Azure Blob and Amazon S3 (json, parquet, csv).
32+
- [07-01-2025] Updated with: Postgresql, mssql.
3233
- Call for action [link](https://github.com/microsoft/data-formulator/issues/156):
3334
- Users: let us know which data source you'd like to load data from.
3435
- Developers: let's build more data loaders.

local_server.bat

Lines changed: 1 addition & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -6,7 +6,5 @@
66
:: set http_proxy=http://127.0.0.1:7890
77
:: set https_proxy=http://127.0.0.1:7890
88

9-
set FLASK_APP=py-src/data_formulator/app.py
109
set FLASK_RUN_PORT=5000
11-
set FLASK_RUN_HOST=0.0.0.0
12-
flask run
10+
python -m py-src.data_formulator.app --port %FLASK_RUN_PORT% --dev

local_server.sh

Lines changed: 3 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -5,4 +5,6 @@
55
# export http_proxy=http://127.0.0.1:7890
66
# export https_proxy=http://127.0.0.1:7890
77

8-
env FLASK_APP=py-src/data_formulator/app.py FLASK_RUN_PORT=5000 FLASK_RUN_HOST=0.0.0.0 flask run
8+
#env FLASK_APP=py-src/data_formulator/app.py FLASK_RUN_PORT=5000 FLASK_RUN_HOST=0.0.0.0 flask run
9+
export FLASK_RUN_PORT=5000
10+
python -m py-src.data_formulator.app --port ${FLASK_RUN_PORT} --dev

package.json

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -7,8 +7,8 @@
77
"@emotion/react": "^11.14.0",
88
"@emotion/styled": "^11.14.0",
99
"@fontsource/roboto": "^4.5.5",
10-
"@mui/icons-material": "^5.14.0",
11-
"@mui/material": "^7.0.2",
10+
"@mui/icons-material": "^7.1.1",
11+
"@mui/material": "^7.1.1",
1212
"@reduxjs/toolkit": "^1.8.6",
1313
"@types/dompurify": "^3.0.5",
1414
"@types/validator": "^13.12.2",

py-src/data_formulator/agent_routes.py

Lines changed: 0 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -425,8 +425,6 @@ def request_code_expl():
425425
if request.is_json:
426426
logger.info("# request data: ")
427427
content = request.get_json()
428-
token = content["token"]
429-
430428
client = get_client(content['model'])
431429

432430
# each table is a dict with {"name": xxx, "rows": [...]}

py-src/data_formulator/app.py

Lines changed: 14 additions & 5 deletions
Original file line number · Diff line number · Diff line change
@@ -29,14 +29,17 @@
2929
from dotenv import load_dotenv
3030
import secrets
3131
import base64
32-
APP_ROOT = Path(os.path.join(Path(__file__).parent)).absolute()
32+
APP_ROOT = Path(Path(__file__).parent).absolute()
3333

3434
import os
3535

3636
# blueprints
3737
from data_formulator.tables_routes import tables_bp
3838
from data_formulator.agent_routes import agent_bp
39+
from data_formulator.sse_routes import sse_bp
3940

41+
import queue
42+
from typing import Dict, Any
4043

4144
app = Flask(__name__, static_url_path='', static_folder=os.path.join(APP_ROOT, "dist"))
4245
app.secret_key = secrets.token_hex(16) # Generate a random secret key for sessions
@@ -65,6 +68,7 @@ def default(self, obj):
6568
# register blueprints
6669
app.register_blueprint(tables_bp)
6770
app.register_blueprint(agent_bp)
71+
app.register_blueprint(sse_bp)
6872

6973
print(APP_ROOT)
7074

@@ -252,6 +256,8 @@ def parse_args() -> argparse.Namespace:
252256
help="Whether to execute python in subprocess, it makes the app more secure (reducing the chance for the model to access the local machine), but increases the time of response")
253257
parser.add_argument("-d", "--disable-display-keys", action='store_true', default=False,
254258
help="Whether disable displaying keys in the frontend UI, recommended to turn on if you host the app not just for yourself.")
259+
parser.add_argument("--dev", action='store_true', default=False,
260+
help="Launch the app in development mode (prevents the app from opening the browser automatically)")
255261
return parser.parse_args()
256262

257263

@@ -264,11 +270,14 @@ def run_app():
264270
'disable_display_keys': args.disable_display_keys
265271
}
266272

267-
url = "http://localhost:{0}".format(args.port)
268-
threading.Timer(2, lambda: webbrowser.open(url, new=2)).start()
273+
if not args.dev:
274+
url = "http://localhost:{0}".format(args.port)
275+
threading.Timer(2, lambda: webbrowser.open(url, new=2)).start()
276+
277+
# Enable debug mode and auto-reload in development mode
278+
debug_mode = args.dev
279+
app.run(host='0.0.0.0', port=args.port, threaded=True, debug=debug_mode, use_reloader=debug_mode)
269280

270-
app.run(host='0.0.0.0', port=args.port, threaded=True)
271-
272281
if __name__ == '__main__':
273282
#app.run(debug=True, host='127.0.0.1', port=5000)
274283
#use 0.0.0.0 for public
Lines changed: 6 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -1,14 +1,19 @@
11
from data_formulator.data_loader.external_data_loader import ExternalDataLoader
22
from data_formulator.data_loader.mysql_data_loader import MySQLDataLoader
3+
from data_formulator.data_loader.mssql_data_loader import MSSQLDataLoader
34
from data_formulator.data_loader.kusto_data_loader import KustoDataLoader
45
from data_formulator.data_loader.s3_data_loader import S3DataLoader
56
from data_formulator.data_loader.azure_blob_data_loader import AzureBlobDataLoader
7+
from data_formulator.data_loader.postgresql_data_loader import PostgreSQLDataLoader
68

79
DATA_LOADERS = {
810
"mysql": MySQLDataLoader,
11+
"mssql": MSSQLDataLoader,
912
"kusto": KustoDataLoader,
1013
"s3": S3DataLoader,
1114
"azure_blob": AzureBlobDataLoader,
15+
"postgresql": PostgreSQLDataLoader
1216
}
1317

14-
__all__ = ["ExternalDataLoader", "MySQLDataLoader", "KustoDataLoader", "S3DataLoader", "AzureBlobDataLoader", "DATA_LOADERS"]
18+
__all__ = ["ExternalDataLoader", "MySQLDataLoader", "MSSQLDataLoader", "KustoDataLoader", "S3DataLoader", "AzureBlobDataLoader","PostgreSQLDataLoader","DATA_LOADERS"]
19+

py-src/data_formulator/data_loader/azure_blob_data_loader.py

Lines changed: 6 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -116,7 +116,7 @@ def _setup_azure_authentication(self):
116116
)
117117
""")
118118

119-
def list_tables(self) -> List[Dict[str, Any]]:
119+
def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]:
120120
# Use Azure SDK to list blobs in the container
121121
from azure.storage.blob import BlobServiceClient
122122

@@ -145,8 +145,7 @@ def list_tables(self) -> List[Dict[str, Any]]:
145145
container_client = blob_service_client.get_container_client(self.container_name)
146146

147147
# List blobs in the container
148-
blob_list = container_client.list_blobs()
149-
148+
blob_list = container_client.list_blobs()
150149
results = []
151150

152151
for blob in blob_list:
@@ -156,6 +155,10 @@ def list_tables(self) -> List[Dict[str, Any]]:
156155
if blob_name.endswith('/') or not self._is_supported_file(blob_name):
157156
continue
158157

158+
# Apply table filter if provided
159+
if table_filter and table_filter.lower() not in blob_name.lower():
160+
continue
161+
159162
# Create Azure blob URL
160163
azure_url = f"az://{self.account_name}.{self.endpoint}/{self.container_name}/{blob_name}"
161164

py-src/data_formulator/data_loader/external_data_loader.py

Lines changed: 25 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -44,6 +44,18 @@ def sanitize_table_name(name_as: str) -> str:
4444
class ExternalDataLoader(ABC):
4545

4646
def ingest_df_to_duckdb(self, df: pd.DataFrame, table_name: str):
47+
# Log DataFrame info before ingestion
48+
import logging
49+
logger = logging.getLogger(__name__)
50+
logger.info(f"Ingesting DataFrame to DuckDB table '{table_name}'")
51+
logger.info(f"DataFrame shape: {df.shape}")
52+
logger.info(f"DataFrame dtypes: {dict(df.dtypes)}")
53+
54+
# Log sample of datetime columns
55+
for col in df.columns:
56+
if pd.api.types.is_datetime64_any_dtype(df[col]):
57+
sample_values = df[col].dropna().head(3)
58+
logger.info(f"Datetime column '{col}' sample values: {list(sample_values)}")
4759

4860
base_name = table_name
4961
counter = 1
@@ -59,8 +71,19 @@ def ingest_df_to_duckdb(self, df: pd.DataFrame, table_name: str):
5971
# Create table
6072
random_suffix = ''.join(random.choices(string.ascii_letters + string.digits, k=6))
6173
self.duck_db_conn.register(f'df_temp_{random_suffix}', df)
74+
75+
# Log table schema after registration
76+
try:
77+
schema_info = self.duck_db_conn.execute(f"DESCRIBE df_temp_{random_suffix}").fetchall()
78+
logger.info(f"DuckDB table schema: {schema_info}")
79+
except Exception as e:
80+
logger.warning(f"Could not get schema info: {e}")
81+
6282
self.duck_db_conn.execute(f"CREATE TABLE {table_name} AS SELECT * FROM df_temp_{random_suffix}")
6383
self.duck_db_conn.execute(f"DROP VIEW df_temp_{random_suffix}") # Drop the temporary view after creating the table
84+
85+
logger.info(f"Successfully created DuckDB table '{table_name}'")
86+
6487

6588
@staticmethod
6689
@abstractmethod
@@ -69,15 +92,14 @@ def list_params() -> List[Dict[str, Any]]:
6992

7093
@staticmethod
7194
@abstractmethod
72-
def auth_instructions() -> str:
73-
pass
95+
def auth_instructions() -> str: pass
7496

7597
@abstractmethod
7698
def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnection):
7799
pass
78100

79101
@abstractmethod
80-
def list_tables(self) -> List[Dict[str, Any]]:
102+
def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]:
81103
# should include: table_name, column_names, column_types, sample_data
82104
pass
83105

py-src/data_formulator/data_loader/kusto_data_loader.py

Lines changed: 93 additions & 7 deletions
Original file line number · Diff line number · Diff line change
@@ -1,15 +1,27 @@
1+
import logging
2+
import sys
13
from typing import Dict, Any, List
24
import pandas as pd
35
import json
46
import duckdb
57
import random
68
import string
9+
from datetime import datetime
710

811
from azure.kusto.data import KustoClient, KustoConnectionStringBuilder
912
from azure.kusto.data.helpers import dataframe_from_result_table
1013

1114
from data_formulator.data_loader.external_data_loader import ExternalDataLoader, sanitize_table_name
1215

16+
# Configure root logger for general application logging
17+
logging.basicConfig(
18+
level=logging.INFO,
19+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
20+
handlers=[logging.StreamHandler(sys.stdout)]
21+
)
22+
23+
# Get logger for this module
24+
logger = logging.getLogger(__name__)
1325

1426
class KustoDataLoader(ExternalDataLoader):
1527

@@ -67,23 +79,93 @@ def __init__(self, params: Dict[str, Any], duck_db_conn: duckdb.DuckDBPyConnecti
6779
self.kusto_cluster, self.client_id, self.client_secret, self.tenant_id))
6880
else:
6981
# This function provides an interface to Kusto. It uses Azure CLI auth, but you can also use other auth types.
70-
self.client = KustoClient(KustoConnectionStringBuilder.with_az_cli_authentication(self.kusto_cluster))
82+
cluster_url = KustoConnectionStringBuilder.with_az_cli_authentication(self.kusto_cluster)
83+
logger.info(f"Connecting to Kusto cluster: {self.kusto_cluster}")
84+
self.client = KustoClient(cluster_url)
85+
logger.info("Using Azure CLI authentication for Kusto client. Ensure you have run `az login` in your terminal.")
7186
except Exception as e:
72-
raise Exception(f"Error creating Kusto client: {e}, please authenticate with Azure CLI when starting the app.")
73-
87+
logger.error(f"Error creating Kusto client: {e}")
88+
raise Exception(f"Error creating Kusto client: {e}, please authenticate with Azure CLI when starting the app.")
7489
self.duck_db_conn = duck_db_conn
7590

91+
def _convert_kusto_datetime_columns(self, df: pd.DataFrame) -> pd.DataFrame:
92+
"""Convert Kusto datetime columns to proper pandas datetime format"""
93+
logger.info(f"Processing DataFrame with columns: {list(df.columns)}")
94+
logger.info(f"Column dtypes before conversion: {dict(df.dtypes)}")
95+
96+
for col in df.columns:
97+
original_dtype = df[col].dtype
98+
99+
if df[col].dtype == 'object':
100+
# Try to identify datetime columns by checking sample values
101+
sample_values = df[col].dropna().head(3)
102+
if len(sample_values) > 0:
103+
# Check if values look like datetime strings or timestamp numbers
104+
first_val = sample_values.iloc[0]
105+
106+
# Handle Kusto datetime format (ISO 8601 strings)
107+
if isinstance(first_val, str) and ('T' in first_val or '-' in first_val):
108+
try:
109+
# Try to parse as datetime
110+
pd.to_datetime(sample_values.iloc[0])
111+
logger.info(f"Converting column '{col}' from string to datetime")
112+
df[col] = pd.to_datetime(df[col], errors='coerce', utc=True).dt.tz_localize(None)
113+
except Exception as e:
114+
logger.debug(f"Failed to convert column '{col}' as string datetime: {e}")
115+
116+
# Handle numeric timestamps (Unix timestamps in various formats)
117+
elif isinstance(first_val, (int, float)) and first_val > 1000000000:
118+
try:
119+
# Try different timestamp formats
120+
if first_val > 1e15: # Likely microseconds since epoch
121+
logger.info(f"Converting column '{col}' from microseconds timestamp to datetime")
122+
df[col] = pd.to_datetime(df[col], unit='us', errors='coerce', utc=True).dt.tz_localize(None)
123+
elif first_val > 1e12: # Likely milliseconds since epoch
124+
logger.info(f"Converting column '{col}' from milliseconds timestamp to datetime")
125+
df[col] = pd.to_datetime(df[col], unit='ms', errors='coerce', utc=True).dt.tz_localize(None)
126+
else: # Likely seconds since epoch
127+
logger.info(f"Converting column '{col}' from seconds timestamp to datetime")
128+
df[col] = pd.to_datetime(df[col], unit='s', errors='coerce', utc=True).dt.tz_localize(None)
129+
except Exception as e:
130+
logger.debug(f"Failed to convert column '{col}' as numeric timestamp: {e}")
131+
132+
# Handle datetime64 columns that might have timezone info
133+
elif pd.api.types.is_datetime64_any_dtype(df[col]):
134+
# Ensure timezone-aware datetimes are properly handled
135+
if hasattr(df[col].dt, 'tz') and df[col].dt.tz is not None:
136+
logger.info(f"Converting timezone-aware datetime column '{col}' to UTC")
137+
df[col] = df[col].dt.tz_convert('UTC').dt.tz_localize(None)
138+
139+
# Log if conversion happened
140+
if original_dtype != df[col].dtype:
141+
logger.info(f"Column '{col}' converted from {original_dtype} to {df[col].dtype}")
142+
143+
logger.info(f"Column dtypes after conversion: {dict(df.dtypes)}")
144+
return df
145+
76146
def query(self, kql: str) -> pd.DataFrame:
147+
logger.info(f"Executing KQL query: {kql} on database {self.kusto_database}")
77148
result = self.client.execute(self.kusto_database, kql)
78-
return dataframe_from_result_table(result.primary_results[0])
149+
logger.info(f"Query executed successfully, returning results.")
150+
df = dataframe_from_result_table(result.primary_results[0])
151+
152+
# Convert datetime columns properly
153+
df = self._convert_kusto_datetime_columns(df)
154+
155+
return df
79156

80-
def list_tables(self) -> List[Dict[str, Any]]:
157+
def list_tables(self, table_filter: str = None) -> List[Dict[str, Any]]:
81158
query = ".show tables"
82159
tables_df = self.query(query)
83160

84161
tables = []
85162
for table in tables_df.to_dict(orient="records"):
86163
table_name = table['TableName']
164+
165+
# Apply table filter if provided
166+
if table_filter and table_filter.lower() not in table_name.lower():
167+
continue
168+
87169
schema_result = self.query(f".show table ['{table_name}'] schema as json").to_dict(orient="records")
88170
columns = [{
89171
'name': r["Name"],
@@ -94,7 +176,10 @@ def list_tables(self) -> List[Dict[str, Any]]:
94176
row_count = row_count_result[0]["TotalRowCount"]
95177

96178
sample_query = f"['{table_name}'] | take {5}"
97-
sample_result = json.loads(self.query(sample_query).to_json(orient="records"))
179+
sample_df = self.query(sample_query)
180+
181+
# Convert sample data to JSON with proper datetime handling
182+
sample_result = json.loads(sample_df.to_json(orient="records", date_format='iso'))
98183

99184
table_metadata = {
100185
"row_count": row_count,
@@ -159,7 +244,8 @@ def ingest_data(self, table_name: str, name_as: str = None, size: int = 5000000)
159244
total_rows_ingested += len(chunk_df)
160245

161246
def view_query_sample(self, query: str) -> str:
162-
return json.loads(self.query(query).head(10).to_json(orient="records"))
247+
df = self.query(query).head(10)
248+
return json.loads(df.to_json(orient="records", date_format='iso'))
163249

164250
def ingest_data_from_query(self, query: str, name_as: str) -> pd.DataFrame:
165251
# Sanitize the table name for SQL compatibility

0 commit comments

Comments (0)