Skip to content

Commit f72b47a

Browse files
committed
update performance and enhance data loader
1 parent 9460b3e commit f72b47a

File tree

7 files changed

+183
-124
lines changed

7 files changed

+183
-124
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,10 @@ Any questions? Ask on the Discord channel! [![Discord](https://img.shields.io/ba
2626

2727
## News 🔥🔥🔥
2828

29-
- [05-13-2025] Data Formulator 0.2.3: External Data Loader
29+
- [05-13-2025] Data Formulator 0.2.3 / 0.2.4: External Data Loader
3030
- We introduced an external data loader class to make importing data easier. [Readme](https://github.com/microsoft/data-formulator/tree/main/py-src/data_formulator/data_loader) and [Demo](https://github.com/microsoft/data-formulator/pull/155)
3131
- Current data loaders: MySQL, Azure Data Explorer (Kusto), Azure Blob and Amazon S3 (json, parquet, csv).
32+
- [07-01-2025] Added data loaders for PostgreSQL and MSSQL (Microsoft SQL Server).
3233
- Call for action [link](https://github.com/microsoft/data-formulator/issues/156):
3334
- Users: let us know which data source you'd like to load data from.
3435
- Developers: let's build more data loaders.

py-src/data_formulator/sse_routes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def event_stream() -> Generator[str, None, None]:
5252

5353
# Send initial connection confirmation
5454
yield format_sse_message({
55-
"type": "notification",
55+
"type": "heartbeat",
5656
"text": "agent connection ready",
5757
"timestamp": time.time()
5858
})

py-src/data_formulator/tables_routes.py

Lines changed: 1 addition & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -37,61 +37,6 @@
3737

3838
tables_bp = Blueprint('tables', __name__, url_prefix='/api/tables')
3939

40-
41-
def list_tables_util(db_conn):
    """
    List all tables and views in the current database of the given connection.

    Args:
        db_conn: a connection object exposing ``execute(sql[, params])`` whose
            result supports ``fetchall()``, ``fetchone()`` and ``fetchdf()``
            (presumably a DuckDB connection, given the ``duckdb_tables()`` /
            ``duckdb_views()`` catalog functions queried here).

    Returns:
        A list of dicts, one per table/view, with keys:
        ``name`` (bare name for objects in the current schema, otherwise
        ``database.schema.name``), ``columns`` (list of ``{"name", "type"}``),
        ``row_count``, ``sample_rows`` (up to 1000 rows as JSON-compatible
        records) and ``view_source`` (the view's SQL, or ``None`` for tables).

    Objects whose metadata cannot be read are logged and skipped rather than
    failing the whole listing.
    """
    def quote_identifier(identifier):
        # Double embedded quotes so any catalog name is a valid SQL identifier.
        return '"' + str(identifier).replace('"', '""') + '"'

    results = []

    table_metadata_list = db_conn.execute("""
        SELECT database_name, schema_name, table_name, schema_name==current_schema() as is_current_schema, 'table' as object_type
        FROM duckdb_tables()
        WHERE internal=False AND database_name == current_database()
        UNION ALL
        SELECT database_name, schema_name, view_name as table_name, schema_name==current_schema() as is_current_schema, 'view' as object_type
        FROM duckdb_views()
        WHERE view_name NOT LIKE 'duckdb_%' AND view_name NOT LIKE 'sqlite_%' AND view_name NOT LIKE 'pragma_%' AND database_name == current_database()
    """).fetchall()

    for database_name, schema_name, object_name, is_current_schema, object_type in table_metadata_list:
        if database_name in ('system', 'temp'):
            continue

        # Display name stays unquoted (unchanged output); queries use the
        # quoted form so names with spaces, dots or quotes still resolve.
        name_parts = [object_name] if is_current_schema else [database_name, schema_name, object_name]
        table_name = '.'.join(name_parts)
        quoted_name = '.'.join(quote_identifier(part) for part in name_parts)

        try:
            # Get column information
            columns = db_conn.execute(f"DESCRIBE {quoted_name}").fetchall()

            # Get row count; skip the sample query entirely for empty objects
            row_count = db_conn.execute(f"SELECT COUNT(*) FROM {quoted_name}").fetchone()[0]
            sample_rows = (
                db_conn.execute(f"SELECT * FROM {quoted_name} LIMIT 1000").fetchdf()
                if row_count > 0 else pd.DataFrame()
            )

            # Only views have a source; look it up by its catalog coordinates
            # (not by the display name, which may be a dotted qualified name)
            # and pass the values as parameters instead of splicing them in.
            view_source = None
            if object_type == 'view':
                try:
                    view_info = db_conn.execute(
                        "SELECT sql FROM duckdb_views() WHERE database_name = ? AND schema_name = ? AND view_name = ?",
                        [database_name, schema_name, object_name],
                    ).fetchone()
                    view_source = view_info[0] if view_info else None
                except Exception as e:
                    # Best effort: a failed lookup just leaves the source unknown.
                    logger.warning(f"Error getting view source for {table_name}: {str(e)}")
                    view_source = None

            results.append({
                "name": table_name,
                "columns": [{"name": col[0], "type": col[1]} for col in columns],
                "row_count": row_count,
                "sample_rows": json.loads(sample_rows.to_json(orient='records')),
                "view_source": view_source
            })
        except Exception as e:
            logger.error(f"Error getting table metadata for {table_name}: {str(e)}")
            continue

    return results
94-
9540
@tables_bp.route('/list-tables', methods=['GET'])
9641
def list_tables():
9742
"""List all tables in the current session"""
@@ -123,7 +68,7 @@ def list_tables():
12368
columns = db.execute(f"DESCRIBE {table_name}").fetchall()
12469
# Get row count
12570
row_count = db.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0]
126-
sample_rows = db.execute(f"SELECT * FROM {table_name} LIMIT 1000").fetchdf()
71+
sample_rows = db.execute(f"SELECT * FROM {table_name} LIMIT 1000").fetchdf() if row_count > 0 else pd.DataFrame()
12772

12873
# Check if this is a view or a table
12974
try:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "data_formulator"
7-
version = "0.2.1.3"
7+
version = "0.2.1.4"
88

99
requires-python = ">=3.9"
1010
authors = [

src/app/SSEActions.tsx

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
import { createTableFromFromObjectArray } from "../data/utils";
2+
import { DataFormulatorState, generateFreshChart, getDataFieldItems, SSEMessage } from "./dfSlice";
3+
4+
import { Channel, Chart, ChartTemplate, EncodingItem, EncodingMap, FieldItem, Trigger } from '../components/ComponentType'
5+
6+
7+
/**
 * Registry of the server-sent action types this client understands, together
 * with the keys that must be present in the action payload (`message.data`)
 * for the action to be processed.
 *
 * The table is never reassigned or mutated, so it is a typed, read-only constant.
 */
const actionList: ReadonlyArray<{ readonly type: string; readonly requiredFields: readonly string[] }> = [
    {
        type: "create_chart",
        requiredFields: ['chart_type', 'table_ref', 'encodings']
    },
    {
        type: "load_table_from_object",
        requiredFields: ['table_name', 'rows']
    },
    {
        type: "derive_data_in_progress",
        requiredFields: ['action_id', 'source_table_ids', 'instruction', 'fields']
    },
    {
        type: "derive_data_completed",
        requiredFields: ['action_id', 'derived_table']
    },
    {
        type: "derive_data_failed",
        requiredFields: ['action_id']
    }
];
29+
30+
/**
 * Validates an action message against the `actionList` registry.
 *
 * When the action type is unknown, or one of its required payload fields is
 * absent, an error entry is appended to `state.messages` and `false` is
 * returned; otherwise nothing is recorded and `true` is returned.
 *
 * @param message - the server-sent message whose `data` payload carries the action
 * @param state - application state; only `state.messages` is written
 * @returns `true` when the action is known and all required fields are present
 */
const checkActionRequiredFields = (message: SSEMessage, state: DataFormulatorState): boolean => {
    const actionType = message.data?.type;
    const action = actionList.find(a => a.type === actionType);
    if (!action) {
        state.messages = [...state.messages, {
            component: "server",
            type: "error",
            timestamp: message.timestamp,
            value: `Unknown action type: ${actionType}`
        }];
        return false;
    }

    // A field counts as missing only when it is absent or null. A plain
    // truthiness test would also reject legitimate values such as "", 0 or false.
    const missingFields = action.requiredFields.filter(field => message.data?.[field] == null);
    if (missingFields.length > 0) {
        state.messages = [...state.messages, {
            component: "server",
            type: "error",
            timestamp: message.timestamp,
            value: `[action] ${actionType} - missing required fields: ${missingFields.join(', ')}`,
            // Cap the payload dump so one oversized message cannot flood the log.
            detail: JSON.stringify(message.data).slice(0, 1000)
        }];
        return false;
    }
    return true;
};
54+
55+
/**
 * Applies one server-sent event to the application state.
 *
 * - `heartbeat` messages are ignored.
 * - `notification` messages are appended to `state.messages` as info entries.
 * - Any other message is treated as an action carried in `message.data`:
 *   it is validated with `checkActionRequiredFields`, applied to the state
 *   (charts, tables, concept shelf, pending actions), and a summary entry is
 *   appended to `state.messages`.
 *
 * `state` is written in place.
 * NOTE(review): presumably this runs on an Immer draft inside a reducer — confirm against the caller.
 *
 * @param state - application state to update
 * @param message - the event received from the server
 */
export const handleSSEMessage = (state: DataFormulatorState, message: SSEMessage) => {

    if (message.type === "heartbeat") {
        return;
    }

    if (message.type === "notification") {
        state.messages = [...state.messages, {
            component: "server",
            type: "info",
            timestamp: message.timestamp,
            // `||` on purpose: an empty text is as uninformative as a missing one.
            value: message.text || "Unknown message"
        }];
        return;
    }

    // Otherwise it is an action; an action without a payload cannot be executed.
    if (!message.data) {
        state.messages = [...state.messages, {
            component: "server",
            type: "warning",
            timestamp: message.timestamp,
            // Always explain why nothing happened, whether or not the server sent text.
            value: `${message.text || "Unknown message"} (no data provided, no action taken)`
        }];
        return;
    }

    // On failure the validator has already recorded the error message.
    if (!checkActionRequiredFields(message, state)) {
        return;
    }

    const action = message.data;
    let actionStatus: 'ok' | 'error' | 'in_progress' = 'ok';

    if (action.type === "create_chart") {
        const chart = generateFreshChart(action.table_ref, action.chart_type);
        for (const [channel, fieldName] of Object.entries(action.encodings)) {
            const field = state.conceptShelfItems.find(f => f.name === fieldName);
            if (field) {
                chart.encodingMap[channel as Channel] = { fieldID: field.id };
            } else {
                // Unknown field name: register a custom concept so the encoding can refer to it.
                const newField = { id: `custom--${fieldName}--${Date.now()}`, name: fieldName as string,
                    type: "auto", source: "custom", domain: [], tableRef: 'custom' } as FieldItem;
                state.conceptShelfItems = [newField, ...state.conceptShelfItems];
                chart.encodingMap[channel as Channel] = { fieldID: newField.id };
            }
        }
        state.charts = [...state.charts, chart];
    } else if (action.type === "load_table_from_object") {
        const tableName = action.table_name;
        const table = createTableFromFromObjectArray(tableName, action.rows, false);
        if (state.tables.find(t => t.id === table.id)) {
            // Avoid clobbering an existing table that has the same id.
            table.id = `${tableName}--${Date.now()}`;
        }
        state.tables = [...state.tables, table];
        state.conceptShelfItems = [...state.conceptShelfItems, ...getDataFieldItems(table)];
        state.focusedTableId = table.id;
    } else if (action.type === "derive_data_in_progress") {
        actionStatus = 'in_progress';
        state.pendingSSEActions = [...state.pendingSSEActions, message];
    } else if (action.type === "derive_data_completed") {
        const actionId = action.action_id;
        state.tables = [...state.tables, action.derived_table];
        state.pendingSSEActions = state.pendingSSEActions.filter(m => m.data?.action_id !== actionId);
    } else if (action.type === "derive_data_failed") {
        const actionId = action.action_id;
        actionStatus = 'error';
        state.pendingSSEActions = state.pendingSSEActions.filter(m => m.data?.action_id !== actionId);
    } else {
        // Defensive: a type the validator accepts but this handler does not implement.
        actionStatus = 'error';
    }

    state.messages = [...state.messages, {
        component: "server",
        type: actionStatus === 'ok' ? "success" : actionStatus === 'in_progress' ? "info" : "error",
        timestamp: message.timestamp,
        // Omit the trailing text when the server sent none, instead of printing "undefined".
        value: message.text ? `[action] ${action.type} - ${message.text}` : `[action] ${action.type}`,
        detail: actionStatus === 'error' ? JSON.stringify(action).slice(0, 1000) : undefined
    }];
};

src/views/DBTableManager.tsx

Lines changed: 36 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ import {
3232
Collapse,
3333
styled,
3434
ToggleButtonGroup,
35-
ToggleButton
35+
ToggleButton,
36+
useTheme
3637
} from '@mui/material';
3738

3839
import DeleteIcon from '@mui/icons-material/Delete';
@@ -43,6 +44,7 @@ import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
4344
import TableRowsIcon from '@mui/icons-material/TableRows';
4445
import RefreshIcon from '@mui/icons-material/Refresh';
4546
import ArrowForwardIcon from '@mui/icons-material/ArrowForward';
47+
import SearchIcon from '@mui/icons-material/Search';
4648

4749
import { getUrls } from '../app/utils';
4850
import { CustomReactTable } from './ReactTable';
@@ -920,7 +922,7 @@ export const DataLoaderForm: React.FC<{
920922
}> = ({dataLoaderType, paramDefs, authInstructions, onImport, onFinish}) => {
921923

922924
const dispatch = useDispatch();
923-
925+
const theme = useTheme();
924926
const params = useSelector((state: DataFormulatorState) => state.dataLoaderConnectParams[dataLoaderType] ?? {});
925927

926928
const [tableMetadata, setTableMetadata] = useState<Record<string, any>>({}); let [displaySamples, setDisplaySamples] = useState<Record<string, boolean>>({});
@@ -1017,7 +1019,7 @@ export const DataLoaderForm: React.FC<{
10171019
}}>Import</Button>
10181020
</TableCell>
10191021
</TableRow>,
1020-
<TableRow >
1022+
<TableRow key={`${tableName}-sample`}>
10211023
<TableCell colSpan={4} sx={{ paddingBottom: 0, paddingTop: 0, px: 0, maxWidth: 800, overflowX: "auto",
10221024
borderBottom: displaySamples[tableName] ? '1px solid rgba(0, 0, 0, 0.1)' : 'none' }}>
10231025
<Collapse in={displaySamples[tableName]} timeout="auto" unmountOnExit>
@@ -1086,6 +1088,31 @@ export const DataLoaderForm: React.FC<{
10861088
/>
10871089
</Box>
10881090
))}
1091+
<TextField
1092+
size="small"
1093+
color="secondary"
1094+
sx={{width: "270px",
1095+
'& .MuiInputLabel-root': {fontSize: 14, color: theme.palette.secondary.main},
1096+
'& .MuiInputBase-root': {fontSize: 14},
1097+
'& .MuiInputBase-input::placeholder': {fontSize: 12, fontStyle: "italic"},
1098+
'&:hover': {
1099+
backgroundColor: alpha(theme.palette.secondary.main, 0.03),
1100+
},
1101+
}}
1102+
key="table-filter"
1103+
autoComplete="off"
1104+
variant="standard"
1105+
label={<Box sx={{display: "flex", flexDirection: "row", alignItems: "center", gap: 0.5}}>
1106+
<SearchIcon sx={{ fontSize: 16, color: theme.palette.secondary.main }} />
1107+
table filter
1108+
</Box>}
1109+
placeholder="load only tables containing keywords"
1110+
value={tableFilter}
1111+
onChange={(event) => setTableFilter(event.target.value)}
1112+
slotProps={{
1113+
inputLabel: {shrink: true},
1114+
}}
1115+
/>
10891116
{paramDefs.length > 0 && <ButtonGroup sx={{height: 32, mt: 'auto'}} size="small"
10901117
variant="contained" color="primary">
10911118
<Button
@@ -1121,7 +1148,7 @@ export const DataLoaderForm: React.FC<{
11211148
setIsConnecting(false);
11221149
});
11231150
}}>
1124-
{Object.keys(tableMetadata).length > 0 ? "refresh" : "connect"}
1151+
{Object.keys(tableMetadata).length > 0 ? "refresh" : "connect"} {tableFilter.trim() ? "with filter" : ""}
11251152
</Button>
11261153
<Button
11271154
disabled={Object.keys(tableMetadata).length === 0}
@@ -1131,66 +1158,12 @@ export const DataLoaderForm: React.FC<{
11311158
}}>
11321159
disconnect
11331160
</Button>
1134-
</ButtonGroup> }
1135-
1161+
</ButtonGroup>}
1162+
11361163
</Box>
1137-
{Object.keys(tableMetadata).length === 0 && (
1138-
<Box sx={{display: "flex", flexDirection: "row", alignItems: "center", gap: 1, ml: 4, mt: 4}}>
1139-
<TextField
1140-
size="small"
1141-
sx={{
1142-
width: "200px",
1143-
'& .MuiInputLabel-root': {fontSize: 12},
1144-
'& .MuiInputBase-root': {fontSize: 12, height: 32},
1145-
}}
1146-
variant="outlined"
1147-
label="Filter tables"
1148-
placeholder="Type to filter tables..."
1149-
value={tableFilter}
1150-
onChange={(event) => setTableFilter(event.target.value)}
1151-
InputProps={{
1152-
startAdornment: (
1153-
<SearchIcon sx={{ fontSize: 16, color: 'text.secondary', mr: 0.5 }} />
1154-
),
1155-
}}
1156-
/>
1157-
<Button
1158-
size="small"
1159-
sx={{textTransform: "none", height: 32}}
1160-
onClick={() => {
1161-
setIsConnecting(true);
1162-
fetch(getUrls().DATA_LOADER_LIST_TABLES, {
1163-
method: 'POST',
1164-
headers: {
1165-
'Content-Type': 'application/json',
1166-
},
1167-
body: JSON.stringify({
1168-
data_loader_type: dataLoaderType,
1169-
data_loader_params: params,
1170-
table_filter: tableFilter.trim() || null
1171-
})
1172-
}).then(response => response.json())
1173-
.then(data => {
1174-
if (data.status === "success") {
1175-
console.log(data.tables);
1176-
setTableMetadata(Object.fromEntries(data.tables.map((table: any) => {
1177-
return [table.name, table.metadata];
1178-
})));
1179-
} else {
1180-
console.error('Failed to fetch data loader tables: {}', data.message);
1181-
onFinish("error", `Failed to fetch data loader tables: ${data.message}`);
1182-
}
1183-
setIsConnecting(false);
1184-
})
1185-
.catch(error => {
1186-
onFinish("error", `Failed to fetch data loader tables, please check the server is running`);
1187-
setIsConnecting(false);
1188-
});
1189-
}}>
1190-
apply filter
1191-
</Button>
1192-
</Box>
1193-
)}
1164+
<Box sx={{display: "flex", flexDirection: "row", alignItems: "center", gap: 1, ml: 4, mt: 4}}>
1165+
1166+
</Box>
11941167
<Button
11951168
variant="text"
11961169
size="small"

0 commit comments

Comments
 (0)