diff --git a/.gitignore b/.gitignore index dce09a74..a42c13b0 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,8 @@ .sw? #OS X specific files. .DS_store +#VSCode specifics +.vscode/ #==============================================================================# # Build artifacts @@ -45,6 +47,7 @@ cmake-build-release cmake-build-relwithdebinfo duckdb_packaging/duckdb_version.txt test.db +tmp/ #==============================================================================# # Python diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index 6c36d7be..796b8bff 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -86,9 +86,11 @@ __all__: list[str] = [ "default_connection", "description", "df", + "disable_profiling", "distinct", "dtype", "duplicate", + "enable_profiling", "enum_type", "execute", "executemany", @@ -109,6 +111,7 @@ __all__: list[str] = [ "from_df", "from_parquet", "from_query", + "get_profiling_information", "get_table_names", "install_extension", "interrupt", @@ -313,6 +316,9 @@ class DuckDBPyConnection: repository_url: str | None = None, version: str | None = None, ) -> None: ... + def get_profiling_information(self, format: str = "json") -> str: ... + def enable_profiling(self) -> None: ... + def disable_profiling(self) -> None: ... def interrupt(self) -> None: ... def list_filesystems(self) -> list[str]: ... def list_type(self, type: sqltypes.DuckDBPyType) -> sqltypes.DuckDBPyType: ... @@ -1227,6 +1233,9 @@ def limit( *, connection: DuckDBPyConnection | None = None, ) -> DuckDBPyRelation: ... +def get_profiling_information(*, connection: DuckDBPyConnection | None = None, format: str = "json") -> str: ... +def enable_profiling(*, connection: DuckDBPyConnection | None = None) -> None: ... +def disable_profiling(*, connection: DuckDBPyConnection | None = None) -> None: ... def list_filesystems(*, connection: DuckDBPyConnection | None = None) -> list[str]: ... def list_type( type: sqltypes.DuckDBPyType, *, connection: DuckDBPyConnection | None = None diff --git a/duckdb/__init__.py b/duckdb/__init__.py index e1a4aa9a..82596611 100644 --- a/duckdb/__init__.py +++ b/duckdb/__init__.py @@ -84,9 +84,11 @@ default_connection, description, df, + disable_profiling, distinct, dtype, duplicate, + enable_profiling, enum_type, execute, executemany, @@ -107,6 +109,7 @@ from_df, from_parquet, from_query, + get_profiling_information, get_table_names, install_extension, interrupt, @@ -310,9 +313,11 @@ "default_connection", "description", "df", + "disable_profiling", "distinct", "dtype", "duplicate", + "enable_profiling", "enum_type", "execute", "executemany", @@ -333,6 +338,7 @@ "from_df", "from_parquet", "from_query", + "get_profiling_information", "get_table_names", "install_extension", "interrupt", diff --git a/duckdb/query_graph/__init__.py b/duckdb/query_graph/__init__.py new file mode 100644 index 00000000..170a774e --- /dev/null +++ b/duckdb/query_graph/__init__.py @@ -0,0 +1,3 @@ +from .__main__ import translate_json_to_html + +__all__ = ["translate_json_to_html"] \ No newline at end of file diff --git a/duckdb/query_graph/__main__.py b/duckdb/query_graph/__main__.py index d4851694..443a9f80 100644 --- a/duckdb/query_graph/__main__.py +++ b/duckdb/query_graph/__main__.py @@ -6,71 +6,233 @@ from pathlib import Path qgraph_css = """ -.styled-table { - border-collapse: collapse; - margin: 25px 0; - font-size: 0.9em; - font-family: sans-serif; - min-width: 400px; - box-shadow: 0 0 20px rgba(0, 0, 0, 0.15); +:root { + --text-primary-color: #0d0d0d; + --text-secondary-color: #444; + --doc-codebox-border-color: #e6e6e6; + --doc-codebox-background-color: #f7f7f7; + --doc-scrollbar-bg: #e6e6e6; + --doc-scrollbar-slider: #ccc; + --duckdb-accent: #009982; + --duckdb-accent-light: #00b89a; + --card-bg: #fff; + --border-radius: 8px; + --shadow: 0 4px 14px rgba(0,0,0,0.05); } -.styled-table thead tr { - background-color: #009879; - color: #ffffff; - text-align: left; + +html, body { + margin: 0; + padding: 0; + font-family: Inter, system-ui, -apple-system, "Segoe UI", Roboto, sans-serif; + color: var(--text-primary-color); + background: #fafafa; + line-height: 1.55; } -.styled-table th, -.styled-table td { - padding: 12px 15px; + +.container { + max-width: 1000px; + margin: 40px auto; + padding: 0 20px; } -.styled-table tbody tr { - border-bottom: 1px solid #dddddd; + +header { + display: flex; + align-items: center; + gap: 10px; + margin-bottom: 5px; } -.styled-table tbody tr:nth-of-type(even) { - background-color: #f3f3f3; +header img { + width: 100px; + height: 100px; } -.styled-table tbody tr:last-of-type { - border-bottom: 2px solid #009879; +header h1 { + font-size: 1.5rem; + font-weight: 600; + margin: 0; + color: var(--text-primary-color); } -.node-body { - font-size:15px; +/* === Table Styling (DuckDB documentation style, flat header) === */ +table { + border-collapse: collapse; + width: 100%; + margin-bottom: 20px; + text-align: left; + font-variant-numeric: tabular-nums; + border: 1px solid var(--doc-codebox-border-color); + border-radius: var(--border-radius); + overflow: hidden; + box-shadow: var(--shadow); + background: var(--card-bg); +} + +thead { + background-color: var(--duckdb-accent); + color: white; +} + +th, td { + padding: 10px 12px; + font-size: 14px; + vertical-align: top; +} + +th { + font-weight: 700; +} + +tbody tr { + border-bottom: 1px solid var(--doc-codebox-border-color); +} + +tbody tr:last-child td { + border-bottom: none; +} + +tbody tr:hover { + background: var(--doc-codebox-border-color); +} + +/* === Chart/Card Section === */ +.chart { + padding: 20px; + border: 1px solid var(--doc-codebox-border-color); + border-radius: var(--border-radius); + background: var(--card-bg); + box-shadow: var(--shadow); + overflow: visible; +} + +/* === Tree Layout Styling === */ +.tf-tree { + overflow-x: visible; + overflow-y: visible; + padding-top: 20px; } + .tf-nc { - position: relative; - width: 180px; - text-align: center; - background-color: #fff100; + background: var(--card-bg); + border: 1px solid var(--doc-codebox-border-color); + border-radius: var(--border-radius); + padding: 6px; + display: inline-block; } -.custom-tooltip { - position: relative; + +.node-body { + font-size: 13px; + text-align: left; + padding: 10px; + white-space: nowrap; +} + +.node-body p { + margin: 2px 0; +} + +.node-details { + white-space: nowrap; + overflow: visible; display: inline-block; } -.tooltip-text { - visibility: hidden; - background-color: #333; - color: #fff; +/* === Metric Boxes === */ +.chart .metrics-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); + gap: 16px; + margin-bottom: 20px; +} + +.chart .metric-box { + background: var(--card-bg); + border: 1px solid var(--doc-codebox-border-color); + border-radius: var(--border-radius); + box-shadow: var(--shadow); + padding: 12px 16px; text-align: center; - padding: 0px; - border-radius: 1px; - - /* Positioning */ - position: absolute; - z-index: 1; - bottom: 100%; - left: 50%; - transform: translateX(-50%); - margin-bottom: 8px; - - /* Tooltip Arrow */ - width: 400px; + transition: transform 0.2s ease, box-shadow 0.2s ease; } -.custom-tooltip:hover .tooltip-text { - visibility: visible; +.chart .metric-box:hover { + transform: translateY(-2px); + box-shadow: 0 6px 18px rgba(0, 0, 0, 0.08); +} + +.chart .metric-title { + font-size: 13px; + color: var(--text-secondary-color); + margin-bottom: 4px; + text-transform: uppercase; + letter-spacing: 0.5px; +} + +.chart .metric-value { + font-size: 18px; + font-weight: 600; + color: var(--duckdb-accent); +} + + +/* === SQL Query Block === */ +.chart.sql-block { + background: var(--doc-codebox-background-color); + border: 1px solid var(--doc-codebox-border-color); + border-radius: var(--border-radius); + box-shadow: var(--shadow); + padding: 16px; + overflow-x: auto; + margin-top: 20px; +} + +.chart.sql-block pre { + margin: 0; + font-family: "JetBrains Mono", "Fira Code", Consolas, monospace; + font-size: 13.5px; + line-height: 1.5; + color: var(--text-primary-color); + white-space: pre; +} + +.chart.sql-block code { + color: var(--duckdb-accent); + font-weight: 500; +} + + +/* === Links, Typography, and Consistency === */ +a { + color: var(--duckdb-accent); + text-decoration: underline; + transition: color 0.3s; +} + +a:hover { + color: black; +} + +strong { + font-weight: 600; +} + +/* === Dark Mode Support === */ +@media (prefers-color-scheme: dark) { + :root { + --text-primary-color: #e6e6e6; + --doc-codebox-border-color: #2a2a2a; + --doc-codebox-background-color: #1e1e1e; + --card-bg: #111; + } + body { + background: #0b0b0b; + } + thead { + background-color: var(--duckdb-accent); + } + tbody tr:hover { + background: #222; + } } """ @@ -131,37 +293,49 @@ def get_child_timings(top_node: object, query_timings: object) -> str: # noqa: get_child_timings(child, query_timings) -def get_pink_shade_hex(fraction: float) -> str: # noqa: D103 +def get_f7fff0_shade_hex(fraction: float) -> str: + """ + Returns a shade between very light (#f7fff0) and a slightly darker green-yellow, + depending on the fraction (0..1) + """ fraction = max(0, min(1, fraction)) - # Define the RGB values for very light pink (almost white) and dark pink - light_pink = (255, 250, 250) # Very light pink - dark_pink = (255, 20, 147) # Dark pink + # Define RGB for light and dark end + light_color = (247, 255, 240) # #f7fff0 + dark_color = (200, 255, 150) # slightly darker/more saturated green-yellow - # Calculate the RGB values for the given fraction - r = int(light_pink[0] + (dark_pink[0] - light_pink[0]) * fraction) - g = int(light_pink[1] + (dark_pink[1] - light_pink[1]) * fraction) - b = int(light_pink[2] + (dark_pink[2] - light_pink[2]) * fraction) + # Interpolate RGB channels + r = int(light_color[0] + (dark_color[0] - light_color[0]) * fraction) + g = int(light_color[1] + (dark_color[1] - light_color[1]) * fraction) + b = int(light_color[2] + (dark_color[2] - light_color[2]) * fraction) - # Return as hexadecimal color code return f"#{r:02x}{g:02x}{b:02x}" -def get_node_body(name: str, result: str, cpu_time: float, card: int, est: int, width: int, extra_info: str) -> str: # noqa: D103 - node_style = f"background-color: {get_pink_shade_hex(float(result) / cpu_time)};" - - body = f'' - body += '
' +def get_node_body(name: str, result: str, cpu_time: float, card: int, est: int, result_size: int, extra_info: str) -> str: # noqa: D103 + """ + Generate the HTML body for a single node in the tree. + """ + node_style = f"background-color: {get_f7fff0_shade_hex(float(result) / cpu_time)};" new_name = "BRIDGE" if (name == "INVALID") else name.replace("_", " ") formatted_num = f"{float(result):.4f}" - body += f"

{new_name}

time: {formatted_num} seconds

" - body += f' {extra_info} ' - if width > 0: + + body = f'' + body += '
' + body += f"

{new_name}

" + if result_size > 0: + body += f"

time: {formatted_num}s

" body += f"

cardinality: {card}

" body += f"

estimate: {est}

" - body += f"

width: {width} bytes

" + body += f"

result size: {result_size} bytes

" + body += "
" + body += f"Extra info" + body += '
' + body += f"

{extra_info}

" # TODO: Expand on timing. Usually available from a detailed profiling # noqa: TD002, TD003 body += "
" + body += "
" + body += "
" body += "
" return body @@ -178,8 +352,6 @@ def generate_tree_recursive(json_graph: object, cpu_time: float) -> str: # noqa estimate = int(value) else: extra_info += f"{key}: {value}
" - cardinality = json_graph["operator_cardinality"] - width = int(json_graph["result_set_size"] / max(1, cardinality)) # get rid of some typically long names extra_info = re.sub(r"__internal_\s*", "__", extra_info) @@ -189,9 +361,9 @@ def generate_tree_recursive(json_graph: object, cpu_time: float) -> str: # noqa json_graph["operator_type"], json_graph["operator_timing"], cpu_time, - cardinality, + json_graph["operator_cardinality"], estimate, - width, + json_graph["result_set_size"], re.sub(r",\s*", ", ", extra_info), ) @@ -208,13 +380,12 @@ def generate_tree_recursive(json_graph: object, cpu_time: float) -> str: # noqa def generate_timing_html(graph_json: object, query_timings: object) -> object: # noqa: D103 json_graph = json.loads(graph_json) gather_timing_information(json_graph, query_timings) - total_time = float(json_graph.get("operator_timing") or json_graph.get("latency")) table_head = """ - +
- + """ @@ -225,23 +396,55 @@ def generate_timing_html(graph_json: object, query_timings: object) -> object: execution_time = query_timings.get_sum_of_all_timings() all_phases = query_timings.get_phases() - query_timings.add_node_timing(NodeTiming("TOTAL TIME", total_time)) - query_timings.add_node_timing(NodeTiming("Execution Time", execution_time)) - all_phases = ["TOTAL TIME", "Execution Time", *all_phases] + query_timings.add_node_timing(NodeTiming("Execution Time (CPU)", execution_time)) + all_phases = ["Execution Time (CPU)", *all_phases] for phase in all_phases: summarized_phase = query_timings.get_summary_phase_timings(phase) - summarized_phase.calculate_percentage(total_time) - phase_column = f"{phase}" if phase == "TOTAL TIME" or phase == "Execution Time" else phase + summarized_phase.calculate_percentage(execution_time) + phase_column = f"{phase}" if phase == "Execution Time (CPU)" else phase table_body += f""" - + """ table_body += table_end return table_head + table_body +def generate_metric_grid_html(graph_json: str) -> str: # noqa: D103 + json_graph = json.loads(graph_json) + metrics = { + "Execution Time (s)": f"{float(json_graph.get("latency", "N/A")):.4f}", + "Total GB Read": f"{float(json_graph.get("total_bytes_read", "N/A")) / (1024 ** 3):.4f}" if json_graph.get("total_bytes_read", "N/A") != "N/A" else "N/A", + "Total GB Written": f"{float(json_graph.get("total_bytes_written", "N/A")) / (1024 ** 3):.4f}" if json_graph.get("total_bytes_written", "N/A") != "N/A" else "N/A", + "Peak Memory (GB)": f"{float(json_graph.get("system_peak_buffer_memory", "N/A")) / (1024 ** 3):.4f}" if json_graph.get("system_peak_buffer_memory", "N/A") != "N/A" else "N/A", + "Rows Scanned": f"{json_graph.get("cumulative_rows_scanned", "N/A"):,}" if json_graph.get("cumulative_rows_scanned", "N/A") != "N/A" else "N/A", + } + metric_grid_html = """
""" + for key in metrics.keys(): + metric_grid_html += f""" +
+
{key}
+
{metrics[key]}
+
+ """ + metric_grid_html += "
" + return metric_grid_html + +def generate_sql_query_html(graph_json: str) -> str: # noqa: D103 + json_graph = json.loads(graph_json) + sql_query = json_graph.get("query_name", "N/A") + sql_html = f""" +
SQL Query +
+

+{sql_query}
+        
+
+

+ """ + return sql_html def generate_tree_html(graph_json: object) -> str: # noqa: D103 json_graph = json.loads(graph_json) @@ -269,10 +472,10 @@ def generate_ipython(json_input: str) -> str: # noqa: D103 def generate_style_html(graph_json: str, include_meta_info: bool) -> None: # noqa: D103, FBT001 treeflex_css = '\n' - css = "\n" - return {"treeflex_css": treeflex_css, "duckdb_css": css, "libraries": "", "chart_script": ""} + libraries = ( + '\n' + ) + return {"treeflex_css": treeflex_css, "duckdb_css": qgraph_css, "libraries": libraries, "chart_script": ""} def gather_timing_information(json: str, query_timings: object) -> None: # noqa: D103 @@ -282,14 +485,22 @@ def gather_timing_information(json: str, query_timings: object) -> None: # noqa get_child_timings(json["children"][0], query_timings) -def translate_json_to_html(input_file: str, output_file: str) -> None: # noqa: D103 +def translate_json_to_html(input_file: str = None, input_text: str = None, output_file: str = "profile.html") -> None: # noqa: D103 query_timings = AllTimings() - with open_utf8(input_file, "r") as f: - text = f.read() + if input_text is not None: + text = input_text + elif input_file is not None: + with open_utf8(input_file, "r") as f: + text = f.read() + else: + print("please provide either input file or input text") + exit(1) html_output = generate_style_html(text, True) + highlight_metric_grid = generate_metric_grid_html(text) timing_table = generate_timing_html(text, query_timings) tree_output = generate_tree_html(text) + sql_query_html = generate_sql_query_html(text) # finally create and write the html with open_utf8(output_file, "w+") as f: @@ -302,12 +513,20 @@ def translate_json_to_html(input_file: str, output_file: str) -> None: # noqa: ${TREEFLEX_CSS} + - -
+ +
+
+ DuckDB Logo +

Query Profile Graph

+
+
+ ${METRIC_GRID} +
- ${TIMING_TABLE} + ${SQL_QUERY} + ${TIMING_TABLE}
${TREE} @@ -315,6 +534,8 @@ def translate_json_to_html(input_file: str, output_file: str) -> None: # noqa: """ html = html.replace("${TREEFLEX_CSS}", html_output["treeflex_css"]) html = html.replace("${DUCKDB_CSS}", html_output["duckdb_css"]) + html = html.replace("${METRIC_GRID}", highlight_metric_grid) + html = html.replace("${SQL_QUERY}", sql_query_html) html = html.replace("${TIMING_TABLE}", timing_table) html = html.replace("${TREE}", tree_output) f.write(html) diff --git a/scripts/connection_methods.json b/scripts/connection_methods.json index a87b992f..3b02a9b1 100644 --- a/scripts/connection_methods.json +++ b/scripts/connection_methods.json @@ -1093,5 +1093,30 @@ } ], "return": "None" + }, + { + "name": "get_profiling_information", + "function": "GetProfilingInformation", + "docs": "Get profiling information for a query", + "args": [ + { + "name": "format", + "default": "JSON", + "type": "Optional[str]" + } + ], + "return": "str" + }, + { + "name": "enable_profiling", + "function": "EnableProfiling", + "docs": "Enable profiling for a connection", + "return": "None" + }, + { + "name": "disable_profiling", + "function": "DisableProfiling", + "docs": "Disable profiling for a connection", + "return": "None" } ] diff --git a/scripts/generate_connection_stubs.py b/scripts/generate_connection_stubs.py index d542a047..76c19b36 100644 --- a/scripts/generate_connection_stubs.py +++ b/scripts/generate_connection_stubs.py @@ -5,7 +5,7 @@ os.chdir(Path(__file__).parent) JSON_PATH = "connection_methods.json" -DUCKDB_STUBS_FILE = Path("..") / "duckdb" / "__init__.pyi" +DUCKDB_STUBS_FILE = Path("..") / "_duckdb-stubs" / "__init__.pyi" START_MARKER = " # START OF CONNECTION METHODS" END_MARKER = " # END OF CONNECTION METHODS" diff --git a/src/duckdb_py/duckdb_python.cpp b/src/duckdb_py/duckdb_python.cpp index 1dd3ba17..87f2fb39 100644 --- a/src/duckdb_py/duckdb_python.cpp +++ b/src/duckdb_py/duckdb_python.cpp @@ -124,6 +124,34 @@ static void InitializeConnectionMethods(py::module_ &m) { }, "Check if a filesystem with the provided name is currently registered", py::arg("name"), py::kw_only(), py::arg("connection") = py::none()); + m.def( + "get_profiling_information", + [](const py::str &format, shared_ptr conn = nullptr) { + if (!conn) { + conn = DuckDBPyConnection::DefaultConnection(); + } + return conn->GetProfilingInformation(format); + }, + "Get profiling information from a query", py::kw_only(), py::arg("format") = "json", + py::arg("connection") = py::none()); + m.def( + "enable_profiling", + [](shared_ptr conn = nullptr) { + if (!conn) { + conn = DuckDBPyConnection::DefaultConnection(); + } + return conn->EnableProfiling(); + }, + "Enable profiling for the current connection", py::kw_only(), py::arg("connection") = py::none()); + m.def( + "disable_profiling", + [](shared_ptr conn = nullptr) { + if (!conn) { + conn = DuckDBPyConnection::DefaultConnection(); + } + return conn->DisableProfiling(); + }, + "Disable profiling for the current connection", py::kw_only(), py::arg("connection") = py::none()); m.def( "create_function", [](const string &name, const py::function &udf, const py::object &arguments = py::none(), diff --git a/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp b/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp index 48ee055e..8117eda9 100644 --- a/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp +++ b/src/duckdb_py/include/duckdb_python/pyconnection/pyconnection.hpp @@ -337,6 +337,11 @@ struct DuckDBPyConnection : public enable_shared_from_this { py::list ListFilesystems(); bool FileSystemIsRegistered(const string &name); + // Profiling info + py::str GetProfilingInformation(const py::str &format = "json"); + void EnableProfiling(); + void DisableProfiling(); + //! Default connection to an in-memory database static DefaultConnectionHolder default_connection; //! Caches and provides an interface to get frequently used modules+subtypes diff --git a/src/duckdb_py/pyconnection.cpp b/src/duckdb_py/pyconnection.cpp index b88b88ed..583e4165 100644 --- a/src/duckdb_py/pyconnection.cpp +++ b/src/duckdb_py/pyconnection.cpp @@ -3,6 +3,7 @@ #include "duckdb/catalog/default/default_types.hpp" #include "duckdb/common/arrow/arrow.hpp" #include "duckdb/common/enums/file_compression_type.hpp" +#include "duckdb/common/enums/profiler_format.hpp" #include "duckdb/common/printer.hpp" #include "duckdb/common/types.hpp" #include "duckdb/common/types/vector.hpp" @@ -285,6 +286,9 @@ static void InitializeConnectionMethods(py::class_
PhaseTimeTime (s) Percentage
{phase_column}{summarized_phase.time}{round(summarized_phase.time, 8)} {str(summarized_phase.percentage * 100)[:6]}%