diff --git a/docs/howto/supplier_tree.md b/docs/howto/supplier_tree.md index cd70e52f..4b785d56 100644 --- a/docs/howto/supplier_tree.md +++ b/docs/howto/supplier_tree.md @@ -112,7 +112,7 @@ print(mapping2mermaid(rows, title=title)) The table below shows the values for the decision model. Each row of the table corresponds to a path through the decision model diagram above. -% include-markdown "../_includes/_scrollable_table.md" heading-offset=1 %} +{% include-markdown "../_includes/_scrollable_table.md" heading-offset=1 %} ```python exec="true" idprefix="" diff --git a/src/README.md b/src/README.md index 7ad93615..2029adf4 100644 --- a/src/README.md +++ b/src/README.md @@ -27,7 +27,7 @@ After installation, import the package and explore the examples: print(to_play.LATEST.model_dump_json(indent=2)) #Show decision tree in ascii text art - from ssvc.decision_tables.base import ascii_tree + from ssvc.decision_tables.helpers import ascii_tree print(ascii_tree(to_play.LATEST)) Explanation @@ -92,7 +92,7 @@ For usage in vulnerability management scenarios consider the following popular S print(CISACoordinate.model_dump_json(indent=2)) #Print CISA Decision Table as an ascii tree - from ssvc.decision_tables.base import ascii_tree + from ssvc.decision_tables.helpers import ascii_tree print(ascii_tree(CISACoordinate)) diff --git a/src/ssvc/decision_tables/base.py b/src/ssvc/decision_tables/base.py index 24aac2e8..9f547e65 100644 --- a/src/ssvc/decision_tables/base.py +++ b/src/ssvc/decision_tables/base.py @@ -702,88 +702,7 @@ def check_topological_order(dt: DecisionTable) -> list[dict]: df, target=target, target_value_order=target_value_order ) -def build_tree(df: pd.DataFrame, columns: pd.Index | list[str]) -> dict[str, dict[str, str] | list[str]] | list[str]: - """ - Recursively build a nested dict: - {feature_value: subtree_or_list_of_outcomes} - - This tree should preserve the original row order of the DataFrame. 
- """ - # Base case: if only one column is left, it's the outcome. - if len(columns) == 1: - # Last column: return a list of outcomes. - return df[columns[0]].astype(str).tolist() - - # Get the first feature column and the rest of the columns. - first, rest = columns[0], columns[1:] - tree = {} - - # Iterate through the unique values of the first column in the order they appear. - # This is the key change to preserve the original CSV order. - for val in df[first].unique(): - # Filter the DataFrame to get the group for the current value. - group = df[df[first] == val] - # Recursively build the subtree for this group. - tree[str(val)] = build_tree(group, rest) - - return tree - -def draw_tree(node: dict | list, prefix: str="", lines: list | None = None) -> list: - """ - Pretty-print nested dict/list as a tree. - """ - if lines is None: - lines = [] - - if isinstance(node, dict): - items = list(node.items()) - for i, (k, v) in enumerate(items): - # Determine the branch characters for the tree. - branch = "└── " if i == len(items) - 1 else "├── " - lines.append(prefix + branch + k + " " * 4) - - # Calculate the prefix for the next level of the tree. - next_prefix = prefix + (" " * 16 if i == len(items) - 1 else "│" + " " * 15) - # Recursively draw the subtree. - draw_tree(v, next_prefix, lines) - else: # list of outcomes - for i, leaf in enumerate(node): - # Determine the branch characters for the leaves. - branch = "└── " if i == len(node) - 1 else "├── " - lines.append(prefix + branch + f"[{leaf}]") - - return lines - def ascii_tree(dt: DecisionTable, df: pd.DataFrame | None = None) -> str: - """ - Reads a Pandas data frame, builds a decision tree, and returns its ASCII representation. - """ - # Check for the optional 'row' column and drop it if it exists. - if df == None: - df = decision_table_to_longform_df(dt) - - if 'row' in df.columns: - df.drop(columns='row', inplace=True) - - # Separate feature columns from the outcome column. 
- feature_cols = list(df.columns[:-1]) - outcome_col = df.columns[-1] - - # Build the tree structure. - tree = build_tree(df, feature_cols + [outcome_col]) - # Draw the tree into a list of strings. - lines = draw_tree(tree) - - # Generate the header line. - header = "" - for item in df.columns: - if len(item) > 14: - header = header + item[0:12] + ".." + " | " - else: - header = header + item + " " * (14 - len(item)) + " | " - - # Generate the separator line. - sep = "-" * len(header) - - # Combine the header, separator, and tree lines into a single string. - return "\n".join([header, sep] + lines) + """ Function moved to helpers.py; see there for details. """ + from . import helpers + return helpers.ascii_tree(dt, df) diff --git a/src/ssvc/decision_tables/helpers.py b/src/ssvc/decision_tables/helpers.py index 3de37678..76f4639a 100644 --- a/src/ssvc/decision_tables/helpers.py +++ b/src/ssvc/decision_tables/helpers.py @@ -24,6 +24,7 @@ import logging +import pandas as pd from ssvc.decision_tables.base import ( DecisionTable, @@ -215,7 +216,7 @@ def mapping2mermaid(rows: list[dict[str:str]], title: str = None) -> str: def dt2df_md( - dt: "DecisionTable", + dt: DecisionTable, longform: bool = True, ) -> str: """ @@ -224,7 +225,7 @@ def dt2df_md( decision_table (DecisionTable): The decision table to convert. longform (bool): Whether to return the longform or shortform DataFrame. Returns: - str: A string representation of the DataFrame in CSV format. + str: A string representation of the DataFrame in text/markdown format. """ if longform: df = decision_table_to_longform_df(dt) @@ -234,6 +235,161 @@ def dt2df_md( df.index.rename("Row", inplace=True) return df.to_markdown(index=True) +def dt2df_html( + dt: DecisionTable, + longform: bool = True) -> str: + """ + Converts a decision table and represents it as friendly HTML code. + Args: + decision_table (DecisionTable): The decision table to convert. 
+ longform (bool): Whether to return the longform or shortform DataFrame; defaults to True + Returns: + str: A string representation of the DataFrame in HTML format. + """ + + if longform: + df = decision_table_to_longform_df(dt) + else: + df = decision_table_to_shortform_df(dt) + + df = decision_table_to_longform_df(dt) + ncols = len(df.columns) + nrows = len(df) + + # Precompute rowspan info for every cell + # rowspan[i][j] = number of rows this cell should span; 0 means skip (because merged above) + rowspan = [[1]*ncols for _ in range(nrows)] + + for col in range(ncols): + r = 0 + while r < nrows: + start = r + val = df.iat[r, col] #data_rows[r][col] + # Count how many subsequent rows have same value + while r + 1 < nrows and df.iat[r + 1, col] == val:#data_rows[r + 1][col] == val: + r += 1 + span = r - start + 1 + if span > 1: + # Assign span to first, mark rest as 0 (skip) + rowspan[start][col] = span + for k in range(start + 1, start + span): + rowspan[k][col] = 0 + r += 1 + + # Build HTML + html = [""""""] + html.append("
| {h} | " for h in df.columns) + "{val} | ') + else: + cells.append(f'{val} | ') + j = j + 1 + html.append("
|---|