Skip to content

Commit 3b14bf1

Browse files
committed
langfuse integration
1 parent fcbf513 commit 3b14bf1

15 files changed

+2516
-2216
lines changed

src/threatforest/cli.py

Lines changed: 123 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -446,7 +446,9 @@ def config_path():
446446
@click.option("--secret-key", "-s", default=None, help="Langfuse secret key (sk-lf-...)")
447447
@click.option("--host", "-h", default=None, help="Langfuse host (default: https://cloud.langfuse.com)")
448448
@click.option("--test", is_flag=True, help="Test the connection after configuring")
449-
def config_langfuse(enable, public_key, secret_key, host, test):
449+
@click.option("--register-scores", is_flag=True, help="Register score definitions with Langfuse")
450+
@click.option("--sync-scores", is_flag=True, help="Sync local registry with existing Langfuse score configs")
451+
def config_langfuse(enable, public_key, secret_key, host, test, register_scores, sync_scores):
450452
"""Configure Langfuse tracing credentials.
451453
452454
Langfuse provides observability for your threat modeling workflows,
@@ -468,6 +470,12 @@ def config_langfuse(enable, public_key, secret_key, host, test):
468470
469471
# Test existing configuration
470472
threatforest config langfuse --test
473+
474+
# Register score definitions with Langfuse
475+
threatforest config langfuse --register-scores
476+
477+
# Sync local registry with existing Langfuse configs
478+
threatforest config langfuse --sync-scores
471479
"""
472480
from threatforest.modules.utils.env_manager import EnvManager
473481
from rich.panel import Panel
@@ -592,12 +600,78 @@ def config_langfuse(enable, public_key, secret_key, host, test):
592600
console.print(f"[red]Connection failed:[/red] {e}")
593601
console.print("[dim]Please verify your credentials are correct[/dim]\n")
594602

603+
# Handle score registration
604+
if register_scores or sync_scores:
605+
console.print("\n[cyan]Managing score configurations...[/cyan]")
606+
607+
# Get current values
608+
reg_public = public_key or env_manager.get_value('LANGFUSE_PUBLIC_KEY')
609+
reg_secret = secret_key or env_manager.get_value('LANGFUSE_SECRET_KEY')
610+
reg_host = host or env_manager.get_value('LANGFUSE_HOST') or 'https://cloud.langfuse.com'
611+
612+
if not reg_public or not reg_secret:
613+
console.print("[red]Error:[/red] Missing public key or secret key")
614+
console.print("[dim]Configure credentials first: threatforest config langfuse[/dim]\n")
615+
return
616+
617+
try:
618+
from threatforest.tracing.config import LangfuseConfig
619+
from threatforest.tracing.score_configs import ScoreConfigRegistry
620+
621+
langfuse_config = LangfuseConfig(
622+
enabled=True,
623+
public_key=reg_public,
624+
secret_key=reg_secret,
625+
host=reg_host
626+
)
627+
628+
registry = ScoreConfigRegistry(langfuse_config)
629+
630+
if sync_scores:
631+
console.print("[cyan]Syncing with existing Langfuse score configs...[/cyan]")
632+
registry.sync_with_langfuse()
633+
configs = registry.get_registered_configs()
634+
console.print(f"[green]✓[/green] Synced {len(configs)} score config(s) from Langfuse")
635+
636+
if register_scores:
637+
console.print("[cyan]Registering ThreatForest score definitions...[/cyan]")
638+
registered = registry.register_all_score_definitions()
639+
640+
if registered:
641+
console.print(Panel(
642+
f"[green]✓ Registered {len(registered)} score config(s) with Langfuse[/green]\n\n"
643+
"Score configs enable server-side validation of scores.\n"
644+
"View them in Langfuse: Settings → Score Configs",
645+
title="Score Configs Registered",
646+
border_style="green"
647+
))
648+
649+
# Show registered scores
650+
from rich.table import Table
651+
score_table = Table(title="Registered Score Configs", show_header=True, header_style="bold cyan")
652+
score_table.add_column("Name", style="cyan")
653+
score_table.add_column("Type", style="white")
654+
score_table.add_column("Config ID", style="dim")
655+
656+
for name, config in sorted(registered.items()):
657+
score_table.add_row(name, config.data_type, config.config_id[:20] + "...")
658+
659+
console.print(score_table)
660+
else:
661+
console.print("[yellow]No new score configs registered (may already exist)[/yellow]")
662+
663+
except ImportError as e:
664+
console.print(f"[red]Error:[/red] {e}")
665+
console.print("[dim]Install required packages with: pip install langfuse[/dim]\n")
666+
except Exception as e:
667+
console.print(f"[red]Error registering score configs:[/red] {e}")
668+
595669
console.print()
596670

597671

598672
@cli.group()
599673
def export():
600-
"""Export traces from Langfuse to DynamoDB"""
674+
"""Export traces from Langfuse to Langfuse Datasets for evaluation"""
601675
pass
602676

603677

@@ -635,41 +709,42 @@ def export():
635709
help="Only export ground truth candidates",
636710
)
637711
@click.option(
638-
"--traces-table",
639-
default="threatforest-traces",
640-
help="DynamoDB table name for traces (default: threatforest-traces)",
712+
"--dataset-name",
713+
"-d",
714+
required=True,
715+
help="Name of the Langfuse Dataset to export to",
641716
)
642717
@click.option(
643-
"--gt-table",
644-
default="threatforest-ground-truth",
645-
help="DynamoDB table name for ground truth (default: threatforest-ground-truth)",
718+
"--dataset-description",
719+
default=None,
720+
help="Description for the dataset (used when creating new dataset)",
646721
)
647722
@click.option(
648723
"--dry-run",
649724
is_flag=True,
650725
default=False,
651726
help="Show what would be exported without actually exporting",
652727
)
653-
def export_traces(trace_type, status, start_date, end_date, ground_truth_only, traces_table, gt_table, dry_run):
654-
"""Export traces from Langfuse to DynamoDB.
728+
def export_traces(trace_type, status, start_date, end_date, ground_truth_only, dataset_name, dataset_description, dry_run):
729+
"""Export traces from Langfuse to a Langfuse Dataset.
655730
656731
This command queries Langfuse for traces matching the specified filters
657-
and exports them to DynamoDB tables. Ground truth candidates are exported
658-
to a separate table without TTL, while regular traces have a 90-day TTL.
732+
and exports them to a Langfuse Dataset for evaluation. Dataset items include
733+
input/expected_output pairs that can be used for running experiments.
659734
660735
Examples:
661736
662-
# Export all reviewed attack tree traces
663-
threatforest export traces --trace-type attack_tree --status reviewed
737+
# Export all reviewed attack tree traces to a dataset
738+
threatforest export traces --trace-type attack_tree --status reviewed -d attack-trees-v1
664739
665740
# Export traces from a specific date range
666-
threatforest export traces --start-date 2024-01-01 --end-date 2024-01-07
741+
threatforest export traces --start-date 2024-01-01 --end-date 2024-01-07 -d weekly-eval
667742
668743
# Export only ground truth candidates
669-
threatforest export traces --ground-truth-only
744+
threatforest export traces --ground-truth-only -d ground-truth-v1
670745
671746
# Dry run to see what would be exported
672-
threatforest export traces --trace-type attack_tree --dry-run
747+
threatforest export traces --trace-type attack_tree --dry-run -d test-dataset
673748
"""
674749
from datetime import datetime as dt
675750
from rich.table import Table
@@ -678,7 +753,7 @@ def export_traces(trace_type, status, start_date, end_date, ground_truth_only, t
678753
try:
679754
# Import tracing modules
680755
from threatforest.tracing.config import LangfuseConfig
681-
from threatforest.tracing.export import LangfuseExporter, ExportFilter
756+
from threatforest.tracing.export import LangfuseDatasetExporter, ExportFilter
682757

683758
# Parse dates if provided
684759
parsed_start_date = None
@@ -718,17 +793,16 @@ def export_traces(trace_type, status, start_date, end_date, ground_truth_only, t
718793

719794
# Display filter configuration
720795
console.print()
721-
filter_table = Table(title="Export Filter Configuration", show_header=True, header_style="bold cyan")
722-
filter_table.add_column("Filter", style="cyan")
796+
filter_table = Table(title="Export Configuration", show_header=True, header_style="bold cyan")
797+
filter_table.add_column("Setting", style="cyan")
723798
filter_table.add_column("Value", style="white")
724799

800+
filter_table.add_row("Dataset Name", dataset_name)
725801
filter_table.add_row("Trace Type", trace_type or "All")
726802
filter_table.add_row("Review Status", status or "All")
727803
filter_table.add_row("Start Date", start_date or "Not set")
728804
filter_table.add_row("End Date", end_date or "Not set")
729805
filter_table.add_row("Ground Truth Only", "Yes" if ground_truth_only else "No")
730-
filter_table.add_row("Traces Table", traces_table)
731-
filter_table.add_row("Ground Truth Table", gt_table)
732806

733807
console.print(filter_table)
734808
console.print()
@@ -764,41 +838,43 @@ def export_traces(trace_type, status, start_date, end_date, ground_truth_only, t
764838
console.print("[cyan]Connecting to Langfuse...[/cyan]")
765839

766840
try:
767-
exporter = LangfuseExporter(
768-
langfuse_config=langfuse_config,
769-
dynamodb_table=traces_table,
770-
ground_truth_table=gt_table,
771-
)
841+
exporter = LangfuseDatasetExporter(langfuse_config=langfuse_config)
772842
except ValueError as e:
773843
console.print(f"[red]Configuration Error:[/red] {e}")
774844
sys.exit(1)
775845
except ImportError as e:
776846
console.print(f"[red]Missing Dependency:[/red] {e}")
777-
console.print("[dim]Install required packages with: pip install langfuse boto3[/dim]")
847+
console.print("[dim]Install required packages with: pip install langfuse[/dim]")
778848
sys.exit(1)
779849

780850
console.print("[cyan]Querying traces from Langfuse...[/cyan]")
781851

782-
with console.status("[bold cyan]Exporting traces...", spinner="dots"):
783-
result = exporter.export_traces(export_filter)
852+
with console.status("[bold cyan]Exporting traces to dataset...", spinner="dots"):
853+
result = exporter.export_to_dataset(
854+
filters=export_filter,
855+
dataset_name=dataset_name,
856+
dataset_description=dataset_description,
857+
)
784858

785859
# Display results
786860
console.print()
787861
result_table = Table(title="Export Results", show_header=True, header_style="bold green")
788-
result_table.add_column("Category", style="cyan")
862+
result_table.add_column("Metric", style="cyan")
789863
result_table.add_column("Count", style="white", justify="right")
790864

791-
result_table.add_row("Regular Traces", str(result.get("traces", 0)))
792-
result_table.add_row("Ground Truth Records", str(result.get("ground_truth", 0)))
793-
result_table.add_row("Total Exported", str(result.get("traces", 0) + result.get("ground_truth", 0)))
865+
result_table.add_row("Dataset Name", result.get("dataset_name", dataset_name))
866+
result_table.add_row("Total Traces Found", str(result.get("total_traces", 0)))
867+
result_table.add_row("Items Created", str(result.get("items_created", 0)))
868+
result_table.add_row("Items Skipped", str(result.get("items_skipped", 0)))
794869

795870
console.print(result_table)
796871
console.print()
797872

798-
total = result.get("traces", 0) + result.get("ground_truth", 0)
799-
if total > 0:
873+
items_created = result.get("items_created", 0)
874+
if items_created > 0:
800875
console.print(Panel(
801-
f"[green]✓ Successfully exported {total} trace(s) to DynamoDB[/green]",
876+
f"[green]✓ Successfully exported {items_created} item(s) to dataset '{dataset_name}'[/green]\n\n"
877+
f"View your dataset in Langfuse: Datasets → {dataset_name}",
802878
border_style="green"
803879
))
804880
else:
@@ -834,7 +910,7 @@ def help_cmd():
834910
[cyan]config set[/cyan] Set a specific config value
835911
[cyan]config path[/cyan] Show path to active config file
836912
[cyan]config langfuse[/cyan] Configure Langfuse tracing credentials
837-
[cyan]export traces[/cyan] Export traces from Langfuse to DynamoDB
913+
[cyan]export traces[/cyan] Export traces from Langfuse to Langfuse Datasets
838914
[cyan]status[/cyan] Show current workflow status
839915
840916
[bold]Examples:[/bold]
@@ -863,20 +939,26 @@ def help_cmd():
863939
# Test Langfuse connection
864940
threatforest config langfuse --test
865941
942+
# Register score definitions with Langfuse
943+
threatforest config langfuse --register-scores
944+
945+
# Sync local registry with existing Langfuse configs
946+
threatforest config langfuse --sync-scores
947+
866948
# Full workflow with project path
867949
threatforest run --project-path /path/to/project
868950
869951
# TTP enrichment only
870952
threatforest run --mode enrich --input-dir ./threatforest/attack_trees --output-dir ./threatforest/enriched
871953
872954
# Export reviewed attack tree traces
873-
threatforest export traces --trace-type attack_tree --status reviewed
955+
threatforest export traces --trace-type attack_tree --status reviewed -d my-dataset
874956
875957
# Export traces from a date range
876-
threatforest export traces --start-date 2024-01-01 --end-date 2024-01-07
958+
threatforest export traces --start-date 2024-01-01 --end-date 2024-01-07 -d weekly-eval
877959
878960
# Export only ground truth candidates
879-
threatforest export traces --ground-truth-only
961+
threatforest export traces --ground-truth-only -d ground-truth-v1
880962
881963
# View generated HTML dashboard
882964
open path/to/project/threatforest/attack_trees/attack_trees_dashboard.html

src/threatforest/orchestrator.py

Lines changed: 13 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -330,17 +330,27 @@ def execute_workflow(self) -> Dict[str, Any]:
330330
)
331331

332332
# Capture output with automated metrics (Requirement 5.2)
333-
from .tracing import calculate_automated_metrics
333+
from .tracing import calculate_automated_metrics, generate_mermaid_live_link
334334

335335
trees = attack_trees.get("attack_trees", [])
336336
trees_with_metrics = []
337337
for tree in trees:
338338
tree_content = tree.get("attack_tree_markdown", "")
339+
mermaid_code = tree.get("mermaid_code", "")
339340
metrics = calculate_automated_metrics(tree_content)
340-
trees_with_metrics.append({
341+
342+
tree_data = {
341343
"threat_id": tree.get("threat_id"),
342344
"automated_metrics": metrics,
343-
})
345+
}
346+
347+
# Add Mermaid Live Editor link for visualization in Langfuse
348+
if mermaid_code:
349+
mermaid_link = generate_mermaid_live_link(mermaid_code)
350+
if mermaid_link:
351+
tree_data["mermaid_live_link"] = mermaid_link
352+
353+
trees_with_metrics.append(tree_data)
344354

345355
span.set_output({
346356
"attack_trees": trees,

0 commit comments

Comments
 (0)