44import gradio as gr
55import logging
66import pandas as pd
7+ from datetime import date
78
89from poligrapher .gradio_app import functions
910from poligrapher .gradio_app .policy_analysis import (
1314 PolicyDocumentInfo ,
1415 PolicyDocumentProvider ,
1516)
16- # (Legacy direct script imports removed; generation orchestrated through functions.generate_graph)
1717
1818logger = logging .getLogger (__name__ )
1919logger .setLevel (logging .INFO )
2020# Global in‑memory provider registry
2121providers : list [PolicyDocumentProvider ] = []
22- ## (Legacy CSV Status augmentation code omitted for clarity)
23-
24- # def get_analysis_results():
25- # try:
26- # df = get_company_df()
27- # results = []
28- # return results
29- # except Exception as e:
30- # logger.error("Error loading companies from CSV: %s", e)
31- # return [PolicyAnalysisResult(company_name="error", privacy_policy_url="", score=None, kind="auto", has_name=False, has_score=False)]
32-
33- # TODO: Modify to use PolicyAnalysisResult.get_graph_image_path()
34- # def get_png_for_company(selected_row):
35- # if selected_row is None or not isinstance(selected_row, list) or len(selected_row) == 0:
36- # return None
37- # idx = selected_row[1]
38- # df = get_analysis_results()
39- # if idx >= len(df):
40- # return None
41- # domain = df.iloc[idx]["Domain Name"]
42- # png_path = f"./output/{domain}/knowledge_graph.png"
43- # if os.path.exists(png_path):
44- # return png_path
45- # return None
46-
22+ CSV_PATH = "./poligrapher/gradio_app/policy_list.csv"
4723
4824def add_provider (name : str , industry : str ):
4925 provider = PolicyDocumentProvider (name = name , industry = industry )
@@ -94,55 +70,6 @@ def add_result_to_provider(
9470):
9571 provider .add_result (PolicyAnalysisResult (document = document , score = score , kind = kind ))
9672
97- # def analyze_url(policy: PolicyAnalysisResult):
98- # try:
99- # logger.info("API triggered: analyze_url for company: %s, URL: %s", policy.company_name, policy.privacy_policy_url)
100- # if getattr(policy, "has_graph", False):
101- # logger.info(
102- # "Existing graph detected for %s; skipping regeneration and only scoring.",
103- # policy.company_name,
104- # )
105- # output_info = score_existing_policy(policy)
106- # else:
107- # output_info = process_policy_url(policy)
108-
109- # if (output_info is None) or (not output_info.get("success", True)):
110- # logger.error("Error processing policy URL: %s", output_info.get('message', 'Unknown error'))
111- # return {"error": output_info.get("message", "Unknown error")}
112-
113- # # output_info follows shape { success: True, message: ..., result: { ... } }
114- # result_payload = output_info.get("result", {})
115- # total_score = result_payload.get("total_score")
116- # grade = result_payload.get("grade")
117- # category_scores = result_payload.get("category_scores")
118- # feedback = result_payload.get("feedback")
119- # graph_json_path = result_payload.get("graph_json_path")
120- # structured = result_payload.get("structured")
121-
122- # logger.info(
123- # "API analyze_url completed: %s",
124- # {
125- # "company": policy.company_name,
126- # "score": total_score,
127- # "grade": grade,
128- # "has_graph": getattr(policy, "has_graph", False),
129- # },
130- # )
131-
132- # return {
133- # "total_score": total_score,
134- # "grade": grade,
135- # "category_scores": category_scores,
136- # "feedback": feedback,
137- # "graph_json_path": graph_json_path,
138- # "structured": structured,
139- # }
140-
141- # except Exception as e:
142- # logger.error("Error in analyze_url: %s", e)
143- # return {"error": str(e)}
144-
145-
14673def get_providers (csv_file : str ):
14774 # Reset existing providers to avoid duplicates on repeated calls
14875 providers .clear ()
@@ -195,6 +122,65 @@ def _safe_enum_from_value(val) -> DocumentCaptureSource:
195122 providers .append (provider )
196123
197124
def _providers_to_dataframe() -> pd.DataFrame:
    """Flatten the in-memory ``providers`` registry into a table.

    One row is produced per (provider, document) pair. Each row is paired
    with the first entry of ``provider.results`` whose ``document`` compares
    equal to the document; when there is none, the score and graph kind
    cells are ``None``.

    Returns:
        A DataFrame with the columns ``Provider``, ``Policy URL``,
        ``Industry``, ``Source``, ``Date``, ``Status``, ``Score`` and
        ``Graph Kind`` (in that order), even when there are no rows.
    """
    column_order = [
        "Provider",
        "Policy URL",
        "Industry",
        "Source",
        "Date",
        "Status",
        "Score",
        "Graph Kind",
    ]

    records = []
    for prov in providers:
        for document in prov.documents:
            # Locate the first analysis result attached to this document.
            matched = None
            for candidate in prov.results:
                if candidate.document == document:
                    matched = candidate
                    break

            record = {
                "Provider": prov.name,
                "Policy URL": document.path,
                "Industry": prov.industry,
                # Use the ``.value`` attribute when present, else the raw source.
                "Source": getattr(document.source, "value", document.source),
                "Date": document.capture_date,
                "Status": bool(document.has_results),
                "Score": getattr(matched, "score", None),
                "Graph Kind": None,
            }
            # Only a truthy result with a truthy kind contributes a graph kind;
            # a kind without ``.value`` still yields None.
            if matched and matched.kind:
                record["Graph Kind"] = getattr(matched.kind, "value", None)
            records.append(record)

    return pd.DataFrame(records, columns=column_order)
159+
160+
def _save_providers_to_csv(path: str = CSV_PATH, allow_empty: bool = False):
    """Persist the in-memory providers to a CSV file (best effort).

    Args:
        path: Destination CSV file. May be a bare file name with no
            directory component, in which case it is written relative to
            the current working directory.
        allow_empty: When False (the default), an empty in-memory dataset is
            not written if a file already exists at ``path``. This guards
            against the case where the function is invoked before any load
            and would replace an existing CSV with a header-only file. Pass
            True to force the write.

    Returns:
        None. Failures are logged and never propagated to the caller.
    """
    try:
        df = _providers_to_dataframe()
        if df.empty and not allow_empty and os.path.exists(path):
            logger.debug(
                "Skip saving providers: would overwrite existing non-empty CSV with empty dataset (%s)",
                path,
            )
            return
        # os.path.dirname() returns "" for a bare file name, and
        # os.makedirs("") raises FileNotFoundError, which previously made
        # every save to such a path fail silently inside the handler below.
        parent_dir = os.path.dirname(path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        df.to_csv(path, index=False)
        logger.debug("Providers persisted to %s (rows=%d)", path, len(df))
    except Exception as e:
        # Deliberately best-effort: a failed save must not break the UI
        # callback that triggered it. Log with traceback for diagnosis.
        logger.exception("Failed to persist providers to CSV: %s", e)
182+
183+
198184with gr .Blocks () as block1 :
199185 gr .Markdown ("#### PoliGraph-er Demo" )
200186 company_name_input = gr .Textbox (label = "Company Name" )
@@ -219,8 +205,6 @@ def on_submit_click(company_name, privacy_policy_url, kind):
219205 gr .Markdown ("#### Company Privacy Policy List" )
220206 # Lazy load: summary placeholder (populated on .load())
221207 status_md = gr .Markdown ("" )
222- # Enable the button for demonstration and add a progress bar
223- score_btn = gr .Button ("Score All" , interactive = True )
224208 # Show only relevant columns, including Status
225209 display_cols = [
226210 "Status" ,
@@ -357,8 +341,6 @@ def _build_policies_df(provider_filter: str | None = None):
357341 )
358342
359343 # Policies UI will be added after company_info & png_image definitions
360-
361- # ----- Add Provider Modal UI -----
362344 add_provider_btn = gr .Button ("Add Provider" )
363345 with gr .Group (visible = False , elem_id = "add-provider-modal" ) as add_provider_modal :
364346 with gr .Column (elem_classes = "modal-card" ):
@@ -448,11 +430,12 @@ def _save_new_provider(name: str, industry: str):
448430 file_types = [".pdf" , ".html" , ".htm" ],
449431 visible = False ,
450432 )
451- new_policy_date = gr .Textbox (label = "Capture Date (YYYY-MM-DD)" )
433+ with gr .Row ():
434+ new_policy_date = gr .Textbox (label = "Capture Date (YYYY-MM-DD)" )
435+ new_policy_today = gr .Button ("Today" )
452436 with gr .Row ():
453437 save_new_policy = gr .Button ("Save" , variant = "primary" )
454438 cancel_new_policy = gr .Button ("Cancel" )
455- scoring_output = gr .Textbox (label = "Scoring Results" , interactive = False )
456439
457440 def _show_add_policy_modal (provider_name : str ):
458441 if not provider_name :
@@ -551,7 +534,8 @@ def _save_new_policy(
551534 capture_date = capture_date ,
552535 has_results = False ,
553536 )
554-
537+ # Persist after adding new document
538+ _save_providers_to_csv ()
555539 return (
556540 _build_display_df (), # updated company (providers) table including status
557541 _build_policies_df (provider_name ), # updated policies list
@@ -584,6 +568,14 @@ def _on_policy_source_change(source_val: str):
584568 outputs = [new_policy_file , new_policy_url ],
585569 )
586570
571+ def _set_new_policy_date_today ():
572+ """Set the new policy date textbox to today's date (YYYY-MM-DD)."""
573+ return gr .update (value = date .today ().isoformat ())
574+
575+ new_policy_today .click (
576+ _set_new_policy_date_today , inputs = [], outputs = [new_policy_date ]
577+ )
578+
587579 # Auto-adjust source dropdown when a file is uploaded
588580 def _on_policy_file_change (uploaded_file , current_source ):
589581 if uploaded_file is None :
@@ -655,6 +647,8 @@ def _ensure_graph_assets(doc: PolicyDocumentInfo, force: bool = False) -> bool:
655647 # Final status evaluation
656648 success = doc .has_graph () and doc .has_image ()
657649 doc .has_results = success
650+ # Persist status change if success or partial
651+ _save_providers_to_csv ()
658652 if not success :
659653 logger .debug ("Artifacts incomplete for %s (graph=%s, image=%s)" , doc .path , doc .has_graph (), doc .has_image ())
660654 except BaseException as e :
@@ -759,11 +753,10 @@ def score_all(progress=gr.Progress()):
759753 policies_accordion ,
760754 ],
761755 )
762- score_btn .click (score_all , inputs = [], outputs = scoring_output , show_progress = "full" )
763756
764757 # initial load (after client connects)
765758 def _initial_load ():
766- get_providers ("./poligrapher/gradio_app/policy_list.csv" )
759+ get_providers (CSV_PATH )
767760 df = _build_display_df ()
768761 num_success = sum (
769762 1 for p in providers if p .documents and p .documents [0 ].has_results
0 commit comments