Skip to content

Commit 3b4c110

Browse files
committed
feat: add PageLLMProfileNode and all-LLM capsule-grounded pipeline
1 parent 077e781 commit 3b4c110

19 files changed

+374
-96
lines changed
316 Bytes
Binary file not shown.

api/config.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
"""Configuration constants for OpenAI integration."""
import os

# Model used by all LLM nodes; override per-deployment with the
# OPENAI_MODEL environment variable.
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4.1-mini-2025-04-14")

api/main.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from api.nodes.rank_node import RankNode
2222
from api.nodes.guide_node import GuideNode
2323
from api.nodes.pdf_builder_node import PdfBuilderNode
24+
from api.nodes.page_llm_profile_node import PageLLMProfileNode
2425
from api.nodes.new_pipeline.pipeline import Generate10Pipeline
2526
from api.nodes.new_pipeline.web_fetch_node import WebFetchNode
2627
from api.nodes.new_pipeline.local_fetch_node import LocalFetchNode
@@ -61,13 +62,23 @@ async def generate(request: Request):
6162
if not url:
6263
raise HTTPException(status_code=400, detail="Missing 'url' in request body")
6364
try:
64-
# Pipeline: fetch, summarise, assets, prompts, ranking, tips, PDF
65-
raw_text = FetchSummaryNode(url)
66-
master_prompt = SummariseNode(raw_text)
67-
assets = AssetsNode(url)
68-
groups = PromptsNode(master_prompt, assets.get('palette', []))
69-
bests = RankNode(groups)
65+
# Pipeline: one-shot profile, prompts, ranking, tips, assets, PDF
66+
profile = PageLLMProfileNode(url)
67+
framework_plan = {
68+
"key_phrases": profile["keywords"],
69+
"sector": profile["sector"],
70+
"services": profile["services"],
71+
"geo": profile["geo"],
72+
"brand_tone": profile["brand_tone"],
73+
}
74+
prompts = PromptDraftNode(
75+
text=" ".join(profile["value_props"]),
76+
framework_plan=framework_plan,
77+
)
78+
bests = RankNode(prompts)
7079
tips = GuideNode(bests)
80+
# branding assets
81+
assets = AssetsNode(url)
7182
pdf_bytes = PdfBuilderNode(
7283
assets.get('logo_url'),
7384
assets.get('palette', []),
Lines changed: 36 additions & 19 deletions
Original file line numberDiff line numberDiff line change
import logging
import re
import collections

from api.nodes.fetch_summary_node import Node


@Node(retries=1)
def BusinessAnchorGuard(prompts, keyphrases_or_capsule):
    """
    Dual-mode anchoring:
    - Legacy mode: prompts=list[str], keyphrases_or_capsule=list[str]
      => keep prompts containing at least one key-phrase (case-insensitive);
      if no key-phrases were extracted, all prompts pass through.
    - New mode: prompts=dict[str, list[str]], keyphrases_or_capsule=capsule(str)
      => keep prompts that share >= 3 of the capsule's top nouns; a category
      whose prompts all fail keeps its original list (never emptied).
    Any other argument combination is returned unchanged.
    """
    logger = logging.getLogger(__name__)

    # Legacy mode: flat prompt list filtered by scraped key-phrases.
    if isinstance(prompts, list) and isinstance(keyphrases_or_capsule, list):
        keyphrases = keyphrases_or_capsule
        if not keyphrases:
            # Nothing to anchor against: pass everything through.
            return prompts
        lowered = [kp.lower() for kp in keyphrases]
        return [p for p in prompts if any(phrase in p.lower() for phrase in lowered)]

    # New mode: category dict filtered by frequent capsule nouns.
    if isinstance(prompts, dict) and isinstance(keyphrases_or_capsule, str):
        capsule = keyphrases_or_capsule
        # Words of length ≥4 approximate the capsule's content nouns.
        nouns = re.findall(r"\b[A-Za-z]{4,}\b", capsule.lower())
        top_nouns = [w for w, _ in collections.Counter(nouns).most_common(20)]
        # Match whole words only: plain substring tests false-match inside
        # longer words (e.g. "care" would match "scared"), inflating overlap.
        patterns = [re.compile(r"\b" + re.escape(noun) + r"\b") for noun in top_nouns]
        anchored = {}
        for cat, items in prompts.items():
            filtered = []
            for p in items:
                lp = p.lower()
                overlap = sum(1 for pat in patterns if pat.search(lp))
                if overlap >= 3:
                    filtered.append(p)
            # Fallback: never return an empty category.
            anchored[cat] = filtered or items
        return anchored

    # Fallback: unexpected argument shapes pass through untouched.
    logger.info("BusinessAnchorGuard: unexpected args, returning original prompts")
    return prompts
Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
from api.nodes.fetch_summary_node import Node
from api.nodes.new_pipeline.prompt_draft_node import QUOTAS


@Node(retries=1)
def FrameworkSelectNode(keyphrases: list[str]) -> dict:
    """
    Determine the frameworks mix (RTF, RISEN, CRISPE) per PRD category quotas.

    Parameters:
        keyphrases: retained for interface compatibility; unused now that
            quotas are static.

    Returns a plan dict, e.g., {'Marketing': 3, 'Sales': 2, ...}.
    """
    # Return a fresh copy: callers historically mutate the plan in place
    # (e.g. plan["key_phrases"] = ...), and handing out QUOTAS by reference
    # would silently corrupt the shared module-level dict for later calls.
    return dict(QUOTAS)
Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
"""
Legacy keyphrase extraction node retained for backward compatibility.
"""
import logging

from api.nodes.fetch_summary_node import Node

# Module-level logger, per stdlib logging convention.
logger = logging.getLogger(__name__)


@Node(retries=1)
def KeyphraseNode(text: str) -> list[str]:
    """
    Extract key-phrases from the given text.
    Legacy stub: returns an empty list; override via pipeline node or tests.

    The frequency-based extraction that used to live here was replaced by
    the capsule-driven pipeline; this stub only preserves the import path
    and call signature for older callers.
    """
    logger.warning("KeyphraseNode is deprecated; returning empty list.")
    return []
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
"""
Node to condense raw HTML into a business context capsule via LLM.
"""
import functools
import logging
import openai

from api.nodes.fetch_summary_node import Node
from api.config import OPENAI_MODEL

logger = logging.getLogger(__name__)

# Cap forwarded HTML so the request stays within the model's context window.
_MAX_HTML_CHARS = 14000


# NOTE(review): lru_cache wraps the Node retry wrapper and keys on the full
# HTML string, so up to 128 large pages stay pinned in memory — confirm this
# footprint is acceptable.
@functools.lru_cache(maxsize=128)
@Node(retries=2)
def MiniMasterPromptNode(html: str) -> str:
    """
    Condense supplied HTML into a ≤350-word Business Context Capsule.

    Parameters:
        html: raw page HTML; only the first 14 000 characters are sent.

    Returns:
        The capsule as stripped plain text.

    Raises:
        ValueError: if the model returns an empty/None completion.
        Any OpenAI client error is logged and re-raised (retried by @Node).
    """
    client = openai.OpenAI()
    try:
        resp = client.chat.completions.create(
            model=OPENAI_MODEL,
            response_format={"type": "text"},
            temperature=0.3,
            max_tokens=500,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "Condense the supplied HTML into a ≤350-word Business Context Capsule "
                        "(USPs, services, tone, geo, benefits). Return plain text."
                    ),
                },
                {"role": "user", "content": html[:_MAX_HTML_CHARS]},
            ],
        )
        content = resp.choices[0].message.content
    except Exception as e:
        logger.error("MiniMasterPromptNode LLM error: %s", e)
        raise
    if content is None:
        # message.content can be None (e.g. refusals); fail explicitly
        # instead of letting `.strip()` raise AttributeError that would be
        # mislabeled as an LLM transport error.
        raise ValueError("MiniMasterPromptNode: empty completion content")
    return content.strip()
api/nodes/new_pipeline/pipeline.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from api.nodes.new_pipeline.web_fetch_node import WebFetchNode
88
from api.nodes.new_pipeline.local_fetch_node import LocalFetchNode
99
from api.nodes.new_pipeline.clean_node import CleanNode
10-
from api.nodes.new_pipeline.keyphrase_node import KeyphraseNode
10+
from api.nodes.new_pipeline.mini_master_prompt_node import MiniMasterPromptNode
1111
from api.nodes.new_pipeline.framework_select_node import FrameworkSelectNode
1212
from api.nodes.new_pipeline.prompt_draft_node import PromptDraftNode
1313
from api.nodes.new_pipeline.deduplicate_node import DeduplicateNode
@@ -41,22 +41,20 @@ def Generate10Pipeline(url: str) -> bytes:
4141
# Step 2: clean
4242
text = CleanNode(html)
4343
logger.info("CleanNode output text length=%d", len(text))
44-
# Step 3: keyphrases
45-
keyphrases = KeyphraseNode(text)
46-
logger.info("KeyphraseNode output: %r", keyphrases)
47-
# Step 4: framework plan
48-
plan = FrameworkSelectNode(keyphrases)
44+
# Step 3: generate business capsule
45+
capsule = MiniMasterPromptNode(html)
46+
logger.info("MiniMasterPromptNode capsule length=%d", len(capsule))
47+
# Step 4: framework plan (static quotas)
48+
plan = FrameworkSelectNode([])
4949
logger.info("FrameworkSelectNode plan: %r", plan)
50-
plan["key_phrases"] = keyphrases
51-
# Step 5: draft prompts
52-
raw_prompts = PromptDraftNode(text, plan)
50+
# Step 5: draft prompts based on capsule
51+
raw_prompts = PromptDraftNode(capsule, plan)
5352
logger.info("PromptDraftNode output: %r", raw_prompts)
5453
# Step 6: dedupe
5554
unique_prompts = DeduplicateNode(raw_prompts)
5655
logger.info("DeduplicateNode output: %r", unique_prompts)
57-
# Step 7: business anchor
58-
# BusinessAnchorGuard may filter within categories or lists
59-
anchored_prompts = BusinessAnchorGuard(unique_prompts, keyphrases)
56+
# Step 7: business anchor using capsule nouns
57+
anchored_prompts = BusinessAnchorGuard(unique_prompts, capsule)
6058
logger.info("BusinessAnchorGuard output: %r", anchored_prompts)
6159
# Step 8: enforce quota
6260
final_prompts = QuotaEnforceNode(anchored_prompts, plan)

api/nodes/new_pipeline/prompt_draft_node.py

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
"""
Prompt drafting: turns a business capsule plus framework plan into
category-grouped prompt drafts via the OpenAI chat API.
"""
import logging
import json
from api.nodes.fetch_summary_node import Node
from api.config import OPENAI_MODEL

# NOTE(review): calling basicConfig at import time in a library module
# overrides the host application's logging setup; libraries normally only
# create a logger — confirm this global side effect is intended.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Prompt quotas per category (total 9 slots)
QUOTAS = {"Marketing": 3, "Sales": 2, "Product": 2, "Success": 1, "Ops": 1}
@Node(retries=3)
713
def PromptDraftNode(text: str, framework_plan: dict) -> dict[str, list[str]]:
814
"""
915
Draft 10-25 raw prompts with explicit constraints and strong business anchoring.
1016
`framework_plan` **must** contain "key_phrases": list[str].
1117
"""
12-
import json, hashlib, logging, openai
13-
14-
logger = logging.getLogger(__name__)
15-
key_phrases: list[str] = framework_plan.get("key_phrases", [])
16-
min_phrases_required = 2 if key_phrases else 1 # fallback if none
18+
import openai, hashlib
1719

20+
# Framework plan may include quotas and capsule context
1821
client = openai.OpenAI()
1922

23+
# System prompt for generating prompt packs grounded in business capsule
2024
system_msg = {
2125
"role": "system",
2226
"content": (
23-
"Draft 10-25 AI prompts grouped by business function. "
24-
"Return ONLY valid JSON shaped as:\n"
25-
"{\n"
26-
" \"Marketing\": [\"You are a ...\", ...],\n"
27-
" \"Sales\": [...],\n"
28-
" \"Success\": [...],\n"
29-
" \"Product\": [...],\n"
30-
" \"Ops\": [...]\n"
31-
"}\n"
32-
f"Rules: • each prompt begins with \"You are a ...\" • min {min_phrases_required} key-phrases "
33-
"• ≤220 tokens • quotas: Marketing 3, Sales 2, Success 2, Product 2, Ops 1."
27+
"You are a Prompt-Pack Generator. Given a Business Context Capsule and a framework plan, "
28+
"generate 10–25 high-quality prompts grouped by business function. "
29+
"Return ONLY valid JSON mapping categories to arrays of prompt strings. "
30+
"Prompts must be no more than 220 tokens each. "
31+
f"Quotas per category: {QUOTAS}."
3432
)
3533
}
3634

3735
user_msg = {
3836
"role": "user",
3937
"content": (
40-
f"<business_text>{text}</business_text>\n"
41-
f"<key_phrases>{', '.join(key_phrases)}</key_phrases>\n"
38+
f"<capsule>{text}</capsule>\n"
4239
f"<framework_plan>{json.dumps(framework_plan, ensure_ascii=False)}</framework_plan>"
4340
),
4441
}
4542

43+
# Use deterministic seed for repeatability
4644
seed_val = int(
47-
hashlib.sha256((text + 'gpt-4.1-mini-2025-04-14').encode()).hexdigest(), 16
45+
hashlib.sha256((text + OPENAI_MODEL).encode()).hexdigest(), 16
4846
) % 2**31
4947

5048
resp = client.chat.completions.create(
51-
model="gpt-4.1-mini-2025-04-14",
49+
model=OPENAI_MODEL,
5250
messages=[system_msg, user_msg],
53-
temperature=0.0, # deterministic
51+
temperature=0.35,
5452
seed=seed_val,
5553
)
5654

api/nodes/page_llm_profile_node.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
"""
Node that uses an LLM to fetch and profile a webpage in one shot.
Returns structured JSON with business metadata.
"""
import json
import logging
import openai

from api.nodes.fetch_summary_node import Node
from api.config import OPENAI_MODEL

logger = logging.getLogger(__name__)


@Node(retries=2)
def PageLLMProfileNode(url: str) -> dict:
    """
    One-shot profile of a business homepage via LLM.

    Parameters:
        url: homepage URL the model should browse via its web_search tool.

    Returns:
        dict with keys name, sector, geo, brand_tone (strings) and
        services, value_props, keywords (lists); missing or unparseable
        fields fall back to empty string/list.

    Raises:
        Propagates any OpenAI client error (retried by @Node).
    """
    client = openai.OpenAI()
    try:
        # NOTE(review): chat.completions does not document a "web_search"
        # tool type or a "tool" key on user messages (web search is a
        # built-in tool of the Responses API) — confirm this request shape
        # works with the deployed SDK version.
        resp = client.chat.completions.create(
            model=OPENAI_MODEL,
            tools=[{"type": "web_search"}],
            response_format={"type": "json_object"},
            temperature=0.3,
            max_tokens=400,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are BizScraper-AI. Use web_search to visit the URL, "
                        "gather up to 14 000 chars, and return ONLY valid JSON with: "
                        "name, sector, services (≤5), geo, value_props (3–5), "
                        "brand_tone ['friendly','formal','premium','fun','neutral','playful'], "
                        "keywords (≤8). If missing, use empty string/list."
                    )
                },
                {"role": "user", "content": url, "tool": "web_search"},
            ],
        )
        raw = resp.choices[0].message.content
    except Exception as e:
        logger.error("LLM profile call failed: %s", e)
        raise
    try:
        # raw can be None (refusals), which raises TypeError — not
        # JSONDecodeError — so catch both (JSONDecodeError is a ValueError).
        data = json.loads(raw)
        if not isinstance(data, dict):
            # A bare array/number would crash setdefault below.
            raise ValueError("profile JSON is not an object")
    except (TypeError, ValueError):
        logger.error("JSON decode error in PageLLMProfileNode: %s", str(raw)[:500])
        # (Removed a dead PageLLMProfileNode.cache_clear() call: this
        # function carries no lru_cache, so the attribute never existed.)
        data = {}
    # Fill defaults so downstream code can rely on every key existing.
    for key in ("name", "sector", "geo", "brand_tone"):
        data.setdefault(key, "")
    for key in ("services", "value_props", "keywords"):
        data.setdefault(key, [])
    return data

0 commit comments

Comments
 (0)