generative-computing · cheukhei-chu · Nov 13, 2025 · Nov 13, 2025 · Nov 13, 2025 · Nov 13, 2025
diff --git a/docs/examples/hilbert/hilbert.py b/docs/examples/hilbert/hilbert.py
@@ -0,0 +1,178 @@
+import subprocess
+from pathlib import Path
+import os
+
+from mellea.stdlib.sampling import RejectionSamplingStrategy
+from mellea.stdlib.requirement import Requirement, simple_validate
+from mellea.stdlib.reqlib.lean import extract_lean_code, HasLeanCode, LeanCodeClearOfUnsafePrimitives, LeanCodeProvesWithoutCheating, LeanCodePreservesTheorem, LeanCodeVerifies, LeanCodeWithinLengthLimit
+import mellea.stdlib.reqlib.md
+
+class Hilbert:
+    """Drive LLM sessions to formalize and prove statements in Lean 4.
+
+    A ``reasoner`` session drafts theorem statements and a ``prover`` session
+    attempts proofs; the Lean project path is handed to the
+    ``LeanCodeVerifies`` requirement for each candidate.
+    """
+
+    def __init__(self,
+                 retriever = None,
+                 reasoner: mellea.MelleaSession | None = None,
+                 prover: mellea.MelleaSession | None = None,
+                 lean_project_path: Path | str | None = None):
+        """Store the collaborators and query the Lean version via ``lake``.
+
+        Args:
+            retriever: Optional retriever object; stored as-is.
+            reasoner: Session used by ``FormulateFormalStatement``.
+            prover: Session used by ``AttemptProverLLMProof``.
+            lean_project_path: Lean project directory; when ``None`` the
+                ``LEAN_PROJECT_PATH`` environment variable is used instead.
+
+        Raises:
+            ValueError: If neither the argument nor the environment variable
+                provides a project path.
+            RuntimeError: If ``lake env lean -v`` exits with a non-zero status.
+        """
+        self.retriever = retriever
+        self.reasoner = reasoner
+        self.prover = prover
+        if lean_project_path is None:
+            lean_project_path = os.environ.get("LEAN_PROJECT_PATH")
+        if lean_project_path is None:
+            # Path(None) would fail with an opaque TypeError; name what is missing.
+            raise ValueError(
+                "lean_project_path was not given and LEAN_PROJECT_PATH is not set"
+            )
+        self.lean_project_path = Path(lean_project_path)
+
+        result = subprocess.run(
+                ["lake", "env", "lean", "-v"],
+                cwd=self.lean_project_path,
+                capture_output=True,
+                text=True
+            )
+        # Raise explicitly: an assert would be skipped under ``python -O``.
+        if result.returncode != 0:
+            raise RuntimeError(f"Stdout: \n{result.stdout}\n Stderr: \n{result.stderr}")
+        self.lean_version = result.stdout
+        print("Lean version:", self.lean_version)
+
+    def FormulateFormalStatement(self, informal_problem: str) -> str | None:
+        """Ask the reasoner to translate a problem into a Lean 4 theorem.
+
+        Args:
+            informal_problem: Natural-language statement to formalize.
+
+        Returns:
+            The Lean code extracted from the sampled result, or ``None`` when
+            sampling did not report success.
+        """
+        assert isinstance(self.reasoner, mellea.MelleaSession) and isinstance(self.lean_project_path, Path)
+        lean_candidate = self.reasoner.instruct(
+            # Quadrupled braces leave a literal ``{{informal_problem}}`` in the
+            # f-string result, which is the placeholder form ``user_variables``
+            # fills; doubled braces collapse to ``{informal_problem}`` instead.
+            f"""
+            Translate the following problem statement into a theorem in Lean 4 with the following instructions:
+            1) Lean 4 version is {self.lean_version}
+            2) There should be one theorem only and nothing else
+            3) Make up a descriptive name for the theorem; make it unique and distinct from anything possibly occuring in Mathlib4.
+            4) No need to write a proof; just translate the statement into a theorem and use the 'sorry' placeholder
+            5) You may import Mathlib4
+            6) Output in the format '```lean4\\s*\\n(.*?)```'
+            The problem statement is as follows: {{{{informal_problem}}}}""",
+            requirements=[
+                HasLeanCode(),
+                LeanCodeClearOfUnsafePrimitives(),
+                f"The lean code consists of one theorem which is a true translation of the informal problem statement: {informal_problem}",
+                LeanCodeVerifies(self.lean_project_path),
+            ],
+            strategy=RejectionSamplingStrategy(loop_budget=5),
+            user_variables={"informal_problem": informal_problem},
+            return_sampling_results=True,
+        )
+        if lean_candidate.success:
+            return extract_lean_code(str(lean_candidate.result))
+        else:
+            return None
+
+    def AttemptReasonerProof(self):
+        """Placeholder for a reasoner-driven proof attempt; does nothing yet."""
+        pass
+
+
+    def AttemptProverLLMProof(self, theorem: str) -> str | None:
+        """Ask the prover session for a Lean 4 proof of ``theorem``.
+
+        Args:
+            theorem: Lean 4 code holding the theorem statement to prove.
+
+        Returns:
+            The Lean code extracted from the sampled result, or ``None`` when
+            sampling did not report success.
+        """
+        assert isinstance(self.prover, mellea.MelleaSession) and isinstance(self.lean_project_path, Path)
+        lean_candidate = self.prover.instruct(
+            # f"""
+            # You are given Lean 4 code that defines exactly one theorem.
+            # The theorem currently ends with the placeholder 'sorry'.
+
+            # Your task: replace the 'sorry' and give a valid Lean 4 proof for the given theorem. Here are the instructions:
+            # 1) Lean 4 version is {self.lean_version}
+            # 2) You may import Mathlib4. You may add import statements at the top if necessary, but you must not modify
+            # anything else in the code (names, structure, or theorem statement)
+            # 3) Include the given theorem statement in your output. Do not change the theorem at all.
+            # 4) Reminder: module Mathlib.Tactic does not exist
+            # 5) Output in the format '```lean4\\s*\\n(.*?)```'
+            # The Lean 4 code is as follows: {{theorem}}""",
+            f"""Think step-by-step to complete the following Lean 4 proof.
+            {theorem}
+            Rules:
+            4. You may import Mathlib4. Do not change any of the existing imports (if any).
+            5. Use proper Lean 4 syntax and conventions. Ensure the proof sketch is enclosed in
+            triple backticks ```lean4```.
+            6. Only include a single Lean 4 code block, corresponding to the proof along with
+            the theorem statement.
+            7. When dealing with large numerical quantities, avoid explicit computation as much
+            as possible. Use tactics like rw to perform symbolic manipulation rather than
+            numerical computation.
+            8. Do NOT use sorry.
+            9. Do NOT change anything in the original theorem statement.
+            """,
+            requirements=[
+                HasLeanCode(),
+                LeanCodeClearOfUnsafePrimitives(),
+                LeanCodeProvesWithoutCheating(),
+                # f"The lean code attempts to prove the theorem: {theorem}",
+                LeanCodePreservesTheorem(theorem),
+                LeanCodeVerifies(self.lean_project_path),
+                LeanCodeWithinLengthLimit(30),
+            ],
+            strategy=RejectionSamplingStrategy(loop_budget=5),
+            user_variables={"theorem": theorem},
+            return_sampling_results=True,
+        )
+        if lean_candidate.success:
+            return extract_lean_code(str(lean_candidate.result))
+        else:
+            return None
+
+    def RetrieveTheorems(self, problem, error_message = None):
+        """Placeholder for Mathlib theorem retrieval; returns ``None`` for now.
+
+        Args:
+            problem: Problem to retrieve related theorems for.
+            error_message: Optional error message to take into account.
+        """
+        pass
+        # 2: ▷ Theorem retrieval from Mathlib with optional parameter for error message
+        # 3: if retrieval_enabled then
+        # 4: search_queries ← GENERATESEARCHQUERIES(problem, error_message)
+        # 5: candidate_theorems ← SEMANTICSEARCHENGINE(search_queries)
+        # 6: relevant_theorems ← SELECTRELEVANTTHEOREMS(candidate_theorems, problem)
+        # 7: return relevant_theorems
+        # 8: else
+        # 9: return ∅
+        # 10: end if
+        # 11: end function
diff --git a/docs/examples/hilbert/mathlib_retriever.py b/docs/examples/hilbert/mathlib_retriever.py
@@ -0,0 +1,63 @@
+from __future__ import annotations
+from typing import List, Tuple
+import torch
+from sentence_transformers import SentenceTransformer, util
+from datasets import load_dataset
+
+class Retriever:
+    """Semantic search over the ``train`` split of a dataset of named entries."""
+
+    def __init__(self, dataset_name: str, model_name: str = "sentence-transformers/all-mpnet-base-v2"):
+        """Load the encoder and dataset, then obtain one embedding per entry.
+
+        Args:
+            dataset_name: Dataset identifier passed to ``load_dataset``.
+            model_name: Sentence-transformers model used to encode text.
+        """
+        self.model: SentenceTransformer = SentenceTransformer(model_name)
+        self.dataset = load_dataset(dataset_name)
+
+        # Reuse a precomputed "embedding" column when present; otherwise encode
+        # the "informal_description" column with the model.
+        if "embedding" in self.dataset["train"].features:
+            self.embeddings: torch.Tensor = torch.tensor(self.dataset["train"]["embedding"])
+        else:
+            texts: List[str] = self.dataset["train"]["informal_description"]
+            self.embeddings = self.model.encode(texts, convert_to_tensor=True, show_progress_bar=True)
+
+    def search(self, query: str, k: int = 5) -> List[Tuple[str, float]]:
+        """Return up to ``k`` entry names ranked by cosine similarity to ``query``.
+
+        Args:
+            query: Text to encode and compare against the stored embeddings.
+            k: Maximum number of results; capped at the number of embeddings.
+
+        Returns:
+            ``(name, score)`` pairs ordered from most to least similar.
+        """
+        query_emb: torch.Tensor = self.model.encode(query, convert_to_tensor=True)
+        # Precomputed embeddings may sit on a different device than the encoded
+        # query; move them once so the similarity product does not mix devices.
+        self.embeddings = self.embeddings.to(query_emb.device)
+        cosine_scores: torch.Tensor = util.cos_sim(query_emb, self.embeddings)[0]
+
+        # torch.topk raises when k exceeds the number of scores, so clamp it.
+        k = min(k, cosine_scores.shape[0])
+        top_results = torch.topk(cosine_scores, k)
+        top_indices = top_results.indices.tolist()
+        top_scores = top_results.values.tolist()
+
+        train_split = self.dataset["train"]
+        return [
+            (train_split[index]["name"], float(score))
+            for index, score in zip(top_indices, top_scores)
+        ]
+
+if __name__ == "__main__":
+    engine = Retriever("FrenzyMath/mathlib_informal_v4.19.0")  # hypothetical dataset
+    query = "Prove that every continuous function on [0,1] is bounded."
+    top_matches = engine.search(query, k=3)
+
+    # search() yields entry names, so label them as such when printing.
+    for name, score in top_matches:
+        print(f"{score:.4f} | {name[:80]}...")
diff --git a/docs/examples/hilbert/prove.py b/docs/examples/hilbert/prove.py
@@ -0,0 +1,25 @@
+# export PYTHONPATH="/path/to/your/dir:$PYTHONPATH"
+# export LEAN_PROJECT_PATH="/path/to/your/dir"
+
+import mellea
+from docs.examples.hilbert.hilbert import Hilbert
+from docs.examples.hilbert.mathlib_retriever import Retriever
+
+retriever = None
+reasoner = mellea.start_session("ollama", "gpt-oss:120b-cloud")
+prover = mellea.start_session("ollama", "gpt-oss:120b-cloud")
+# mellea.start_session("hf", "deepseek-ai/DeepSeek-Prover-V2-7B")
+# mellea.start_session("ollama", "deepseek-v3.1:671b-cloud")
+# mellea.start_session("hf", "deepseek-ai/DeepSeek-Prover-V2-671B")
+
+hilbert = Hilbert(retriever, reasoner, prover, lean_project_path=None)
+theorem = hilbert.FormulateFormalStatement(
+    # "1+1=2"
+    # "a^2 is non-negative for all reals a",
+    "x^2+x+1 is positive for all reals x",
+)
+print(theorem)
+# FormulateFormalStatement returns None when sampling did not succeed; skip the
+# prover then, since None would be formatted into its prompt as the text "None".
+proof = hilbert.AttemptProverLLMProof(theorem) if theorem is not None else None
+print(proof)
diff --git a/mellea/stdlib/reqlib/__init__.py b/mellea/stdlib/reqlib/__init__.py