Skip to content

Commit d77d449

Browse files
Merge pull request #27 from LLMSQL/19-feature-add-transformers-inference
second inference type added; vllm moved to optional deps; README upda…
2 parents 93c0de0 + ca50b95 commit d77d449

20 files changed

+1325
-5472
lines changed

.github/workflows/tests.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ jobs:
3030
${{ runner.os }}-pdm-
3131
3232
- name: Install dependencies (with dev)
33-
run: pdm install --with dev
33+
run: pdm install --with dev,vllm
3434

3535
- name: Run tests with coverage
3636
run: PYTHONPATH=. pdm run pytest --cov=llmsql --cov-report=xml --maxfail=1 --disable-warnings -v
@@ -41,4 +41,4 @@ jobs:
4141
token: ${{ secrets.CODECOV_TOKEN }}
4242
files: ./coverage.xml
4343
flags: unittests
44-
fail_ci_if_error: true
44+
fail_ci_if_error: false

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,6 @@ dist/
77
*.egg-info/
88
.pdm-python
99
.vscode
10+
11+
.coverage
12+
llmsql_workdir

README.md

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,19 @@ Modern LLMs are already strong at **producing SQL queries without finetuning**.
4040
We therefore recommend that most users:
4141

4242
1. **Run inference** directly on the full benchmark:
43-
- Use [`llmsql.LLMSQLVLLMInference`](./llmsql/inference/inference.py) (the main inference class) for generation of SQL predictions with your LLM from HF.
43+
model_or_model_name_or_path="Qwen/Qwen2.5-1.5B-Instruct",
44+
output_file="path_to_your_outputs.jsonl",
45+
- Use [`llmsql.inference_transformers`](./llmsql/inference/inference_transformers.py) (the function for transformers inference) for generation of SQL predictions with your model. If you want to do vllm based inference, use [`llmsql.inference_vllm`](./llmsql/inference/inference_vllm.py). Works with both an HF model id, e.g. `Qwen/Qwen2.5-1.5B-Instruct`, and a model instance passed directly, e.g. `inference_transformers(model_or_model_name_or_path=model, ...)`
4446
- Evaluate results against the benchmark with the [`llmsql.LLMSQLEvaluator`](./llmsql/evaluation/evaluator.py) evaluator class.
4547

4648
2. **Optional finetuning**:
4749
- For research or domain adaptation, we provide finetuning script for HF models. Use `llmsql finetune --help` or read [Finetune Readme](./llmsql/finetune/README.md) to find more about finetuning.
4850

4951
> [!Tip]
5052
> You can find additional manuals in the README files of each folder ([Inference Readme](./llmsql/inference/README.md), [Evaluation Readme](./llmsql/evaluation/README.md), [Finetune Readme](./llmsql/finetune/README.md))
53+
54+
> [!Tip]
55+
> vllm based inference requires the vllm optional dependency group to be installed: `pip install llmsql[vllm]`
5156
---
5257

5358
## Repository Structure
@@ -77,24 +82,21 @@ pip3 install llmsql
7782
### 1. Run Inference
7883

7984
```python
80-
from llmsql import LLMSQLVLLMInference
85+
from llmsql import inference_transformers
8186

82-
# Initialize inference engine
83-
inference = LLMSQLVLLMInference(
84-
model_name="Qwen/Qwen2.5-1.5B-Instruct", # or any Hugging Face causal LM
85-
tensor_parallel_size=1,
86-
)
87-
88-
# Run generation
89-
results = inference.generate(
87+
# Run generation directly with transformers
88+
results = inference_transformers(
89+
model_or_model_name_or_path="Qwen/Qwen2.5-1.5B-Instruct",
9090
output_file="path_to_your_outputs.jsonl",
91-
questions_path="data/questions.jsonl",
92-
tables_path="data/tables.jsonl",
93-
shots=5,
91+
num_fewshots=5,
9492
batch_size=8,
9593
max_new_tokens=256,
96-
temperature=0.7,
94+
do_sample=False,
95+
model_args={
96+
"torch_dtype": "bfloat16",
97+
}
9798
)
99+
98100
```
99101

100102
### 2. Evaluate Results

examples/inference.ipynb

Lines changed: 0 additions & 4728 deletions
This file was deleted.

llmsql/__init__.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,32 @@
1+
"""
2+
LLMSQL — A Text2SQL benchmark for evaluation of Large Language Models
3+
"""
4+
15
__version__ = "0.1.4"
26

37

48
def __getattr__(name: str): # type: ignore
5-
if name == "LLMSQLVLLMInference":
6-
from .inference.inference import LLMSQLVLLMInference
7-
8-
return LLMSQLVLLMInference
9-
elif name == "LLMSQLEvaluator":
9+
if name == "LLMSQLEvaluator":
1010
from .evaluation.evaluator import LLMSQLEvaluator
1111

1212
return LLMSQLEvaluator
13+
elif name == "inference_vllm":
14+
try:
15+
from .inference.inference_vllm import inference_vllm
16+
17+
return inference_vllm
18+
except ModuleNotFoundError as e:
19+
if "vllm" in str(e):
20+
raise ImportError(
21+
"The vLLM backend is not installed. "
22+
"Install it with: pip install llmsql[vllm]"
23+
) from e
24+
raise
25+
elif name == "inference_transformers":
26+
from .inference.inference_transformers import inference_transformers
27+
28+
return inference_transformers
1329
raise AttributeError(f"module {__name__} has no attribute {name!r}")
1430

1531

16-
__all__ = ["LLMSQLEvaluator", "LLMSQLVLLMInference"]
32+
__all__ = ["LLMSQLEvaluator", "inference_vllm", "inference_transformers"]

llmsql/__main__.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
import argparse
2+
import inspect
3+
import json
24
import sys
35

46

57
def main() -> None:
68
parser = argparse.ArgumentParser(prog="llmsql", description="LLMSQL CLI")
79
subparsers = parser.add_subparsers(dest="command")
810

11+
# ================================================================
12+
# Fine-tuning command
13+
# ================================================================
914
ft_parser = subparsers.add_parser(
1015
"finetune",
1116
help="Fine-tune a causal LM on the LLMSQL benchmark.",
@@ -21,13 +26,154 @@ def main() -> None:
2126
help="Path to a YAML config file containing training parameters.",
2227
)
2328

29+
# ================================================================
30+
# Inference command
31+
# ================================================================
32+
inference_examples = r"""
33+
Examples:
34+
35+
# 1️⃣ Run inference with Transformers backend
36+
llmsql inference --method transformers \
37+
--model-or-model-name-or-path Qwen/Qwen2.5-1.5B-Instruct \
38+
--output-file outputs/preds_transformers.jsonl \
39+
--batch-size 8 \
40+
--num-fewshots 5
41+
42+
# 2️⃣ Run inference with vLLM backend
43+
llmsql inference --method vllm \
44+
--model-name Qwen/Qwen2.5-1.5B-Instruct \
45+
--output-file outputs/preds_vllm.jsonl \
46+
--batch-size 8 \
47+
--num-fewshots 5
48+
49+
# 3️⃣ Pass model-specific kwargs (for Transformers)
50+
llmsql inference --method transformers \
51+
--model-or-model-name-or-path meta-llama/Llama-3-8b-instruct \
52+
--output-file outputs/llama_preds.jsonl \
53+
--model-args '{"attn_implementation": "flash_attention_2", "torch_dtype": "bfloat16"}'
54+
55+
# 4️⃣ Pass LLM init kwargs (for vLLM)
56+
llmsql inference --method vllm \
57+
--model-name mistralai/Mixtral-8x7B-Instruct-v0.1 \
58+
--output-file outputs/mixtral_preds.jsonl \
59+
--llm-kwargs '{"max_model_len": 4096, "gpu_memory_utilization": 0.9}'
60+
61+
# 5️⃣ Override generation parameters dynamically
62+
llmsql inference --method transformers \
63+
--model-or-model-name-or-path Qwen/Qwen2.5-1.5B-Instruct \
64+
--output-file outputs/temp_0.9.jsonl \
65+
--temperature 0.9 \
66+
--generate-kwargs '{"do_sample": true, "top_p": 0.9, "top_k": 40}'
67+
"""
68+
69+
inf_parser = subparsers.add_parser(
70+
"inference",
71+
help="Run inference using either Transformers or vLLM backend.",
72+
description="Run SQL generation using a chosen inference method "
73+
"(either 'transformers' or 'vllm').",
74+
epilog=inference_examples,
75+
formatter_class=argparse.RawTextHelpFormatter,
76+
)
77+
78+
inf_parser.add_argument(
79+
"--method",
80+
type=str,
81+
required=True,
82+
choices=["transformers", "vllm"],
83+
help="Inference backend to use ('transformers' or 'vllm').",
84+
)
85+
86+
# ================================================================
87+
# Parse CLI
88+
# ================================================================
2489
args, extra = parser.parse_known_args()
2590

91+
# ------------------------------------------------
92+
# Fine-tune
93+
# ------------------------------------------------
2694
if args.command == "finetune":
2795
from llmsql.finetune import finetune
2896

2997
sys.argv = ["llmsql-finetune"] + extra + ["--config_file", args.config_file]
3098
finetune.run_cli()
99+
100+
# ------------------------------------------------
101+
# Inference
102+
# ------------------------------------------------
103+
elif args.command == "inference":
104+
if args.method == "vllm":
105+
from llmsql import inference_vllm as inference_fn
106+
elif args.method == "transformers":
107+
from llmsql import inference_transformers as inference_fn # type: ignore
108+
else:
109+
raise ValueError(f"Unknown inference method: {args.method}")
110+
111+
# Dynamically create parser from the function signature
112+
fn_parser = argparse.ArgumentParser(
113+
prog=f"llmsql inference --method {args.method}",
114+
description=f"Run inference using {args.method} backend",
115+
)
116+
117+
sig = inspect.signature(inference_fn)
118+
for name, param in sig.parameters.items():
119+
if param.kind == inspect.Parameter.VAR_KEYWORD:
120+
fn_parser.add_argument(
121+
"--llm-kwargs",
122+
default="{}",
123+
help="Additional LLM kwargs as a JSON string, e.g. '{\"top_p\": 0.9}'",
124+
)
125+
fn_parser.add_argument(
126+
"--generate-kwargs",
127+
default="{}",
128+
help="",
129+
)
130+
continue
131+
arg_name = f"--{name.replace('_', '-')}"
132+
default = param.default
133+
if default is inspect.Parameter.empty:
134+
fn_parser.add_argument(arg_name, required=True)
135+
else:
136+
if isinstance(default, bool):
137+
fn_parser.add_argument(
138+
arg_name,
139+
action="store_true" if not default else "store_false",
140+
help=f"(default: {default})",
141+
)
142+
elif default is None:
143+
fn_parser.add_argument(arg_name, type=str, default=None)
144+
else:
145+
fn_parser.add_argument(
146+
arg_name, type=type(default), default=default
147+
)
148+
149+
fn_args = fn_parser.parse_args(extra)
150+
fn_kwargs = vars(fn_args)
151+
152+
if "llm_kwargs" in fn_kwargs and isinstance(fn_kwargs["llm_kwargs"], str):
153+
try:
154+
fn_kwargs["llm_kwargs"] = json.loads(fn_kwargs["llm_kwargs"])
155+
except json.JSONDecodeError:
156+
print("⚠️ Could not parse --llm-kwargs JSON, passing as string.")
157+
158+
if fn_kwargs.get("model_args") is not None:
159+
try:
160+
fn_kwargs["model_args"] = json.loads(fn_kwargs["model_args"])
161+
except json.JSONDecodeError:
162+
raise
163+
164+
if fn_kwargs.get("generate_kwargs") is not None:
165+
try:
166+
fn_kwargs["generate_kwargs"] = json.loads(fn_kwargs["generate_kwargs"])
167+
except json.JSONDecodeError:
168+
raise
169+
170+
print(f"🔹 Running {args.method} inference with arguments:")
171+
for k, v in fn_kwargs.items():
172+
print(f" {k}: {v}")
173+
174+
results = inference_fn(**fn_kwargs)
175+
print(f"✅ Inference complete. Generated {len(results)} results.")
176+
31177
else:
32178
parser.print_help()
33179

llmsql/config/config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
REPO_ID = "llmsql-bench/llmsql-benchmark"
2+
DEFAULT_WORKDIR_PATH = "llmsql_workdir"

0 commit comments

Comments
 (0)