stochasticai · glennko · Nov 9, 2025 · Nov 16, 2025
diff --git a/README.md b/README.md
@@ -20,7 +20,7 @@
 
 ___
 
-`xTuring` makes it simple, fast, and cost‑efficient to fine‑tune open‑source LLMs (e.g., GPT‑OSS, LLaMA/LLaMA 2, Falcon, GPT‑J, GPT‑2, OPT, Bloom, Cerebras, Galactica) on your own data — locally or in your private cloud.
+`xTuring` makes it simple, fast, and cost‑efficient to fine‑tune open‑source LLMs (e.g., GPT‑OSS, LLaMA/LLaMA 2, Falcon, Qwen3, GPT‑J, GPT‑2, OPT, Bloom, Cerebras, Galactica) on your own data — locally or in your private cloud.
 
 Why xTuring:
 - Simple API for data prep, training, and inference
@@ -162,6 +162,17 @@ outputs = model.generate(dataset = dataset, batch_size=10)
 
 ```
 
+7. __Qwen3 0.6B supervised fine-tuning__ – The lightweight Qwen3 0.6B checkpoint now has first-class support (registry, configs, docs, and examples) so you can launch SFT/LoRA jobs immediately.
+```python
+from xturing.datasets import InstructionDataset
+from xturing.models import BaseModel
+
+dataset = InstructionDataset("./examples/models/llama/alpaca_data")
+model = BaseModel.create("qwen3_0_6b_lora")
+model.finetune(dataset=dataset)
+```
+> See `examples/models/qwen3/qwen3_lora_finetune.py` for a runnable script.
+
 An exploration of the [Llama LoRA INT4 working example](examples/features/int4_finetuning/LLaMA_lora_int4.ipynb) is recommended for an understanding of its application.
 
 For an extended insight, consider examining the [GenericModel working example](examples/features/generic/generic_model.py) available in the repository.
@@ -290,7 +301,7 @@ Replace `<model_path>` with a local directory or a Hugging Face model like `face
 - [x] Dataset generation using self-instruction
 - [x] Low-precision LoRA fine-tuning and unsupervised fine-tuning
 - [x] INT8 low-precision fine-tuning support
-- [x] OpenAI, Cohere and AI21 Studio model APIs for dataset generation
+- [x] OpenAI, Cohere, and Claude model APIs for dataset generation
 - [x] Added fine-tuned checkpoints for some models to the hub
 - [x] INT4 LLaMA LoRA fine-tuning demo
 - [x] INT4 LLaMA LoRA fine-tuning with INT4 generation

diff --git a/docs/docs/advanced/generate.md b/docs/docs/advanced/generate.md
@@ -26,23 +26,23 @@ engine = Davinci("your-api-key")
 
   </TabItem>
 
-  <TabItem value="cohere" label="Cohere">
-
-  ```python
-  from xturing.model_apis.cohere import Medium
-  engine = Medium("your-api-key")
-  ```
-
-  </TabItem>
-  <TabItem value="ai21" label="AI21">
-
-  ```python
-  from xturing.model_apis.ai21 import J2Grande
-  engine = J2Grande("your-api-key")
-  ```
-
-  </TabItem>
-</Tabs>
+  <TabItem value="cohere" label="Cohere">
+
+  ```python
+  from xturing.model_apis.cohere import Medium
+  engine = Medium("your-api-key")
+  ```
+
+  </TabItem>
+  <TabItem value="claude" label="Claude">
+
+  ```python
+  from xturing.model_apis.claude import ClaudeSonnet
+  engine = ClaudeSonnet("your-api-key")
+  ```
+
+  </TabItem>
+</Tabs>
 
 ## From no data
 

diff --git a/examples/datasets/create_alpaca_dataset.ipynb b/examples/datasets/create_alpaca_dataset.ipynb
@@ -42,9 +42,9 @@
     "#\n",
     "# engine = Medium(\"your-api-key\")\n",
     "\n",
-    "# Alternatively, you can use AI21 to generate dataset\n",
+    "# Alternatively, you can use Claude to generate dataset\n",
     "\n",
-    "# from xturing.model_apis.ai21 import J2Grande\n",
+    "# from xturing.model_apis.claude import ClaudeSonnet\n",
     "#\n",
-    "# engine = J2Grande(\"your-api-key\")"
+    "# engine = ClaudeSonnet(\"your-api-key\")"
    ],
@@ -100,4 +100,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
diff --git a/examples/datasets/create_instruction_dataset_from_files.ipynb b/examples/datasets/create_instruction_dataset_from_files.ipynb
@@ -46,9 +46,9 @@
     "#\n",
     "# engine = Medium(\"your-api-key\")\n",
     "\n",
-    "# Alternatively, you can use AI21 to generate dataset\n",
+    "# Alternatively, you can use Claude to generate dataset\n",
     "\n",
-    "# from xturing.model_apis.ai21 import J2Grande\n",
+    "# from xturing.model_apis.claude import ClaudeSonnet\n",
     "#\n",
-    "# engine = J2Grande(\"your-api-key\")"
+    "# engine = ClaudeSonnet(\"your-api-key\")"
    ]
@@ -124,4 +124,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
diff --git a/examples/features/dataset_generation/create_alpaca_dataset.ipynb b/examples/features/dataset_generation/create_alpaca_dataset.ipynb
@@ -42,9 +42,9 @@
     "#\n",
     "# engine = Medium(\"your-api-key\")\n",
     "\n",
-    "# Alternatively, you can use AI21 to generate dataset\n",
+    "# Alternatively, you can use Claude to generate dataset\n",
     "\n",
-    "# from xturing.model_apis.ai21 import J2Grande\n",
+    "# from xturing.model_apis.claude import ClaudeSonnet\n",
     "#\n",
-    "# engine = J2Grande(\"your-api-key\")"
+    "# engine = ClaudeSonnet(\"your-api-key\")"
    ],
@@ -100,4 +100,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
diff --git a/pyproject.toml b/pyproject.toml
@@ -43,7 +43,7 @@ keywords = [
 dependencies = [
     "torch >= 1.9.0",
     "pytorch-lightning",
-    "transformers>=4.53.0",
+    "transformers>=4.36.0",
     "datasets==2.14.5",
     "pyarrow >= 8.0.0, < 21.0.0",
     "scipy >= 1.0.0",
@@ -54,8 +54,8 @@ dependencies = [
     "gradio>=5.31.0",
     "click",
     "wget",
-    "ai21",
     "cohere",
+    "anthropic",
     "ipywidgets",
     "openai >= 0.27.0",
     "pydantic >= 1.10.0",

diff --git a/src/xturing/model_apis/__init__.py b/src/xturing/model_apis/__init__.py
@@ -1,6 +1,5 @@
-from xturing.model_apis.ai21 import AI21TextGenerationAPI
-from xturing.model_apis.ai21 import J2Grande as AI21J2Grande
 from xturing.model_apis.base import BaseApi, TextGenerationAPI
+from xturing.model_apis.claude import ClaudeSonnet, ClaudeTextGenerationAPI
 from xturing.model_apis.cohere import CohereTextGenerationAPI
 from xturing.model_apis.cohere import Medium as CohereMedium
 from xturing.model_apis.openai import ChatGPT as OpenAIChatGPT
@@ -9,8 +8,8 @@
 
 BaseApi.add_to_registry(OpenAITextGenerationAPI.config_name, OpenAITextGenerationAPI)
 BaseApi.add_to_registry(CohereTextGenerationAPI.config_name, CohereTextGenerationAPI)
-BaseApi.add_to_registry(AI21TextGenerationAPI.config_name, AI21TextGenerationAPI)
+BaseApi.add_to_registry(ClaudeTextGenerationAPI.config_name, ClaudeTextGenerationAPI)
 BaseApi.add_to_registry(OpenAIDavinci.config_name, OpenAIDavinci)
 BaseApi.add_to_registry(OpenAIChatGPT.config_name, OpenAIChatGPT)
 BaseApi.add_to_registry(CohereMedium.config_name, CohereMedium)
-BaseApi.add_to_registry(AI21J2Grande.config_name, AI21J2Grande)
+BaseApi.add_to_registry(ClaudeSonnet.config_name, ClaudeSonnet)
diff --git a/src/xturing/model_apis/ai21.py b/src/xturing/model_apis/ai21.py
diff --git a/src/xturing/model_apis/claude.py b/src/xturing/model_apis/claude.py
@@ -0,0 +1,180 @@
+import time
+from datetime import datetime
+
+try:
+    from anthropic import (
+        APIConnectionError as AnthropicAPIConnectionError,
+        APIError as AnthropicAPIError,
+        Anthropic,
+        RateLimitError as AnthropicRateLimitError,
+    )
+except ModuleNotFoundError as import_err:  # pragma: no cover - optional dependency
+    # Defer the failure to construction time (see _ensure_dependency) so this
+    # module can still be imported when the anthropic SDK is not installed.
+    Anthropic = None
+    AnthropicAPIError = AnthropicAPIConnectionError = AnthropicRateLimitError = Exception
+    _ANTHROPIC_IMPORT_ERROR = import_err
+else:  # pragma: no cover - dependency import paths exercised in runtime envs
+    _ANTHROPIC_IMPORT_ERROR = None
+
+from xturing.model_apis.base import TextGenerationAPI
+
+
+class ClaudeTextGenerationAPI(TextGenerationAPI):
+    """Text-generation API wrapper around the Anthropic Messages endpoint."""
+
+    config_name = "claude"
+
+    def __init__(self, model, api_key, request_batch_size=1):
+        """Build an Anthropic client for ``model``.
+
+        Args:
+            model: Anthropic model identifier; passed to the base class as ``engine``.
+            api_key: Anthropic API key.
+            request_batch_size: Passed through to the base class.
+
+        Raises:
+            ModuleNotFoundError: If the ``anthropic`` SDK is not installed.
+        """
+        self._ensure_dependency()
+        super().__init__(engine=model, api_key=api_key, request_batch_size=request_batch_size)
+        self._client = Anthropic(api_key=api_key)
+
+    @staticmethod
+    def _ensure_dependency():
+        """Raise ``ModuleNotFoundError`` if the ``anthropic`` import failed."""
+        if Anthropic is None:
+            message = (
+                "The anthropic SDK is required for ClaudeTextGenerationAPI. "
+                "Install it with `pip install anthropic`."
+            )
+            raise ModuleNotFoundError(message) from _ANTHROPIC_IMPORT_ERROR
+
+    def _make_request(self, prompt, max_tokens, temperature, top_p, stop_sequences):
+        """Send ``prompt`` as a single user message and return the SDK response.
+
+        ``top_p`` and ``stop_sequences`` are only sent when set.
+        """
+        params = {
+            "model": self.engine,
+            "max_tokens": max_tokens,
+            "temperature": temperature,
+            "messages": [{"role": "user", "content": prompt}],
+        }
+        if top_p is not None:
+            params["top_p"] = top_p
+        if stop_sequences:
+            params["stop_sequences"] = stop_sequences
+        return self._client.messages.create(**params)
+
+    @staticmethod
+    def _render_response(response):
+        """Convert an SDK response into a ``{"choices": [...]}`` dict.
+
+        Text blocks are concatenated into one string; other block types are
+        skipped. Returns ``None`` when ``response`` is ``None``.
+        """
+        if response is None:
+            return None
+        text_chunks = []
+        for block in getattr(response, "content", []):
+            if getattr(block, "type", None) == "text":
+                text_chunks.append(getattr(block, "text", ""))
+        predicts = {
+            "choices": [
+                {
+                    "text": "".join(text_chunks),
+                    "finish_reason": getattr(response, "stop_reason", "eos"),
+                }
+            ]
+        }
+        return predicts
+
+    def generate_text(
+        self,
+        prompts,
+        max_tokens,
+        temperature,
+        top_p=None,
+        frequency_penalty=None,
+        presence_penalty=None,
+        stop_sequences=None,
+        logprobs=None,
+        n=1,
+        best_of=1,
+        retries=3,
+        **kwargs,
+    ):
+        """Generate one completion per prompt, retrying failed requests.
+
+        ``frequency_penalty``, ``presence_penalty``, ``logprobs``, ``n``,
+        ``best_of`` and ``**kwargs`` are accepted but not used here.
+
+        Args:
+            prompts: A prompt or a list of prompts.
+            max_tokens: Forwarded to the request as ``max_tokens``.
+            temperature: Forwarded to the request as ``temperature``.
+            top_p: Forwarded to the request when not ``None``.
+            stop_sequences: Forwarded to the request when non-empty.
+            retries: Maximum number of retries after the first attempt.
+
+        Returns:
+            A list with one dict per prompt containing ``prompt``,
+            ``response`` (``None`` if every attempt failed) and ``created_at``.
+        """
+        if not isinstance(prompts, list):
+            prompts = [prompts]
+
+        results = []
+        for prompt in prompts:
+            response = None
+            retry_cnt = 0
+            backoff_time = 30
+            while retry_cnt <= retries:
+                try:
+                    response = self._make_request(
+                        prompt=prompt,
+                        max_tokens=max_tokens,
+                        temperature=temperature,
+                        top_p=top_p,
+                        stop_sequences=stop_sequences,
+                    )
+                    break
+                except (
+                    AnthropicAPIError,
+                    AnthropicAPIConnectionError,
+                    AnthropicRateLimitError,
+                ) as e:
+                    print(f"ClaudeError: {e}.")
+                    retry_cnt += 1
+                    if retry_cnt > retries:
+                        # Out of attempts: give up now instead of sleeping
+                        # through one more backoff that no request follows.
+                        break
+                    print(f"Retrying in {backoff_time} seconds...")
+                    time.sleep(backoff_time)
+                    backoff_time *= 1.5
+
+            data = {
+                "prompt": prompt,
+                "response": self._render_response(response),
+                "created_at": str(datetime.now()),
+            }
+            results.append(data)
+
+        return results
+
+
+class ClaudeSonnet(ClaudeTextGenerationAPI):
+    """``ClaudeTextGenerationAPI`` pinned to the ``claude-3-sonnet-20240229`` model."""
+
+    config_name = "claude_3_sonnet"
+
+    def __init__(self, api_key, request_batch_size=1):
+        # NOTE(review): confirm this dated model ID is still served by the
+        # Anthropic API before release.
+        super().__init__(
+            model="claude-3-sonnet-20240229",
+            api_key=api_key,
+            request_batch_size=request_batch_size,
+        )
diff --git a/tests/xturing/model_apis/__init__.py b/tests/xturing/model_apis/__init__.py