
Commit 2c8a68a

Documentation content update

1 parent 6a1d312 commit 2c8a68a

16 files changed: +379 −284 lines

darkit/core/predicter.py

Lines changed: 5 additions & 2 deletions
@@ -116,10 +116,13 @@ def from_pretrained(
         device: Optional[str] = None,
         checkpoint: Optional[str] = None,
     ):
+        """
+        Cannot be instantiated directly from the parent Predicter class; the root path must be consistent, otherwise the model cannot be located by its name.
+        """
+        model = cls.get_model(name, checkpoint)
+        model = cls.inject_script(model, name)
         trainer_config = cls.get_trainer_config_json(name)
         device = device if device else trainer_config.get("device", "cuda")
-        model = cls.get_model(name, checkpoint).to(device)
-        model = cls.inject_script(model, name)
         return cls(name, model, device=device)

     def _predict(self, ctx):
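
For reference, the user-guide diff later in this commit loads trained models through the `darkit.lm` Predicter rather than instantiating this base class directly. A minimal usage sketch, assuming a model was already trained and saved under the example name `GPT-1` (the name and `ctx_len=64` are example values taken from this commit's docs):

```python
# Minimal sketch, not part of the commit: load a previously trained model by
# name via the concrete Predicter exposed by darkit.lm, as the docs below do.
from darkit.lm.main import Predicter

predicter = Predicter.from_pretrained("GPT-1", device="cuda")
for token in predicter.predict("I am", ctx_len=64):
    print(token, end="", flush=True)
print()
```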

darkit/core/trainer.py

Lines changed: 6 additions & 6 deletions
@@ -197,7 +197,7 @@ def __new__(

     @property
     def root(self) -> Path:
-        return MODEL_PATH
+        return MODEL_PATH / "base"

     @property
     def save_directory(self) -> Optional[Path]:

@@ -305,13 +305,13 @@ def _save_external_config(self):
     def _copy_model_code(self):
         try:
             if self.save_directory:
-                model_py_path = inspect.getfile(self.model.__class__)
-                with open(model_py_path, "r", encoding="utf-8") as f:
-                    model_source_code = f.read()
-                with open(self.model_code_archive_path, "w", encoding="utf-8") as f:
-                    f.write(model_source_code)
+                model_source_code = inspect.getsource(self.model.__class__)
+                with open(self.model_code_archive_path, "w", encoding="utf-8") as f:
+                    f.write(model_source_code)
         except OSError as e:
             print("Save model code failed:", e)
+        except TypeError as e:
+            print("Cannot retrieve source code for built-in class:", e)

    def save_pretrained(self, check_poinent="complete"):
        """

darkit/lm/command.py

Lines changed: 10 additions & 5 deletions
@@ -26,14 +26,19 @@ def show():

     click.echo("TRAINED MODELS:")
     # Read the model folders under MODEL_PATH and print the model names
-    for i, model in enumerate(MODEL_PATH.iterdir()):
-        if model.is_dir():  # exclude the __options__.json file
+    for model in MODEL_PATH.iterdir():
+        if model.is_dir() and any(
+            version.suffix == ".pth" for version in model.iterdir()
+        ):  # only show folders that contain .pth files
             click.echo(f" - {model.name}")
             # Each .pth file in the model folder is one version of the trained model weights
             # Print each version in the form model:version
-            for j, version in enumerate(model.iterdir()):
+            i = 1
+            # Sort by file modification time
+            for version in sorted(model.iterdir(), key=lambda x: x.stat().st_mtime):
                 if version.suffix == ".pth":
-                    click.echo(f" {j + 1}. {version.stem}")
+                    click.echo(f" {i}. {version.stem}")
+                    i += 1
     click.echo()


@@ -62,7 +67,7 @@ def predict(model_type: str, model_name: str, prompt: str, device: str, ctx_len:
    Examples: darkit predict SpikeGPT SpikeGPT:complete "I am" --tokenizer gpt2 --ctx_len 512
    """
    import torch
-    from darkit.core import Predicter
+    from .main import Predicter

    # model_name = MODEL_NAME:MODEL_VERSION
    # Split model_name into model_name and version; default to 'complete' if no version is given
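
The reworked `show()` loop is essentially a standard `pathlib` pattern: keep only `.pth` checkpoints and order them by modification time. A standalone sketch of that pattern (the directory path is hypothetical; DarwinKit's model directory defaults to `~/.cache/DarwinKit` per the user guide, but the exact folder layout used here is an assumption):

```python
from pathlib import Path

# Hypothetical checkpoint folder for a model named "GPT-1".
model_dir = Path.home() / ".cache" / "DarwinKit" / "GPT-1"

# Keep only .pth checkpoints and sort oldest-to-newest by modification time,
# mirroring the ordering now used by `darkit lm show`.
checkpoints = sorted(
    (p for p in model_dir.iterdir() if p.suffix == ".pth"),
    key=lambda p: p.stat().st_mtime,
)
for i, ckpt in enumerate(checkpoints, start=1):
    print(f"{i}. {ckpt.stem}")
```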

docs/2.User-guide/2.How-use-web.md

Lines changed: 15 additions & 2 deletions
@@ -22,6 +22,9 @@ After setting the parameters of the model, click the `Train` button at the bottom

 After the training is completed, the model will be saved (if the user chooses to save the model in the settings) to the DarwinKit model directory (default is `~/.cache/DarwinKit`, which can be modified by changing the `DSPIKE_HOME` environment variable). Then users can use the model for prediction on the prediction page or view the training logs and parameters of the model on the visualization page.

+#### Resume Training
+If users need to resume training from the weights of a previously trained model, they only need to select that model in the `Resume` dropdown box on the training page and then click the `Train` button to continue training from the last checkpoint.
+
 ### Predict Models
 Users can use trained models for prediction. On the prediction page, users can select a trained model, then input text, and click the predict button to get the prediction result of the model.

@@ -30,11 +33,21 @@ All trained models will be displayed in the `Model Name` dropdown box, and users

 After starting the prediction, the output of the model will be displayed on the page in real-time.

+### Model Forking
+The model forking feature gives developers a new way to customize models. The `Fork` operation creates a forked copy of an existing model that can be edited, managed, and trained on its own; all subsequent operations on the fork are independent of the original model, protecting the integrity and security of the original.
+
+**User Guide**
+1. On the train models page, select a model and set the relevant parameters. Click the `Fork` button, enter the name of the forked model in the pop-up box, and click `Create fork` to create the forked model.
+![Fork Step 1](/static/docs/fork/step1.png)
+2. After the forked model is created, you are redirected to the model editing page, which is introduced below.
+![Fork Step 2](/static/docs/fork/step2.png)
+3. After editing, click `Fork` in the sidebar to open the management page for forked models. Select a model in the `Forked Model` dropdown box; you can then click the `View & Edit` button to edit it or the `Train` button to train it.
+![Fork Step 3](/static/docs/fork/step3.png)
+
 ### Model Visualization
 Users can view the training logs and parameters of trained models. On the visualization page, users can select trained models (multiple models can be selected for data comparison), and click the view button to see the visualized charts of the parameters.

 The schematic diagram is as follows:
 ![model visual](/static/docs/visual.jpg)

-If the model is in training, the page will update the data in real-time.
-
+If the model is in training, the page will update the data in real-time.

docs/2.User-guide/3.How-use-cli.md

Lines changed: 11 additions & 1 deletion
@@ -14,9 +14,19 @@ Options:

 Commands:
   create-options  Generate the configuration file for the model.
-  predict         Use the trained SNN model for inference. Optional model types can be viewed using the command DarwinKit show...
+  predict         Use the trained SNN model for inference. Optional model types can be viewed using the command darkit show...
   show            Display the available model_types, datasets, or...
   start           Start the WEB service.
   train           Train the SNN model.
 ```

+## Example
+```bash
+# Train the model
+darkit lm train --tokenizer openai-community/gpt2 --dataset Salesforce/wikitext:wikitext-103-raw-v1 SpikeGPT --vocab_size 30500 --ctx_len 1024
+# Use the model for prediction
+darkit lm predict SpikeGPT $model_name $prompt --device cuda
+# View trained models
+darkit lm show
+```
+

docs/2.User-guide/4.How-use-model.md

Lines changed: 57 additions & 12 deletions
@@ -33,10 +33,10 @@ n_embd = 768

 config = SpikeGPTConfig(
     tokenizer.vocab_size,
-    train_dataset.ctx_len,
-    model_type=model_type,
-    n_layer=n_layer,
-    n_embd=n_embd,
+    ctx_len=ctx_len,
+    model_type="RWKV",
+    n_layer=12,
+    n_embd=768,
 )
 model = SpikeGPT(config).cuda()
 ```

@@ -50,19 +50,19 @@ from darkit import Trainer
 from darkit.models import TrainerConfig

 # Parameter configuration
-model_name = f"GPT-Test-Train-{random.randint(1000, 9999)}"
+model_name = "GPT-1"
 tconf = TrainerConfig(
     name=model_name,
-    device="cuda",
+    device=device,
     max_epochs=1,
     epoch_length_fixed=100,
     batch_size=2,
-    epoch_save_frequency=1,
+    save_step_interval=1,
 )
 # Configure the model, dataset, and tokenizer
-trainer = Trainer(model, tokenizer=tokenizer, config=tconf)
-# Start training
-trainer.train(train_dataset=wikitext_train)
+with Trainer(model, tokenizer=tokenizer, config=tconf) as trainer:
+    # Start training
+    trainer.train(train_dataset=wikitext_train)
 ```
 The `TrainerConfig` class is used to configure the training parameters. Specific parameters can be referenced in the definition of the `TrainerConfig` class.

@@ -71,13 +71,13 @@ The `TrainerConfig` class is used to configure the training parameters. Specific
 ### Saving and Loading the Model
 During model training, the logic for saving the model is generally controlled according to the settings in `TrainerConfig`. For example, in the `TrainerConfig` of `SpikeGPT`, we can set `save_step_interval` to control the interval for saving the model.

-The path for saving the model is determined based on the values of `tconf.name` and the `DSPIKE_HOME` environment variable.
+The path for saving the model is determined based on the values of `tconf.name` and the `DARWIN_KIT_HOME` environment variable.

 ### Generating Text
 After training is complete, the trained model can be loaded using the model name set during training. We can use the following code to generate text:

 ```python
-from darkit import Predicter
+from darkit.lm.main import Predicter
 predicter = Predicter.from_pretrained(model_name)

 prompt = "I am"

@@ -93,3 +93,48 @@ We can use the `predict` method to generate text. The `predict` method accepts a
 The schematic diagram is as follows:

 ![SpikeGPT Run](/static/docs/SpikeGPTRun.gif)
+
+## Complete Code
+```python
+from datasets import load_dataset
+from transformers import AutoTokenizer, GPT2Tokenizer
+from darkit.lm.main import Trainer, Predicter
+from darkit.lm.models.SpikeGPT import SpikeGPT, SpikeGPTConfig, TrainerConfig
+
+device = "cuda"
+ctx_len = 64
+
+tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
+tokenizer.pad_token = tokenizer.eos_token
+
+wikitext = load_dataset("Salesforce/wikitext", "wikitext-103-raw-v1")
+wikitext_train = wikitext["train"]  # type: ignore
+
+model_name = "GPT-1"
+config = SpikeGPTConfig(
+    tokenizer.vocab_size,
+    ctx_len=ctx_len,
+    model_type="RWKV",
+    n_layer=12,
+    n_embd=768,
+)
+model = SpikeGPT(config)
+tconf = TrainerConfig(
+    name=model_name,
+    device=device,
+    max_epochs=1,
+    epoch_length_fixed=100,
+    batch_size=2,
+    save_step_interval=1,
+)
+with Trainer(model, tokenizer=tokenizer, config=tconf) as trainer:
+    trainer.train(train_dataset=wikitext_train)
+
+# Test the model
+predicter = Predicter.from_pretrained(model_name)
+prompt = "hello world"
+print(prompt, end="")
+for char in predicter.predict(prompt, ctx_len=ctx_len):
+    print(char, end="", flush=True)
+print()
+```
