"""In-memory PyTorch datasets for the ETL 952-class single-character images.

Expected layout:
    <root_dir>/data/etl_952_singlechar_size_64/etl_952_singlechar_size_64/
        <split>/<class_idx>/<image>.png
where <class_idx> runs over "0".."951".
"""

import os

import numpy as np
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision import transforms


class ETL952Dataset(Dataset):
    """Loads one ETL-952 split fully into RAM as 32x32 RGB uint8 arrays."""

    def __init__(self, root_dir, folder_name, transform=None):
        """
        Args:
            root_dir: project root containing the ``data/`` directory.
            folder_name: split folder name, e.g. ``"952_train"``.
            transform: optional torchvision transform applied per sample;
                defaults to resize-to-32 + ToTensor + 0.5/0.5 normalize.
        """
        self.root_dir = os.path.join(
            root_dir, 'data', 'etl_952_singlechar_size_64',
            'etl_952_singlechar_size_64', folder_name)

        if transform is None:
            self.transform = transforms.Compose([
                # Resize is a no-op for the pre-resized arrays stored below,
                # but keeps the default pipeline explicit about input size.
                transforms.Resize((32, 32)),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ])
        else:
            self.transform = transform

        # Class folders are named by their integer label.
        self.classes = [str(i) for i in range(952)]
        self.data = []    # uint8 image arrays, shape (32, 32, 3)
        self.labels = []  # int class indices, aligned with self.data

        # Eagerly load every PNG of every class into memory.
        for class_idx, class_name in enumerate(self.classes):
            if class_idx % 10 == 0:  # coarse progress indicator
                print(f"start loading class {class_name}/952 from {folder_name}")
            class_path = os.path.join(self.root_dir, class_name)
            for img_name in os.listdir(class_path):
                if img_name.lower().endswith(('.png',)):
                    img_path = os.path.join(class_path, img_name)
                    image = Image.open(img_path).convert('RGB')
                    image = image.resize((32, 32))  # store at model input size
                    self.data.append(np.array(image))
                    self.labels.append(class_idx)

        self.data = np.array(self.data)
        self.labels = np.array(self.labels)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(transformed image, int label)`` for sample ``idx``."""
        image = Image.fromarray(self.data[idx])
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        return image, label


class ETL952Train(ETL952Dataset):
    """Training split ("952_train")."""

    def __init__(self, root_dir, transform=None):
        super().__init__(root_dir, folder_name="952_train", transform=transform)


class ETL952Test(ETL952Dataset):
    """Test split ("952_test")."""

    def __init__(self, root_dir, transform=None):
        super().__init__(root_dir, folder_name="952_test", transform=transform)


class ETL952Val(ETL952Dataset):
    """Validation split ("952_val")."""

    def __init__(self, root_dir, transform=None):
        super().__init__(root_dir, folder_name="952_val", transform=transform)


class ETL952Labels:
    """Per-class metadata table parsed from the space-separated
    952_labels.txt file.

    ``data`` is a numpy object array with one row per class and columns
    label / character / JISx0208 / UTF8 / Cangjie.
    """

    def __init__(self, path=os.path.join("data", "etl_952_singlechar_size_64",
                                         "etl_952_singlechar_size_64",
                                         "952_labels.txt")):
        self.path = path
        self.data = pd.read_csv(
            path, sep=" ", header=0,
            names=['label', 'character', 'JISx0208', 'UTF8', 'Cangjie']
        ).to_numpy()
"""Variant of cangjie_dataset that also yields each sample's Cangjie code.

Each sample is ``(image, int_label, cangjie_string)``; the string labels
come from the Cangjie column of 952_labels.txt (row i describes class i).
"""

import os

import numpy as np
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision import transforms


class ETL952Dataset(Dataset):
    """Loads one ETL-952 split fully into RAM, with Cangjie string labels."""

    def __init__(self, root_dir, folder_name, transform=None):
        """
        Args:
            root_dir: project root containing the ``data/`` directory.
            folder_name: split folder name, e.g. ``"952_train"``.
            transform: optional torchvision transform; defaults to
                resize-to-32 + ToTensor + 0.5/0.5 normalize.
        """
        base = os.path.join(root_dir, 'data', 'etl_952_singlechar_size_64',
                            'etl_952_singlechar_size_64')
        self.root_dir = os.path.join(base, folder_name)
        self.dict_file_path = os.path.join(base, "952_labels.txt")

        if transform is None:
            self.transform = transforms.Compose([
                transforms.Resize((32, 32)),  # no-op for pre-resized arrays
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ])
        else:
            self.transform = transform

        self.classes = [str(i) for i in range(952)]
        self.data = []           # uint8 image arrays, shape (32, 32, 3)
        self.labels = []         # int class indices
        self.string_labels = []  # Cangjie code string per sample

        # One row per class; column 4 is the Cangjie code.
        self.string_label_data = pd.read_csv(
            self.dict_file_path, sep=" ", header=0,
            names=['label', 'character', 'JISx0208', 'UTF8', 'Cangjie']
        ).to_numpy()

        # Eagerly load every PNG of every class into memory.
        for class_idx, class_name in enumerate(self.classes):
            if class_idx % 10 == 0:  # coarse progress indicator
                print(f"start loading class {class_name}/952 from {folder_name}")
            class_path = os.path.join(self.root_dir, class_name)
            for img_name in os.listdir(class_path):
                if img_name.lower().endswith(('.png',)):
                    img_path = os.path.join(class_path, img_name)
                    image = Image.open(img_path).convert('RGB')
                    image = image.resize((32, 32))  # store at model input size
                    self.data.append(np.array(image))
                    self.labels.append(class_idx)
                    self.string_labels.append(self.string_label_data[class_idx][4])

        self.data = np.array(self.data)
        self.labels = np.array(self.labels)
        self.string_labels = np.array(self.string_labels)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(image, int label, Cangjie string)`` for sample ``idx``."""
        image = Image.fromarray(self.data[idx])
        if self.transform:
            image = self.transform(image)
        return image, self.labels[idx], self.string_labels[idx]


class ETL952Train(ETL952Dataset):
    """Training split ("952_train")."""

    def __init__(self, root_dir, transform=None):
        super().__init__(root_dir, folder_name="952_train", transform=transform)


class ETL952Test(ETL952Dataset):
    """Test split ("952_test")."""

    def __init__(self, root_dir, transform=None):
        super().__init__(root_dir, folder_name="952_test", transform=transform)


class ETL952Val(ETL952Dataset):
    """Validation split ("952_val")."""

    def __init__(self, root_dir, transform=None):
        super().__init__(root_dir, folder_name="952_val", transform=transform)
"""SqueezeNet-style classifiers for 952-class Cangjie character recognition.

NOTE(review): the original header cited arXiv:1506.02626 ("Learning both
Weights and Connections for Efficient Neural Networks"), which is the
pruning paper.  The architecture implemented here follows:

[1] Iandola et al., "SqueezeNet: AlexNet-level accuracy with 50x fewer
    parameters and <0.5MB model size", https://arxiv.org/abs/1602.07360
"""

import torch
import torch.nn as nn


class Fire(nn.Module):
    """Fire module: 1x1 squeeze conv, then parallel 1x1 / 3x3 expand
    branches concatenated on the channel axis (each producing
    out_channel // 2 channels; out_channel is assumed even)."""

    def __init__(self, in_channel, out_channel, squzee_channel):
        super().__init__()
        self.squeeze = nn.Sequential(
            nn.Conv2d(in_channel, squzee_channel, 1),
            nn.BatchNorm2d(squzee_channel),
            nn.ReLU(inplace=True)
        )
        self.expand_1x1 = nn.Sequential(
            nn.Conv2d(squzee_channel, out_channel // 2, 1),
            nn.BatchNorm2d(out_channel // 2),
            nn.ReLU(inplace=True)
        )
        self.expand_3x3 = nn.Sequential(
            nn.Conv2d(squzee_channel, out_channel // 2, 3, padding=1),
            nn.BatchNorm2d(out_channel // 2),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        x = self.squeeze(x)
        return torch.cat([self.expand_1x1(x), self.expand_3x3(x)], 1)


class SqNetC3579(nn.Module):
    """SqueezeNet variant with fire3/5/7/9 (and their bypass adds) removed,
    keeping only fire2/4/6/8.  Fully convolutional: conv10 + global avg pool
    produce the (batch, class_num) logits."""

    def __init__(self, class_num=952):
        super().__init__()
        # 3x3 stem conv then 2x2 max-pool (spatial size halved).
        self.stem = nn.Sequential(
            nn.Conv2d(3, 96, 3, padding=1),
            nn.BatchNorm2d(96),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )

        self.fire2 = Fire(96, 128, 16)
        self.fire4 = Fire(128, 256, 32)
        self.fire6 = Fire(256, 384, 48)
        self.fire8 = Fire(384, 512, 64)
        self.conv10 = nn.Conv2d(512, class_num, 1)  # per-class score maps
        self.avg = nn.AdaptiveAvgPool2d(1)
        self.maxpool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        x = self.stem(x)

        f2 = self.fire2(x)
        f4 = self.maxpool(self.fire4(f2))

        f6 = self.fire6(f4)
        f8 = self.maxpool(self.fire8(f6))

        c10 = self.conv10(f8)
        x = self.avg(c10)
        return x.view(x.size(0), -1)


class SqNetC9(nn.Module):
    """SqueezeNet with simple bypass connections around fire3/5/7; fire9
    removed relative to the full 9-fire network."""

    def __init__(self, class_num=952):
        super().__init__()
        self.stem = nn.Sequential(
            nn.Conv2d(3, 96, 3, padding=1),
            nn.BatchNorm2d(96),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )

        self.fire2 = Fire(96, 128, 16)
        self.fire3 = Fire(128, 128, 16)
        self.fire4 = Fire(128, 256, 32)
        self.fire5 = Fire(256, 256, 32)
        self.fire6 = Fire(256, 384, 48)
        self.fire7 = Fire(384, 384, 48)
        self.fire8 = Fire(384, 512, 64)
        self.conv10 = nn.Conv2d(512, class_num, 1)
        self.avg = nn.AdaptiveAvgPool2d(1)
        self.maxpool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        x = self.stem(x)

        f2 = self.fire2(x)
        f3 = self.fire3(f2) + f2     # identity bypass
        f4 = self.maxpool(self.fire4(f3))

        f5 = self.fire5(f4) + f4     # identity bypass
        f6 = self.fire6(f5)
        f7 = self.fire7(f6) + f6     # identity bypass
        f8 = self.maxpool(self.fire8(f7))

        c10 = self.conv10(f8)
        x = self.avg(c10)
        return x.view(x.size(0), -1)


def sqnetc3579(class_num=952):
    """Factory for :class:`SqNetC3579`."""
    return SqNetC3579(class_num=class_num)


def sqnetc9(class_num=952):
    """Factory for :class:`SqNetC9`."""
    return SqNetC9(class_num=class_num)
self.fire8 = Fire(96, 128, 16) + self.fire9 = Fire(128, 128, 16) + self.conv10 = nn.Conv2d(128, class_num, 1) + self.avg = nn.AdaptiveAvgPool2d(1) + self.maxpool = nn.MaxPool2d(2, 2) + + def forward(self, x): + x = self.stem(x) + + f2 = self.fire2(x) + f3 = self.fire3(f2) + f2 + f4 = self.fire4(f3) + f4 = self.maxpool(f4) + + f5 = self.fire5(f4) + f4 + f6 = self.fire6(f5) + f7 = self.fire7(f6) + f6 + f8 = self.fire8(f7) + f8 = self.maxpool(f8) + + f9 = self.fire9(f8) + c10 = self.conv10(f9) + + x = self.avg(c10) + x = x.view(x.size(0), -1) + + return x + +def sqnetd4(class_num=952): + return SqNetD4(class_num=class_num) diff --git a/cangjie_models/sqnetD4C3579.py b/cangjie_models/sqnetD4C3579.py new file mode 100644 index 00000000..b24c23b9 --- /dev/null +++ b/cangjie_models/sqnetD4C3579.py @@ -0,0 +1,100 @@ +"""squeezenet in pytorch + + + +[1] Song Han, Jeff Pool, John Tran, William J. Dally + + squeezenet: Learning both Weights and Connections for Efficient Neural Networks + https://arxiv.org/abs/1506.02626 +""" + +#all hyperparameter divided by 2 + +import torch +import torch.nn as nn + + +class Fire(nn.Module): + + def __init__(self, in_channel, out_channel, squzee_channel): + + super().__init__() + self.squeeze = nn.Sequential( + nn.Conv2d(in_channel, squzee_channel, 1), + nn.BatchNorm2d(squzee_channel), + nn.ReLU(inplace=True) + ) + + self.expand_1x1 = nn.Sequential( + nn.Conv2d(squzee_channel, int(out_channel / 2), 1), + nn.BatchNorm2d(int(out_channel / 2)), + nn.ReLU(inplace=True) + ) + + self.expand_3x3 = nn.Sequential( + nn.Conv2d(squzee_channel, int(out_channel / 2), 3, padding=1), + nn.BatchNorm2d(int(out_channel / 2)), + nn.ReLU(inplace=True) + ) + + def forward(self, x): + + x = self.squeeze(x) + x = torch.cat([ + self.expand_1x1(x), + self.expand_3x3(x) + ], 1) + + return x + +class SqNetD4C3579(nn.Module): + + """mobile net with simple bypass""" + def __init__(self, class_num=952): + + super().__init__() + self.stem = nn.Sequential( + 
nn.MaxPool2d(2, 2), + nn.MaxPool2d(2, 2), + nn.Conv2d(3, 24, 3, padding=1), + nn.BatchNorm2d(24), + nn.ReLU(inplace=True), + nn.MaxPool2d(2, 2) + ) + + self.fire2 = Fire(24, 32, 4) + # self.fire3 = Fire(32, 32, 4) + self.fire4 = Fire(32, 64, 8) + # self.fire5 = Fire(64, 64, 8) + self.fire6 = Fire(64, 96, 12) + # self.fire7 = Fire(96, 96, 12) + self.fire8 = Fire(96, 128, 16) + # self.fire9 = Fire(128, 128, 16) + self.conv10 = nn.Conv2d(128, class_num, 1) + self.avg = nn.AdaptiveAvgPool2d(1) + self.maxpool = nn.MaxPool2d(2, 2) + + def forward(self, x): + x = self.stem(x) + + f2 = self.fire2(x) + # f3 = self.fire3(f2) + f2 + f4 = self.fire4(f2) + f4 = self.maxpool(f4) + + # f5 = self.fire5(f4) + f4 + f6 = self.fire6(f4) + # f7 = self.fire7(f6) + f6 + f8 = self.fire8(f6) + f8 = self.maxpool(f8) + + # f9 = self.fire9(f8) + c10 = self.conv10(f8) + + x = self.avg(c10) + x = x.view(x.size(0), -1) + + return x + +def sqnetd4c3579(class_num=952): + return SqNetD4C3579(class_num=class_num) diff --git a/cangjie_models/sqnetF4.py b/cangjie_models/sqnetF4.py new file mode 100644 index 00000000..ec11195a --- /dev/null +++ b/cangjie_models/sqnetF4.py @@ -0,0 +1,99 @@ +"""squeezenet in pytorch + + + +[1] Song Han, Jeff Pool, John Tran, William J. 
Dally + + squeezenet: Learning both Weights and Connections for Efficient Neural Networks + https://arxiv.org/abs/1506.02626 +""" + +#all hyperparameter divided by 2 + +import torch +import torch.nn as nn + + +class Fire(nn.Module): + + def __init__(self, in_channel, out_channel, squzee_channel): + + super().__init__() + self.squeeze = nn.Sequential( + nn.Conv2d(in_channel, squzee_channel, 1), + nn.BatchNorm2d(squzee_channel), + nn.ReLU(inplace=True) + ) + + self.expand_1x1 = nn.Sequential( + nn.Conv2d(squzee_channel, int(out_channel / 2), 1), + nn.BatchNorm2d(int(out_channel / 2)), + nn.ReLU(inplace=True) + ) + + self.expand_3x3 = nn.Sequential( + nn.Conv2d(squzee_channel, int(out_channel / 2), 3, padding=1), + nn.BatchNorm2d(int(out_channel / 2)), + nn.ReLU(inplace=True) + ) + + def forward(self, x): + + x = self.squeeze(x) + x = torch.cat([ + self.expand_1x1(x), + self.expand_3x3(x) + ], 1) + + return x + +class SqNetF4(nn.Module): + + """mobile net with simple bypass""" + def __init__(self, class_num=952): + + super().__init__() + self.stem = nn.Sequential( + # nn.MaxPool2d(2, 2), + nn.Conv2d(3, 24, 3, padding=1), + nn.BatchNorm2d(24), + nn.ReLU(inplace=True), + nn.MaxPool2d(2, 2) + ) + + self.fire2 = Fire(24, 32, 4) + self.fire3 = Fire(32, 32, 4) + self.fire4 = Fire(32, 64, 8) + self.fire5 = Fire(64, 64, 8) + self.fire6 = Fire(64, 96, 12) + self.fire7 = Fire(96, 96, 12) + self.fire8 = Fire(96, 128, 16) + self.fire9 = Fire(128, 128, 16) + self.conv10 = nn.Conv2d(128, class_num, 1) + self.avg = nn.AdaptiveAvgPool2d(1) + self.maxpool = nn.MaxPool2d(2, 2) + + def forward(self, x): + x = self.stem(x) + + f2 = self.fire2(x) + f3 = self.fire3(f2) + f2 + f4 = self.fire4(f3) + f4 = self.maxpool(f4) + + f5 = self.fire5(f4) + f4 + f6 = self.fire6(f5) + f7 = self.fire7(f6) + f6 + f8 = self.fire8(f7) + f8 = self.maxpool(f8) + + f9 = self.fire9(f8) + c10 = self.conv10(f9) + + x = self.avg(c10) + x = x.view(x.size(0), -1) + + return x + +def sqnetf4(class_num=952): + return 
SqNetF4(class_num=class_num) diff --git a/cangjie_models/sqnetF4C3579.py b/cangjie_models/sqnetF4C3579.py new file mode 100644 index 00000000..ae70f037 --- /dev/null +++ b/cangjie_models/sqnetF4C3579.py @@ -0,0 +1,99 @@ +"""squeezenet in pytorch + + + +[1] Song Han, Jeff Pool, John Tran, William J. Dally + + squeezenet: Learning both Weights and Connections for Efficient Neural Networks + https://arxiv.org/abs/1506.02626 +""" + +#all hyperparameter divided by 2 + +import torch +import torch.nn as nn + + +class Fire(nn.Module): + + def __init__(self, in_channel, out_channel, squzee_channel): + + super().__init__() + self.squeeze = nn.Sequential( + nn.Conv2d(in_channel, squzee_channel, 1), + nn.BatchNorm2d(squzee_channel), + nn.ReLU(inplace=True) + ) + + self.expand_1x1 = nn.Sequential( + nn.Conv2d(squzee_channel, int(out_channel / 2), 1), + nn.BatchNorm2d(int(out_channel / 2)), + nn.ReLU(inplace=True) + ) + + self.expand_3x3 = nn.Sequential( + nn.Conv2d(squzee_channel, int(out_channel / 2), 3, padding=1), + nn.BatchNorm2d(int(out_channel / 2)), + nn.ReLU(inplace=True) + ) + + def forward(self, x): + + x = self.squeeze(x) + x = torch.cat([ + self.expand_1x1(x), + self.expand_3x3(x) + ], 1) + + return x + +class SqNetF4C3579(nn.Module): + + """mobile net with simple bypass""" + def __init__(self, class_num=952): + + super().__init__() + self.stem = nn.Sequential( + # nn.MaxPool2d(2, 2), + nn.Conv2d(3, 24, 3, padding=1), + nn.BatchNorm2d(24), + nn.ReLU(inplace=True), + nn.MaxPool2d(2, 2) + ) + + self.fire2 = Fire(24, 32, 4) + # self.fire3 = Fire(32, 32, 4) + self.fire4 = Fire(32, 64, 8) + # self.fire5 = Fire(64, 64, 8) + self.fire6 = Fire(64, 96, 12) + # self.fire7 = Fire(96, 96, 12) + self.fire8 = Fire(96, 128, 16) + # self.fire9 = Fire(128, 128, 16) + self.conv10 = nn.Conv2d(128, class_num, 1) + self.avg = nn.AdaptiveAvgPool2d(1) + self.maxpool = nn.MaxPool2d(2, 2) + + def forward(self, x): + x = self.stem(x) + + f2 = self.fire2(x) + # f3 = self.fire3(f2) + f2 + f4 = 
self.fire4(f2) + f4 = self.maxpool(f4) + + # f5 = self.fire5(f4) + f4 + f6 = self.fire6(f4) + # f7 = self.fire7(f6) + f6 + f8 = self.fire8(f6) + f8 = self.maxpool(f8) + + # f9 = self.fire9(f8) + c10 = self.conv10(f8) + + x = self.avg(c10) + x = x.view(x.size(0), -1) + + return x + +def sqnetf4c3579(class_num=952): + return SqNetF4C3579(class_num=class_num) diff --git a/cangjie_models/sqnetR.py b/cangjie_models/sqnetR.py new file mode 100644 index 00000000..0c004b1c --- /dev/null +++ b/cangjie_models/sqnetR.py @@ -0,0 +1,99 @@ +"""squeezenet in pytorch + + + +[1] Song Han, Jeff Pool, John Tran, William J. Dally + + squeezenet: Learning both Weights and Connections for Efficient Neural Networks + https://arxiv.org/abs/1506.02626 +""" + +#all hyperparameter divided by 2 + +import torch +import torch.nn as nn + + +class Fire(nn.Module): + + def __init__(self, in_channel, out_channel, squzee_channel): + + super().__init__() + self.squeeze = nn.Sequential( + nn.Conv2d(in_channel, squzee_channel, 1), + nn.BatchNorm2d(squzee_channel), + nn.ReLU(inplace=True) + ) + + self.expand_1x1 = nn.Sequential( + nn.Conv2d(squzee_channel, int(out_channel / 2), 1), + nn.BatchNorm2d(int(out_channel / 2)), + nn.ReLU(inplace=True) + ) + + self.expand_3x3 = nn.Sequential( + nn.Conv2d(squzee_channel, int(out_channel / 2), 3, padding=1), + nn.BatchNorm2d(int(out_channel / 2)), + nn.ReLU(inplace=True) + ) + + def forward(self, x): + + x = self.squeeze(x) + x = torch.cat([ + self.expand_1x1(x), + self.expand_3x3(x) + ], 1) + + return x + +class SqNetR(nn.Module): + + """mobile net with simple bypass""" + def __init__(self, class_num=952): + + super().__init__() + self.stem = nn.Sequential( + nn.MaxPool2d(2, 2), + nn.Conv2d(3, 48, 3, padding=1), + nn.BatchNorm2d(48), + nn.ReLU(inplace=True), + nn.MaxPool2d(2, 2) + ) + + self.fire2 = Fire(48, 64, 8) + self.fire3 = Fire(64, 64, 8) + self.fire4 = Fire(64, 128, 16) + self.fire5 = Fire(128, 128, 16) + self.fire6 = Fire(128, 192, 24) + self.fire7 = 
Fire(192, 192, 24) + self.fire8 = Fire(192, 256, 32) + self.fire9 = Fire(256, 256, 32) + self.conv10 = nn.Conv2d(256, class_num, 1) + self.avg = nn.AdaptiveAvgPool2d(1) + self.maxpool = nn.MaxPool2d(2, 2) + + def forward(self, x): + x = self.stem(x) + + f2 = self.fire2(x) + f3 = self.fire3(f2) + f2 + f4 = self.fire4(f3) + f4 = self.maxpool(f4) + + f5 = self.fire5(f4) + f4 + f6 = self.fire6(f5) + f7 = self.fire7(f6) + f6 + f8 = self.fire8(f7) + f8 = self.maxpool(f8) + + f9 = self.fire9(f8) + c10 = self.conv10(f9) + + x = self.avg(c10) + x = x.view(x.size(0), -1) + + return x + +def sqnetr(class_num=952): + return SqNetR(class_num=class_num) diff --git a/cangjie_models/sqnetR4.py b/cangjie_models/sqnetR4.py new file mode 100644 index 00000000..71b542d8 --- /dev/null +++ b/cangjie_models/sqnetR4.py @@ -0,0 +1,99 @@ +"""squeezenet in pytorch + + + +[1] Song Han, Jeff Pool, John Tran, William J. Dally + + squeezenet: Learning both Weights and Connections for Efficient Neural Networks + https://arxiv.org/abs/1506.02626 +""" + +#all hyperparameter divided by 2 + +import torch +import torch.nn as nn + + +class Fire(nn.Module): + + def __init__(self, in_channel, out_channel, squzee_channel): + + super().__init__() + self.squeeze = nn.Sequential( + nn.Conv2d(in_channel, squzee_channel, 1), + nn.BatchNorm2d(squzee_channel), + nn.ReLU(inplace=True) + ) + + self.expand_1x1 = nn.Sequential( + nn.Conv2d(squzee_channel, int(out_channel / 2), 1), + nn.BatchNorm2d(int(out_channel / 2)), + nn.ReLU(inplace=True) + ) + + self.expand_3x3 = nn.Sequential( + nn.Conv2d(squzee_channel, int(out_channel / 2), 3, padding=1), + nn.BatchNorm2d(int(out_channel / 2)), + nn.ReLU(inplace=True) + ) + + def forward(self, x): + + x = self.squeeze(x) + x = torch.cat([ + self.expand_1x1(x), + self.expand_3x3(x) + ], 1) + + return x + +class SqNetR4(nn.Module): + + """mobile net with simple bypass""" + def __init__(self, class_num=952): + + super().__init__() + self.stem = nn.Sequential( + nn.MaxPool2d(2, 
2), + nn.Conv2d(3, 24, 3, padding=1), + nn.BatchNorm2d(24), + nn.ReLU(inplace=True), + nn.MaxPool2d(2, 2) + ) + + self.fire2 = Fire(24, 32, 4) + self.fire3 = Fire(32, 32, 4) + self.fire4 = Fire(32, 64, 8) + self.fire5 = Fire(64, 64, 8) + self.fire6 = Fire(64, 96, 12) + self.fire7 = Fire(96, 96, 12) + self.fire8 = Fire(96, 128, 16) + self.fire9 = Fire(128, 128, 16) + self.conv10 = nn.Conv2d(128, class_num, 1) + self.avg = nn.AdaptiveAvgPool2d(1) + self.maxpool = nn.MaxPool2d(2, 2) + + def forward(self, x): + x = self.stem(x) + + f2 = self.fire2(x) + f3 = self.fire3(f2) + f2 + f4 = self.fire4(f3) + f4 = self.maxpool(f4) + + f5 = self.fire5(f4) + f4 + f6 = self.fire6(f5) + f7 = self.fire7(f6) + f6 + f8 = self.fire8(f7) + f8 = self.maxpool(f8) + + f9 = self.fire9(f8) + c10 = self.conv10(f9) + + x = self.avg(c10) + x = x.view(x.size(0), -1) + + return x + +def sqnetr4(class_num=952): + return SqNetR4(class_num=class_num) diff --git a/cangjie_models/sqnetR4C3579.py b/cangjie_models/sqnetR4C3579.py new file mode 100644 index 00000000..e16396ef --- /dev/null +++ b/cangjie_models/sqnetR4C3579.py @@ -0,0 +1,99 @@ +"""squeezenet in pytorch + + + +[1] Song Han, Jeff Pool, John Tran, William J. 
"""Reduced SqueezeNet (R4, fire3/5/7/9 removed) plus a variant with an
LSTM head that predicts the Cangjie code as a character sequence.

NOTE(review): the original header cited arXiv:1506.02626 (the pruning
paper).  The architecture follows:

[1] Iandola et al., "SqueezeNet: AlexNet-level accuracy with 50x fewer
    parameters and <0.5MB model size", https://arxiv.org/abs/1602.07360
"""

import torch
import torch.nn as nn


class Fire(nn.Module):
    """Fire module: 1x1 squeeze conv, then parallel 1x1 / 3x3 expand
    branches concatenated on the channel axis (each producing
    out_channel // 2 channels; out_channel is assumed even)."""

    def __init__(self, in_channel, out_channel, squzee_channel):
        super().__init__()
        self.squeeze = nn.Sequential(
            nn.Conv2d(in_channel, squzee_channel, 1),
            nn.BatchNorm2d(squzee_channel),
            nn.ReLU(inplace=True)
        )
        self.expand_1x1 = nn.Sequential(
            nn.Conv2d(squzee_channel, out_channel // 2, 1),
            nn.BatchNorm2d(out_channel // 2),
            nn.ReLU(inplace=True)
        )
        self.expand_3x3 = nn.Sequential(
            nn.Conv2d(squzee_channel, out_channel // 2, 3, padding=1),
            nn.BatchNorm2d(out_channel // 2),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        x = self.squeeze(x)
        return torch.cat([self.expand_1x1(x), self.expand_3x3(x)], 1)


class SqNetR4C3579(nn.Module):
    """Channel-reduced SqueezeNet ("R4": widths divided by 4) with fire3/5/7/9
    removed.  The extra leading MaxPool halves the input before the stem conv."""

    def __init__(self, class_num=952):
        super().__init__()
        self.stem = nn.Sequential(
            nn.MaxPool2d(2, 2),  # downsample input before the stem conv
            nn.Conv2d(3, 24, 3, padding=1),
            nn.BatchNorm2d(24),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )

        self.fire2 = Fire(24, 32, 4)
        self.fire4 = Fire(32, 64, 8)
        self.fire6 = Fire(64, 96, 12)
        self.fire8 = Fire(96, 128, 16)
        self.conv10 = nn.Conv2d(128, class_num, 1)  # per-class score maps
        self.avg = nn.AdaptiveAvgPool2d(1)
        self.maxpool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        x = self.stem(x)

        f2 = self.fire2(x)
        f4 = self.maxpool(self.fire4(f2))

        f6 = self.fire6(f4)
        f8 = self.maxpool(self.fire8(f6))

        c10 = self.conv10(f8)
        x = self.avg(c10)
        return x.view(x.size(0), -1)


class SqNetR4C3579RNN(nn.Module):
    """SqNetR4C3579 backbone with two heads:

    * classification head (conv10 + global avg pool) -> (batch, class_num)
    * sequence head: the final 128-channel feature map is flattened to a
      (batch, H*W, 128) token sequence and fed through an LSTM + linear
      layer -> (batch, H*W, vocab_size) per-step logits for the Cangjie code.
    """

    def __init__(self, class_num=952, vocab_size=26):
        super().__init__()
        self.stem = nn.Sequential(
            nn.MaxPool2d(2, 2),
            nn.Conv2d(3, 24, 3, padding=1),
            nn.BatchNorm2d(24),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2)
        )

        self.fire2 = Fire(24, 32, 4)
        self.fire4 = Fire(32, 64, 8)
        self.fire6 = Fire(64, 96, 12)
        self.fire8 = Fire(96, 128, 16)
        self.conv10 = nn.Conv2d(128, class_num, 1)
        self.avg = nn.AdaptiveAvgPool2d(1)
        self.maxpool = nn.MaxPool2d(2, 2)

        # Sequence prediction head (input size 128 = fire8 output channels).
        self.lstm = nn.LSTM(128, 256, batch_first=True)
        self.fc_seq = nn.Linear(256, vocab_size)

    def forward(self, x):
        """Return ``(class_logits, seq_logits)``."""
        x = self.stem(x)

        f2 = self.fire2(x)
        f4 = self.maxpool(self.fire4(f2))

        f6 = self.fire6(f4)
        f8 = self.maxpool(self.fire8(f6))

        # Classification head.
        c10 = self.conv10(f8)
        x_cls = self.avg(c10)
        x_cls = x_cls.view(x_cls.size(0), -1)

        # Sequence head: (B, C, H, W) -> (B, H*W, C) token sequence.
        f8_flat = f8.permute(0, 2, 3, 1).reshape(f8.size(0), -1, 128)
        h_lstm, _ = self.lstm(f8_flat)
        x_seq = self.fc_seq(h_lstm)

        return x_cls, x_seq


def sqnetr4c3579(class_num=952):
    """Factory for :class:`SqNetR4C3579`."""
    return SqNetR4C3579(class_num=class_num)


def sqnetr4c3579rnn(class_num=952, vocab_size=27):
    """Factory for :class:`SqNetR4C3579RNN`.

    NOTE(review): this default (27, presumably 26 letters + blank) differs
    from the class default of 26 — confirm which is intended.
    """
    return SqNetR4C3579RNN(class_num=class_num, vocab_size=vocab_size)
#!/usr/bin/env python3
"""Evaluate a trained Cangjie character classifier.

Loads a saved weights file, runs the ETL-952 test split, and prints
top-1 / top-5 error plus the parameter count.
(Adapted from baiyu's pytorch-cifar100 test.py.)
"""

import argparse

import torch

from cangjie_utils import get_test_loader
from cangjie_models.sqnetR import sqnetr
from cangjie_models.sqnetC9 import sqnetc9
from cangjie_models.sqnetC3579 import sqnetc3579
from cangjie_models.sqnetF4C3579 import sqnetf4c3579
from cangjie_models.sqnetR4 import sqnetr4
from cangjie_models.sqnetR4C3579 import sqnetr4c3579
from cangjie_models.sqnetD4 import sqnetd4
from cangjie_models.sqnetD4C3579 import sqnetd4c3579

# Dispatch table for -net.  Previously the flag was parsed but silently
# ignored and sqnetd4c3579 was always instantiated.
NETWORKS = {
    'sqnetr': sqnetr,
    'sqnetc9': sqnetc9,
    'sqnetc3579': sqnetc3579,
    'sqnetf4c3579': sqnetf4c3579,
    'sqnetr4': sqnetr4,
    'sqnetr4c3579': sqnetr4c3579,
    'sqnetd4': sqnetd4,
    'sqnetd4c3579': sqnetd4c3579,
}

if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('-net', type=str, default='sqnetd4c3579',
                        choices=sorted(NETWORKS), help='net type')
    parser.add_argument('-weights', type=str, required=True,
                        help='the weights file you want to test')
    parser.add_argument('-gpu', action='store_true', default=False,
                        help='use gpu or not')
    parser.add_argument('-b', type=int, default=64,
                        help='batch size for dataloader')
    args = parser.parse_args()

    net = NETWORKS[args.net]()
    if args.gpu:
        net = net.cuda()

    ETL952TestLoader = get_test_loader(
        num_workers=4,
        batch_size=args.b,
    )

    # map_location keeps CPU-only evaluation of GPU-trained checkpoints working.
    net.load_state_dict(torch.load(
        args.weights, map_location=None if args.gpu else 'cpu'))
    print(net)
    net.eval()

    correct_1 = 0.0
    correct_5 = 0.0

    with torch.no_grad():

        for n_iter, (image, label) in enumerate(ETL952TestLoader):
            print("iteration: {}\ttotal {} iterations".format(
                n_iter + 1, len(ETL952TestLoader)))

            if args.gpu:
                image = image.cuda()
                label = label.cuda()

            output = net(image)
            # Top-5 predictions, best first.
            _, pred = output.topk(5, 1, largest=True, sorted=True)

            # Broadcast the target across the 5 prediction columns.
            label = label.view(label.size(0), -1).expand_as(pred)
            correct = pred.eq(label).float()

            correct_5 += correct[:, :5].sum()   # hit anywhere in top 5
            correct_1 += correct[:, :1].sum()   # hit in the top slot

    if args.gpu:
        print('GPU INFO.....')
        print(torch.cuda.memory_summary(), end='')

    print()
    print("Top 1 err: ", 1 - correct_1 / len(ETL952TestLoader.dataset))
    print("Top 5 err: ", 1 - correct_5 / len(ETL952TestLoader.dataset))
    print("Parameter numbers: {}".format(
        sum(p.numel() for p in net.parameters())))
# ---- cangjie_train.py (continued) ----
from time import perf_counter
import argparse
import os

import torch
import torch.nn as nn
from conf import settings

from utils import WarmUpLR, \
    most_recent_folder, most_recent_weights, last_epoch, best_acc_weights
from torch.utils.tensorboard import SummaryWriter
from cangjie_models.sqnetR import sqnetr
from cangjie_models.sqnetC9 import sqnetc9
from cangjie_models.sqnetC3579 import sqnetc3579
from cangjie_models.sqnetF4 import sqnetf4
from cangjie_models.sqnetF4C3579 import sqnetf4c3579
from cangjie_models.sqnetR4 import sqnetr4
from cangjie_models.sqnetR4C3579 import sqnetr4c3579
from cangjie_models.sqnetD4 import sqnetd4
from cangjie_models.sqnetD4C3579 import sqnetd4c3579


def _str2bool(value):
    """Parse a command-line boolean so that 'False'/'0'/'no' -> False.

    BUG FIX: argparse's ``type=bool`` treats ANY non-empty string — including
    the literal string 'False' — as True. This helper fixes that while keeping
    the historical ``-gpu True`` invocation (see history.txt) working.
    """
    if isinstance(value, bool):
        return value
    return value.strip().lower() in ('true', '1', 'yes', 'y')


def train(epoch):
    """Run one training epoch over ETL952TrainLoader, logging to tensorboard.

    Uses module-level globals: net, args, optimizer, loss_function, writer,
    warmup_scheduler, ETL952TrainLoader.
    """
    start_time = perf_counter()
    net.train()
    for batch_index, (images, labels) in enumerate(ETL952TrainLoader):

        if args.gpu:
            labels = labels.cuda().long()
            images = images.cuda()
        else:
            labels = labels.long()

        optimizer.zero_grad()
        outputs = net(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        n_iter = (epoch - 1) * len(ETL952TrainLoader) + batch_index + 1

        # Log gradient norms of the last child module's parameters.
        last_layer = list(net.children())[-1]
        for name, para in last_layer.named_parameters():
            if 'weight' in name:
                writer.add_scalar('LastLayerGradients/grad_norm2_weights', para.grad.norm(), n_iter)
            if 'bias' in name:
                writer.add_scalar('LastLayerGradients/grad_norm2_bias', para.grad.norm(), n_iter)

        print('Training Epoch: {epoch}/{total_epochs} [{trained_samples}/{total_samples}]\tLoss: {:0.4f}\tLR: {:0.6f}'.format(
            loss.item(),
            optimizer.param_groups[0]['lr'],
            epoch=epoch,
            total_epochs=settings.EPOCH,
            trained_samples=batch_index * args.batch_size + len(images),
            total_samples=len(ETL952TrainLoader.dataset)
        ))

        # update training loss for each iteration
        writer.add_scalar('Train/loss', loss.item(), n_iter)

        if epoch <= args.warm:
            warmup_scheduler.step()

    # Per-epoch parameter histograms.
    for name, param in net.named_parameters():
        layer, attr = os.path.splitext(name)
        attr = attr[1:]
        writer.add_histogram("{}/{}".format(layer, attr), param, epoch)

    finish_time = perf_counter()
    with open('train.log', 'a') as f:
        f.write('epoch {} training time consumed: {:.2f}s\n'.format(epoch, finish_time - start_time))
    print('epoch {} training time consumed: {:.2f}s'.format(epoch, finish_time - start_time))


@torch.no_grad()
def eval_training(epoch=0, tb=True):
    """Evaluate on the validation loader; return accuracy (0-dim tensor)."""
    start_time = perf_counter()
    net.eval()
    test_loss = 0.0  # cost function error
    correct = 0.0

    for batch_index, (images, labels) in enumerate(ETL952ValLoader):
        if args.gpu:
            labels = labels.cuda().long()
            images = images.cuda()
        else:
            labels = labels.long()

        outputs = net(images)
        loss = loss_function(outputs, labels)
        test_loss += loss.item()
        _, preds = outputs.max(1)
        correct += preds.eq(labels).sum()

    finish_time = perf_counter()
    print('Evaluating Network.....')
    print('Epoch: {}, Test Loss: {:.4f}, Test Accuracy: {:.4f}, Time consumed:{:.2f}s'.format(
        epoch, test_loss / len(ETL952ValLoader.dataset),
        correct.float() / len(ETL952ValLoader.dataset),
        finish_time - start_time
    ))
    print()
    if tb:
        writer.add_scalar('Test/Average loss', test_loss / len(ETL952ValLoader.dataset), epoch)
        writer.add_scalar('Test/Accuracy', correct.float() / len(ETL952ValLoader.dataset), epoch)

    return correct.float() / len(ETL952ValLoader.dataset)


if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('-net', default='sqnetd4c3579', type=str, help='net type')
    # BUG FIX: was type=bool, which parses the string 'False' as True.
    parser.add_argument('-gpu', type=_str2bool, default=False, help='use gpu or not')
    parser.add_argument('-batch_size', type=int, default=64, help='batch size for dataloader')
    parser.add_argument('-warm', type=int, default=1, help='warm up training phase')
    parser.add_argument('-lr', type=float, default=0.1, help='initial learning rate')
    parser.add_argument('-resume', action='store_true', default=False, help='resume training')
default=False, help='resume training') + args = parser.parse_args() + + current_time = settings.TIME_NOW + + + + #model + net = sqnetd4c3579() + file_name = "net_ver"+current_time+".log" + net_ver_path = os.path.join('net_ver',file_name) + if not os.path.exists('net_ver'): + os.mkdir('net_ver') + with open(net_ver_path, "w") as f: + f.write(str(net)) + print("assigned model") + + #data + time_in = perf_counter() + print("start loading data") + ETL952TrainLoader = get_training_loader(batch_size=args.batch_size) + ETL952ValLoader = get_val_loader(batch_size=args.batch_size) + print("data loaded") + time_out = perf_counter() + with open("train.log", "a") as f: + f.write(f"net_ver {net.__class__.__name__} {current_time} ") + f.write(f"train at {current_time} ") + f.write("data loading time: {:.2f}s\n".format(time_out - time_in)) + + + #setup model + + loss_function = torch.nn.CrossEntropyLoss() + optimizer = torch.optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4) + train_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=settings.MILESTONES, gamma=0.2) #learning rate decay + iter_per_epoch = len(ETL952TrainLoader) + warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * args.warm) + + + + if args.resume: + recent_folder = most_recent_folder(os.path.join(settings.CHECKPOINT_PATH, args.net), fmt=settings.DATE_FORMAT) + if not recent_folder: + raise Exception('no recent folder were found') + + checkpoint_path = os.path.join(settings.CHECKPOINT_PATH, args.net, recent_folder) + + else: + checkpoint_path = os.path.join(settings.CHECKPOINT_PATH, args.net, settings.TIME_NOW) + + #use tensorboard + if not os.path.exists(settings.LOG_DIR): + os.mkdir(settings.LOG_DIR) + + #since tensorboard can't overwrite old values + #so the only way is to create a new tensorboard log + writer = SummaryWriter(log_dir=os.path.join( + settings.LOG_DIR, args.net, current_time)) + input_tensor = torch.Tensor(1, 3, 32, 32) + if args.gpu: + input_tensor = 
input_tensor.cuda() + net = net.cuda() + writer.add_graph(net, input_tensor) + + #create checkpoint folder to save model + if not os.path.exists(checkpoint_path): + os.makedirs(checkpoint_path) + checkpoint_path = os.path.join(checkpoint_path, '{net}-{epoch}-{type}.pth') + + best_acc = 0.0 + if args.resume: + best_weights = best_acc_weights(os.path.join(settings.CHECKPOINT_PATH, args.net, recent_folder)) + if best_weights: + weights_path = os.path.join(settings.CHECKPOINT_PATH, args.net, recent_folder, best_weights) + print('found best acc weights file:{}'.format(weights_path)) + print('load best training file to test acc...') + net.load_state_dict(torch.load(weights_path)) + best_acc = eval_training(tb=False) + print('best acc is {:0.2f}'.format(best_acc)) + + recent_weights_file = most_recent_weights(os.path.join(settings.CHECKPOINT_PATH, args.net, recent_folder)) + if not recent_weights_file: + raise Exception('no recent weights file were found') + weights_path = os.path.join(settings.CHECKPOINT_PATH, args.net, recent_folder, recent_weights_file) + print('loading weights file {} to resume training.....'.format(weights_path)) + net.load_state_dict(torch.load(weights_path)) + + resume_epoch = last_epoch(os.path.join(settings.CHECKPOINT_PATH, args.net, recent_folder)) + + time_in = perf_counter() + for epoch in range(1, settings.EPOCH + 1): + if epoch > args.warm: + train_scheduler.step(epoch) + + if args.resume: + if epoch <= resume_epoch: + continue + + train(epoch) + acc = eval_training(epoch) + + #start to save best performance model after learning rate decay to 0.01 + if epoch > settings.MILESTONES[1] and best_acc < acc: + weights_path = checkpoint_path.format(net=args.net, epoch=epoch, type='best') + print('saving weights file to {}'.format(weights_path)) + torch.save(net.state_dict(), weights_path) + best_acc = acc + continue + + if not epoch % settings.SAVE_EPOCH: + weights_path = checkpoint_path.format(net=args.net, epoch=epoch, type='regular') + 
print('saving weights file to {}'.format(weights_path)) + torch.save(net.state_dict(), weights_path) + time_out = perf_counter() + with open("train.log", "a") as f: + f.write("train at" + current_time + " ") + f.write("train time: {:.2f}s\n".format(time_out - time_in)) + writer.close() diff --git a/cangjie_train_b3.py b/cangjie_train_b3.py new file mode 100644 index 00000000..90e82e52 --- /dev/null +++ b/cangjie_train_b3.py @@ -0,0 +1,301 @@ +from cangjie_utils_b3 import get_training_loader,get_val_loader +from models.squeezenet import squeezenet +from time import perf_counter +import argparse +import os +# from utils import WarmUpLR + +import torch +import torch.nn as nn +from conf import settings + +from utils import WarmUpLR, \ + most_recent_folder, most_recent_weights, last_epoch, best_acc_weights +from torch.utils.tensorboard import SummaryWriter +import torch.nn.functional as F + +from cangjie_models.sqnetR4 import sqnetr4 +from cangjie_models.sqnetR4C3579 import sqnetr4c3579 +from cangjie_models.sqnetD4 import sqnetd4 +from cangjie_models.sqnetD4C3579 import sqnetd4c3579 +from cangjie_models.sqnetR4C3579RNN import sqnetr4c3579rnn + +def train(epoch): + start_time = perf_counter() + net.train() + for batch_index, (images, labels, string_labels_padded, target_lengths) in enumerate(ETL952TrainLoader): + + if args.gpu: + labels = labels.cuda().long() + images = images.cuda() + string_labels_padded = string_labels_padded.cuda() + target_lengths = target_lengths.cuda() + else: + labels = labels.long() + target_lengths = target_lengths.long() + + + + optimizer.zero_grad() + x_cls, x_seq = net(images) + # loss = loss_function(outputs, labels) + # loss.backward() + # optimizer.step() + + + + loss_class = criterion_class(x_cls, labels) + + batch_size = x_seq.size(0) + sequence_length = x_seq.size(1) + input_lengths = torch.full((batch_size,), sequence_length, dtype=torch.long) + print("x_seq shape:", x_seq.shape) + print("string_labels_padded shape:", 
string_labels_padded.shape) + print("input_lengths shape:", input_lengths.shape) + print("target_lengths shape:", target_lengths.shape) + loss_seq = criterion_seq(x_seq.log_softmax(2), string_labels_padded, input_lengths, target_lengths) + + loss = loss_class + loss_seq + loss.backward() + optimizer.step() + + n_iter = (epoch - 1) * len(ETL952TrainLoader) + batch_index + 1 + last_layer = list(net.children())[-1] + for name, para in last_layer.named_parameters(): + if 'weight' in name: + writer.add_scalar('LastLayerGradients/grad_norm2_weights', para.grad.norm(), n_iter) + if 'bias' in name: + writer.add_scalar('LastLayerGradients/grad_norm2_bias', para.grad.norm(), n_iter) + print('Training Epoch: {epoch}/{total_epochs} [{trained_samples}/{total_samples}]\tLoss: {:0.4f}\tLR: {:0.6f}'.format( + loss.item(), + optimizer.param_groups[0]['lr'], + epoch=epoch, + total_epochs=settings.EPOCH, + trained_samples=batch_index * args.batch_size + len(images), + total_samples=len(ETL952TrainLoader.dataset) + )) + + #update training loss for each iteration + writer.add_scalar('Train/loss', loss.item(), n_iter) + + if epoch <= args.warm: + warmup_scheduler.step() + + for name, param in net.named_parameters(): + layer, attr = os.path.splitext(name) + attr = attr[1:] + writer.add_histogram("{}/{}".format(layer, attr), param, epoch) + + finish_time = perf_counter() + with open('train.log', 'a') as f: + f.write('epoch {} training time consumed: {:.2f}s\n'.format(epoch, finish_time - start_time)) + print('epoch {} training time consumed: {:.2f}s'.format(epoch, finish_time - start_time)) + +import torch +from time import perf_counter +from torch.nn.utils.rnn import pad_sequence + +@torch.no_grad() +def eval_training(epoch=0, tb=True): + start_time = perf_counter() + net.eval() + test_loss_class = 0.0 # Classification loss + test_loss_seq = 0.0 # Sequence loss + correct = 0.0 + total_sequences = 0 + correct_sequences = 0 + + for batch_index, (images, class_labels, string_labels_padded, 
target_lengths) in enumerate(ETL952ValLoader): + if torch.cuda.is_available(): + images = images.cuda() + class_labels = class_labels.cuda().long() + string_labels_padded = string_labels_padded.cuda() + target_lengths = target_lengths.cuda() + + # Forward pass + outputs_class, outputs_seq = net(images) + + # Classification loss + loss_class = criterion_class(outputs_class, class_labels) + + # Sequence loss + loss_seq = criterion_seq(outputs_seq.log_softmax(2), string_labels_padded, torch.full((outputs_seq.size(0),), outputs_seq.size(1), dtype=torch.long).to("cuda"), target_lengths) + + # Accumulate losses + test_loss_class += loss_class.item() + test_loss_seq += loss_seq.item() + + # Calculate accuracy + _, preds = outputs_class.max(1) + correct += preds.eq(class_labels).sum().item() + + # Evaluate sequence prediction accuracy (pseudo code, needs actual implementation) + # Here we would compare the predicted sequences with the ground truth sequences + # This part of accuracy calculation is simplified and needs to be replaced with actual evaluation logic + for i in range(len(string_labels_padded)): + predicted_sequence = outputs_seq.argmax(dim=2)[i] + target_sequence = string_labels_padded[i, :target_lengths[i]] + if torch.equal(predicted_sequence[:len(target_sequence)], target_sequence): + correct_sequences += 1 + total_sequences += 1 + + finish_time = perf_counter() + + # Print results + print('Evaluating Network.....') + print('Epoch: {}, Classification Test Loss: {:.4f}, Sequence Test Loss: {:.4f}, Test Accuracy: {:.4f}, Sequence Accuracy: {:.4f}, Time consumed: {:.2f}s'.format( + epoch, + test_loss_class / len(ETL952ValLoader.dataset), + test_loss_seq / len(ETL952ValLoader.dataset), + correct / len(ETL952ValLoader.dataset), + correct_sequences / total_sequences if total_sequences > 0 else 0, + finish_time - start_time + )) + + # TensorBoard logging + if tb: + writer.add_scalar('Test/Classification Average loss', test_loss_class / len(ETL952ValLoader.dataset), 
epoch) + writer.add_scalar('Test/Sequence Average loss', test_loss_seq / len(ETL952ValLoader.dataset), epoch) + writer.add_scalar('Test/Accuracy', correct / len(ETL952ValLoader.dataset), epoch) + writer.add_scalar('Test/Sequence Accuracy', correct_sequences / total_sequences if total_sequences > 0 else 0, epoch) + + return correct / len(ETL952ValLoader.dataset), correct_sequences / total_sequences if total_sequences > 0 else 0 + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser() + parser.add_argument('-net', default='sqnetr4c3579rnn', type=str, help='net type') + parser.add_argument('-gpu', type=bool, default=False, help='use gpu or not') + parser.add_argument('-batch_size', type=int, default=64, help='batch size for dataloader') + parser.add_argument('-warm', type=int, default=1, help='warm up training phase') + parser.add_argument('-lr', type=float, default=0.1, help='initial learning rate') + parser.add_argument('-resume', action='store_true', default=False, help='resume training') + args = parser.parse_args() + + current_time = settings.TIME_NOW + + + + #model + net = sqnetr4c3579rnn() + file_name = "net_ver"+current_time+".log" + net_ver_path = os.path.join('net_ver',file_name) + if not os.path.exists('net_ver'): + os.mkdir('net_ver') + with open(net_ver_path, "w") as f: + f.write(str(net)) + print("assigned model") + + #data + time_in = perf_counter() + print("start loading data") + ETL952TrainLoader = get_training_loader(batch_size=args.batch_size) + ETL952ValLoader = get_val_loader(batch_size=args.batch_size) + print("data loaded") + time_out = perf_counter() + with open("train.log", "a") as f: + f.write("net_ver"+ current_time + " ") + f.write("train at" + current_time + " ") + f.write("data loading time: {:.2f}s\n".format(time_out - time_in)) + + + #setup model + + # loss_function = torch.nn.CrossEntropyLoss() + criterion_class = torch.nn.CrossEntropyLoss() + criterion_seq = torch.nn.CTCLoss(blank=0) + + optimizer = 
torch.optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4) + train_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=settings.MILESTONES, gamma=0.2) #learning rate decay + iter_per_epoch = len(ETL952TrainLoader) + warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * args.warm) + + + + # def ctc_loss(y_true, y_pred, input_lengths, target_lengths): + # return F.ctc_loss(y_pred, y_true, input_lengths, target_lengths) + + # def combined_loss(class_pred, class_true, seq_pred, seq_true, input_lengths, target_lengths): + # classification_loss = F.cross_entropy(class_pred, class_true) + # sequence_loss = ctc_loss(seq_true, seq_pred, input_lengths, target_lengths) + # return classification_loss + sequence_loss + + if args.resume: + recent_folder = most_recent_folder(os.path.join(settings.CHECKPOINT_PATH, args.net), fmt=settings.DATE_FORMAT) + if not recent_folder: + raise Exception('no recent folder were found') + + checkpoint_path = os.path.join(settings.CHECKPOINT_PATH, args.net, recent_folder) + + else: + checkpoint_path = os.path.join(settings.CHECKPOINT_PATH, args.net, settings.TIME_NOW) + + #use tensorboard + if not os.path.exists(settings.LOG_DIR): + os.mkdir(settings.LOG_DIR) + + #since tensorboard can't overwrite old values + #so the only way is to create a new tensorboard log + writer = SummaryWriter(log_dir=os.path.join( + settings.LOG_DIR, args.net, current_time)) + input_tensor = torch.Tensor(1, 3, 32, 32) + if args.gpu: + input_tensor = input_tensor.cuda() + net = net.cuda() + writer.add_graph(net, input_tensor) + + #create checkpoint folder to save model + if not os.path.exists(checkpoint_path): + os.makedirs(checkpoint_path) + checkpoint_path = os.path.join(checkpoint_path, '{net}-{epoch}-{type}.pth') + + best_acc = 0.0 + if args.resume: + best_weights = best_acc_weights(os.path.join(settings.CHECKPOINT_PATH, args.net, recent_folder)) + if best_weights: + weights_path = os.path.join(settings.CHECKPOINT_PATH, 
args.net, recent_folder, best_weights) + print('found best acc weights file:{}'.format(weights_path)) + print('load best training file to test acc...') + net.load_state_dict(torch.load(weights_path)) + best_acc = eval_training(tb=False) + print('best acc is {:0.2f}'.format(best_acc)) + + recent_weights_file = most_recent_weights(os.path.join(settings.CHECKPOINT_PATH, args.net, recent_folder)) + if not recent_weights_file: + raise Exception('no recent weights file were found') + weights_path = os.path.join(settings.CHECKPOINT_PATH, args.net, recent_folder, recent_weights_file) + print('loading weights file {} to resume training.....'.format(weights_path)) + net.load_state_dict(torch.load(weights_path)) + + resume_epoch = last_epoch(os.path.join(settings.CHECKPOINT_PATH, args.net, recent_folder)) + + time_in = perf_counter() + for epoch in range(1, settings.EPOCH + 1): + if epoch > args.warm: + train_scheduler.step(epoch) + + if args.resume: + if epoch <= resume_epoch: + continue + + train(epoch) + acc = eval_training(epoch) + + #start to save best performance model after learning rate decay to 0.01 + if epoch > settings.MILESTONES[1] and best_acc < acc: + weights_path = checkpoint_path.format(net=args.net, epoch=epoch, type='best') + print('saving weights file to {}'.format(weights_path)) + torch.save(net.state_dict(), weights_path) + best_acc = acc + continue + + if not epoch % settings.SAVE_EPOCH: + weights_path = checkpoint_path.format(net=args.net, epoch=epoch, type='regular') + print('saving weights file to {}'.format(weights_path)) + torch.save(net.state_dict(), weights_path) + time_out = perf_counter() + with open("train.log", "a") as f: + f.write("train at" + current_time + " ") + f.write("train time: {:.2f}s\n".format(time_out - time_in)) + writer.close() \ No newline at end of file diff --git a/cangjie_utils.py b/cangjie_utils.py new file mode 100644 index 00000000..963b3c6a --- /dev/null +++ b/cangjie_utils.py @@ -0,0 +1,54 @@ +from cangjie_dataset import 
# ---- cangjie_utils.py ----
import torch
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence


def get_training_loader(batch_size=128, num_workers=4, shuffle=True):
    """Return the ETL952 training DataLoader (plain (image, label) batches).

    Args:
        batch_size: samples per batch.
        num_workers: DataLoader worker processes.
        shuffle: whether to reshuffle every epoch.
    """
    # Deferred import: cangjie_dataset pulls in PIL/pandas and, on dataset
    # construction, loads every image into memory — only pay for it when a
    # loader is actually requested. (The unused ETL952Labels import was
    # dropped; nothing else in this file referenced it.)
    from cangjie_dataset import ETL952Train
    train_set = ETL952Train(root_dir="")
    return DataLoader(train_set, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)


def get_test_loader(batch_size=128, num_workers=4, shuffle=True):
    """Return the ETL952 test DataLoader (plain (image, label) batches)."""
    from cangjie_dataset import ETL952Test  # deferred: heavy, loads data into RAM
    test_set = ETL952Test(root_dir="")
    return DataLoader(test_set, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)


def get_val_loader(batch_size=128, num_workers=4, shuffle=True):
    """Return the ETL952 validation DataLoader (plain (image, label) batches)."""
    from cangjie_dataset import ETL952Val  # deferred: heavy, loads data into RAM
    val_set = ETL952Val(root_dir="")
    return DataLoader(val_set, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)


# ---- cangjie_utils_b3.py ----

def collate_fn(batch):
    """Collate (image, class_label, cangjie_string) samples for CTC training.

    Letters 'a'..'z' map to 1..26 so index 0 stays free for the CTC blank
    (cangjie_train_b3 constructs CTCLoss(blank=0)).

    Returns:
        images: stacked (B, C, H, W) tensor.
        class_labels: (B,) tensor of class indices.
        string_labels_padded: (B, max_len) tensor, 0-padded.
        target_lengths: (B,) tensor of unpadded label lengths.
    """
    images, class_labels, string_labels = zip(*batch)

    images = torch.stack(images)
    class_labels = torch.tensor(class_labels)

    # 'a' -> 1 ... 'z' -> 26 (assumes labels are lowercase ASCII — TODO confirm)
    encoded = [torch.tensor([ord(ch) - ord('a') + 1 for ch in label]) for label in string_labels]
    string_labels_padded = pad_sequence(encoded, batch_first=True, padding_value=0)

    target_lengths = torch.tensor([len(label) for label in encoded])

    return images, class_labels, string_labels_padded, target_lengths


def get_training_loader(batch_size=128, num_workers=4, shuffle=True):
    """Training DataLoader yielding CTC-ready batches via collate_fn."""
    from cangjie_dataset_b3 import ETL952Train  # deferred: heavy, loads data into RAM
    train_set = ETL952Train(root_dir="")
    return DataLoader(train_set, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, collate_fn=collate_fn)


def get_test_loader(batch_size=128, num_workers=4, shuffle=True):
    """Test DataLoader yielding CTC-ready batches via collate_fn."""
    from cangjie_dataset_b3 import ETL952Test  # deferred: heavy, loads data into RAM
    test_set = ETL952Test(root_dir="")
    return DataLoader(test_set, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, collate_fn=collate_fn)


def get_val_loader(batch_size=128, num_workers=4, shuffle=True):
    """Validation DataLoader yielding CTC-ready batches via collate_fn."""
    from cangjie_dataset_b3 import ETL952Val  # deferred: heavy, loads data into RAM
    val_set = ETL952Val(root_dir="")
    return DataLoader(val_set, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, collate_fn=collate_fn)
CHECKPOINT_PATH = 'checkpoint' #total training epoches -EPOCH = 200 -MILESTONES = [60, 120, 160] +EPOCH = 10 +MILESTONES = [2,4,6,8,10] #initial learning rate #INIT_LR = 0.1 @@ -33,7 +33,7 @@ LOG_DIR = 'runs' #save weights file per SAVE_EPOCH epoch -SAVE_EPOCH = 10 +SAVE_EPOCH = 1 diff --git a/dataset.py b/dataset.py index 9578dc27..28feffb0 100644 --- a/dataset.py +++ b/dataset.py @@ -61,3 +61,5 @@ def __getitem__(self, index): image = self.transform(image) return label, image + + diff --git a/history.txt b/history.txt new file mode 100644 index 00000000..69cb997a --- /dev/null +++ b/history.txt @@ -0,0 +1,7 @@ +python cangjie_train.py -gpu True +conda deactivate +conda activate w2_2 +python cangjie_train.py -gpu True +python cangjie_test.py -weight checkpoint\squeezenet\Monday_15_July_2024_00h_34m_50s\squeezenet-7-best.pth -gpu +python cangjie_train.py -gpu True +doskey /HISTORY > history.txt diff --git a/models/squeezenet.py b/models/squeezenet.py index a8821eb5..698f930d 100644 --- a/models/squeezenet.py +++ b/models/squeezenet.py @@ -48,10 +48,11 @@ def forward(self, x): class SqueezeNet(nn.Module): """mobile net with simple bypass""" - def __init__(self, class_num=100): + def __init__(self, class_num=952): super().__init__() self.stem = nn.Sequential( + # nn.MaxPool2d(2, 2), nn.Conv2d(3, 96, 3, padding=1), nn.BatchNorm2d(96), nn.ReLU(inplace=True), @@ -66,13 +67,12 @@ def __init__(self, class_num=100): self.fire7 = Fire(384, 384, 48) self.fire8 = Fire(384, 512, 64) self.fire9 = Fire(512, 512, 64) - self.conv10 = nn.Conv2d(512, class_num, 1) self.avg = nn.AdaptiveAvgPool2d(1) self.maxpool = nn.MaxPool2d(2, 2) def forward(self, x): - x = self.stem(x) + x = self.stem(x) f2 = self.fire2(x) f3 = self.fire3(f2) + f2 @@ -93,5 +93,5 @@ def forward(self, x): return x -def squeezenet(class_num=100): +def squeezenet(class_num=952): return SqueezeNet(class_num=class_num) diff --git a/test.py b/test.py index dab61a05..19bed073 100644 --- a/test.py +++ b/test.py @@ -77,3 
+77,9 @@ print("Top 1 err: ", 1 - correct_1 / len(cifar100_test_loader.dataset)) print("Top 5 err: ", 1 - correct_5 / len(cifar100_test_loader.dataset)) print("Parameter numbers: {}".format(sum(p.numel() for p in net.parameters()))) + + +# from models.squeezenet import SqueezeNet + +# model = SqueezeNet(class_num=100) +# model.load_state_dict(torch.load('checkpoint\squeezenet\Saturday_13_July_2024_18h_05m_14s\squeezenet-200-regular.pth')) \ No newline at end of file diff --git a/train.log b/train.log new file mode 100644 index 00000000..fa72e4f7 --- /dev/null +++ b/train.log @@ -0,0 +1,135 @@ +net_verMonday_15_July_2024_13h_36m_07s train atMonday_15_July_2024_13h_36m_07s data loading time: 1020.47s +epoch 1 training time consumed: 130.26s +epoch 2 training time consumed: 122.33s +epoch 3 training time consumed: 121.47s +epoch 4 training time consumed: 120.14s +epoch 5 training time consumed: 117.78s +epoch 6 training time consumed: 119.11s +epoch 7 training time consumed: 117.37s +epoch 8 training time consumed: 120.20s +epoch 9 training time consumed: 120.56s +epoch 10 training time consumed: 121.52s +train atMonday_15_July_2024_13h_36m_07s train time: 1477.86s +net_verMonday_15_July_2024_14h_17m_38s train atMonday_15_July_2024_14h_17m_38s data loading time: 991.61s +epoch 1 training time consumed: 82.49s +epoch 2 training time consumed: 77.83s +epoch 3 training time consumed: 77.33s +epoch 4 training time consumed: 76.96s +epoch 5 training time consumed: 76.66s +epoch 6 training time consumed: 78.00s +epoch 7 training time consumed: 77.45s +epoch 8 training time consumed: 77.18s +epoch 9 training time consumed: 76.56s +epoch 10 training time consumed: 79.04s +train atMonday_15_July_2024_14h_17m_38s train time: 1005.25s +net_verMonday_15_July_2024_14h_52m_18s train atMonday_15_July_2024_14h_52m_18s data loading time: 948.40s +epoch 1 training time consumed: 83.59s +epoch 2 training time consumed: 85.18s +epoch 3 training time consumed: 93.11s +epoch 4 training 
time consumed: 81.24s +epoch 5 training time consumed: 79.15s +epoch 6 training time consumed: 80.56s +epoch 7 training time consumed: 78.63s +epoch 8 training time consumed: 80.09s +epoch 9 training time consumed: 85.16s +epoch 10 training time consumed: 83.01s +train atMonday_15_July_2024_14h_52m_18s train time: 1064.90s +net_verMonday_15_July_2024_15h_52m_03s train atMonday_15_July_2024_15h_52m_03s data loading time: 1338.00s +epoch 1 training time consumed: 121.11s +epoch 2 training time consumed: 115.59s +epoch 3 training time consumed: 114.57s +epoch 4 training time consumed: 114.46s +epoch 5 training time consumed: 114.21s +epoch 6 training time consumed: 114.10s +epoch 7 training time consumed: 114.27s +epoch 8 training time consumed: 114.46s +epoch 9 training time consumed: 114.25s +epoch 10 training time consumed: 115.09s +train atMonday_15_July_2024_15h_52m_03s train time: 1417.98s +net_verMonday_15_July_2024_16h_51m_39s train atMonday_15_July_2024_16h_51m_39s data loading time: 1132.75s +epoch 1 training time consumed: 98.84s +epoch 2 training time consumed: 93.67s +epoch 3 training time consumed: 92.61s +epoch 4 training time consumed: 92.33s +epoch 5 training time consumed: 92.92s +epoch 6 training time consumed: 92.64s +epoch 7 training time consumed: 91.90s +epoch 8 training time consumed: 92.31s +epoch 9 training time consumed: 92.45s +epoch 10 training time consumed: 92.17s +train atMonday_15_July_2024_16h_51m_39s train time: 1185.14s +net_verMonday_15_July_2024_17h_33m_11s train atMonday_15_July_2024_17h_33m_11s data loading time: 1149.25s +epoch 1 training time consumed: 96.32s +epoch 2 training time consumed: 85.74s +epoch 3 training time consumed: 85.63s +epoch 4 training time consumed: 82.57s +epoch 5 training time consumed: 84.45s +epoch 6 training time consumed: 100.93s +epoch 7 training time consumed: 104.32s +epoch 8 training time consumed: 89.12s +epoch 9 training time consumed: 92.80s +epoch 10 training time consumed: 91.46s +train 
atMonday_15_July_2024_17h_33m_11s train time: 1195.30s +net_verMonday_15_July_2024_18h_17m_36s train atMonday_15_July_2024_18h_17m_36s data loading time: 1089.67s +epoch 1 training time consumed: 83.32s +epoch 2 training time consumed: 64.00s +epoch 3 training time consumed: 64.05s +epoch 4 training time consumed: 64.41s +epoch 5 training time consumed: 66.17s +epoch 6 training time consumed: 68.41s +epoch 7 training time consumed: 72.11s +epoch 8 training time consumed: 63.72s +epoch 9 training time consumed: 67.06s +epoch 10 training time consumed: 71.98s +train atMonday_15_July_2024_18h_17m_36s train time: 932.04s +net_verMonday_15_July_2024_18h_53m_27s train atMonday_15_July_2024_18h_53m_27s data loading time: 991.86s +epoch 1 training time consumed: 89.76s +epoch 2 training time consumed: 81.88s +epoch 3 training time consumed: 82.32s +epoch 4 training time consumed: 81.63s +epoch 5 training time consumed: 85.32s +epoch 6 training time consumed: 82.27s +epoch 7 training time consumed: 81.29s +epoch 8 training time consumed: 81.92s +epoch 9 training time consumed: 81.58s +epoch 10 training time consumed: 81.12s +train atMonday_15_July_2024_18h_53m_27s train time: 1061.66s +net_verMonday_15_July_2024_19h_35m_20s train atMonday_15_July_2024_19h_35m_20s data loading time: 965.29s +epoch 1 training time consumed: 64.67s +epoch 2 training time consumed: 58.33s +epoch 3 training time consumed: 59.22s +epoch 4 training time consumed: 58.85s +epoch 5 training time consumed: 58.46s +epoch 6 training time consumed: 57.91s +epoch 7 training time consumed: 62.43s +epoch 8 training time consumed: 59.35s +epoch 9 training time consumed: 58.67s +epoch 10 training time consumed: 58.35s +train atMonday_15_July_2024_19h_35m_20s train time: 817.57s +net_verMonday_15_July_2024_20h_12m_01s train atMonday_15_July_2024_20h_12m_01s data loading time: 999.80s +epoch 1 training time consumed: 85.90s +epoch 2 training time consumed: 78.41s +epoch 3 training time consumed: 73.76s +epoch 4 
training time consumed: 73.77s +epoch 5 training time consumed: 73.85s +epoch 6 training time consumed: 75.34s +epoch 7 training time consumed: 73.62s +epoch 8 training time consumed: 74.30s +epoch 9 training time consumed: 77.02s +epoch 10 training time consumed: 74.41s +train atMonday_15_July_2024_20h_12m_01s train time: 979.27s +net_verMonday_15_July_2024_20h_50m_57s train atMonday_15_July_2024_20h_50m_57s data loading time: 1011.95s +epoch 1 training time consumed: 65.02s +epoch 2 training time consumed: 57.06s +epoch 3 training time consumed: 57.36s +epoch 4 training time consumed: 57.89s +epoch 5 training time consumed: 55.65s +epoch 6 training time consumed: 60.06s +epoch 7 training time consumed: 58.41s +epoch 8 training time consumed: 55.88s +epoch 9 training time consumed: 56.47s +epoch 10 training time consumed: 54.50s +train atMonday_15_July_2024_20h_50m_57s train time: 789.56s +net_verMonday_15_July_2024_22h_19m_16s train atMonday_15_July_2024_22h_19m_16s data loading time: 1021.04s +net_verMonday_15_July_2024_22h_41m_03s train atMonday_15_July_2024_22h_41m_03s data loading time: 1041.47s +net_verMonday_15_July_2024_23h_09m_36s train atMonday_15_July_2024_23h_09m_36s data loading time: 1028.62s diff --git a/train.py b/train.py index c5034606..402f77c6 100644 --- a/train.py +++ b/train.py @@ -30,7 +30,7 @@ def train(epoch): start = time.time() net.train() - for batch_index, (images, labels) in enumerate(cifar100_training_loader): + for batch_index, (images, labels) in enumerate(ETL952_training_loader): if args.gpu: labels = labels.cuda() @@ -42,7 +42,7 @@ def train(epoch): loss.backward() optimizer.step() - n_iter = (epoch - 1) * len(cifar100_training_loader) + batch_index + 1 + n_iter = (epoch - 1) * len(ETL952_training_loader) + batch_index + 1 last_layer = list(net.children())[-1] for name, para in last_layer.named_parameters(): @@ -56,7 +56,7 @@ def train(epoch): optimizer.param_groups[0]['lr'], epoch=epoch, trained_samples=batch_index * args.b + 
len(images), - total_samples=len(cifar100_training_loader.dataset) + total_samples=len(ETL952_training_loader.dataset) )) #update training loss for each iteration @@ -83,7 +83,7 @@ def eval_training(epoch=0, tb=True): test_loss = 0.0 # cost function error correct = 0.0 - for (images, labels) in cifar100_test_loader: + for (images, labels) in ETL952_test_loader: if args.gpu: images = images.cuda() @@ -103,25 +103,25 @@ def eval_training(epoch=0, tb=True): print('Evaluating Network.....') print('Test set: Epoch: {}, Average loss: {:.4f}, Accuracy: {:.4f}, Time consumed:{:.2f}s'.format( epoch, - test_loss / len(cifar100_test_loader.dataset), - correct.float() / len(cifar100_test_loader.dataset), + test_loss / len(ETL952_test_loader.dataset), + correct.float() / len(ETL952_test_loader.dataset), finish - start )) print() #add informations to tensorboard if tb: - writer.add_scalar('Test/Average loss', test_loss / len(cifar100_test_loader.dataset), epoch) - writer.add_scalar('Test/Accuracy', correct.float() / len(cifar100_test_loader.dataset), epoch) + writer.add_scalar('Test/Average loss', test_loss / len(ETL952_test_loader.dataset), epoch) + writer.add_scalar('Test/Accuracy', correct.float() / len(ETL952_test_loader.dataset), epoch) - return correct.float() / len(cifar100_test_loader.dataset) + return correct.float() / len(ETL952_test_loader.dataset) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('-net', type=str, required=True, help='net type') parser.add_argument('-gpu', action='store_true', default=False, help='use gpu or not') - parser.add_argument('-b', type=int, default=128, help='batch size for dataloader') + parser.add_argument('-b', type=int, default=256, help='batch size for dataloader') parser.add_argument('-warm', type=int, default=1, help='warm up training phase') parser.add_argument('-lr', type=float, default=0.1, help='initial learning rate') parser.add_argument('-resume', action='store_true', default=False, help='resume 
training') @@ -130,7 +130,7 @@ def eval_training(epoch=0, tb=True): net = get_network(args) #data preprocessing: - cifar100_training_loader = get_training_dataloader( + ETL952_training_loader = get_training_dataloader( settings.CIFAR100_TRAIN_MEAN, settings.CIFAR100_TRAIN_STD, num_workers=4, @@ -138,7 +138,7 @@ def eval_training(epoch=0, tb=True): shuffle=True ) - cifar100_test_loader = get_test_dataloader( + ETL952_test_loader = get_test_dataloader( settings.CIFAR100_TRAIN_MEAN, settings.CIFAR100_TRAIN_STD, num_workers=4, @@ -149,7 +149,7 @@ def eval_training(epoch=0, tb=True): loss_function = nn.CrossEntropyLoss() optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4) train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=settings.MILESTONES, gamma=0.2) #learning rate decay - iter_per_epoch = len(cifar100_training_loader) + iter_per_epoch = len(ETL952_training_loader) warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * args.warm) if args.resume: diff --git a/utils.py b/utils.py index f2cfac38..1714b6e0 100644 --- a/utils.py +++ b/utils.py @@ -19,8 +19,15 @@ def get_network(args): """ return given network """ - - if args.net == 'vgg16': + print(args.net) + if args.net == 'sqnetf4': + from cangjie_models.sqnetF4 import sqnetf4 + net = sqnetf4() + print("network selected") + elif args.net == 'sqnetc3579': + from cangjie_models.sqnetC3579 import sqnetc3579 + net = sqnetc3579() + elif args.net == 'vgg16': from models.vgg import vgg16_bn net = vgg16_bn() elif args.net == 'vgg13': @@ -305,4 +312,4 @@ def best_acc_weights(weights_folder): return '' best_files = sorted(best_files, key=lambda w: int(re.search(regex_str, w).groups()[1])) - return best_files[-1] \ No newline at end of file + return best_files[-1]