diff --git a/neural_network/back_propagation_neural_network.py b/neural_network/back_propagation_neural_network.py
index 182f759c5fc7..65ad60d0b06d 100644
--- a/neural_network/back_propagation_neural_network.py
+++ b/neural_network/back_propagation_neural_network.py
@@ -1,167 +1,100 @@
-#!/usr/bin/python
-
-"""
-
-A Framework of Back Propagation Neural Network (BP) model
-
-Easy to use:
-    * add many layers as you want ! ! !
-    * clearly see how the loss decreasing
-Easy to expand:
-    * more activation functions
-    * more loss functions
-    * more optimization method
-
-Author: Stephen Lee
-Github : https://github.com/RiptideBo
-Date: 2017.11.23
-
-"""
-
 import numpy as np
 from matplotlib import pyplot as plt
 
 
 def sigmoid(x: np.ndarray) -> np.ndarray:
     return 1 / (1 + np.exp(-x))
 
 
 class DenseLayer:
-    """
-    Layers of BP neural network
-    """
-
     def __init__(
-        self, units, activation=None, learning_rate=None, is_input_layer=False
+        self,
+        units: int,
+        activation=None,
+        learning_rate: float = 0.3,
+        is_input_layer: bool = False,
     ):
-        """
-        common connected layer of bp network
-        :param units: numbers of neural units
-        :param activation: activation function
-        :param learning_rate: learning rate for paras
-        :param is_input_layer: whether it is input layer or not
-        """
         self.units = units
         self.weight = None
         self.bias = None
-        self.activation = activation
-        if learning_rate is None:
-            learning_rate = 0.3
-        self.learn_rate = learning_rate
+        self.activation = activation if activation else sigmoid
+        self.learning_rate = learning_rate
         self.is_input_layer = is_input_layer
 
-    def initializer(self, back_units):
+    def initializer(self, back_units: int):
         rng = np.random.default_rng()
-        self.weight = np.asmatrix(rng.normal(0, 0.5, (self.units, back_units)))
-        self.bias = np.asmatrix(rng.normal(0, 0.5, self.units)).T
-        if self.activation is None:
-            self.activation = sigmoid
-
-    def cal_gradient(self):
-        # activation function may be sigmoid or linear
-        if self.activation == sigmoid:
-            gradient_mat = np.dot(self.output, (1 - self.output).T)
-            gradient_activation = np.diag(np.diag(gradient_mat))
-        else:
-            gradient_activation = 1
-        return gradient_activation
+        self.weight = rng.normal(0, 0.5, (self.units, back_units))
+        self.bias = rng.normal(0, 0.5, self.units).reshape(-1, 1)
 
-    def forward_propagation(self, xdata):
+    def forward_propagation(self, xdata: np.ndarray) -> np.ndarray:
         self.xdata = xdata
         if self.is_input_layer:
-            # input layer
+            # the input layer passes its data straight through
             self.wx_plus_b = xdata
             self.output = xdata
             return xdata
-        else:
-            self.wx_plus_b = np.dot(self.weight, self.xdata) - self.bias
-            self.output = self.activation(self.wx_plus_b)
-            return self.output
+        self.wx_plus_b = np.dot(self.weight, xdata) - self.bias
+        self.output = self.activation(self.wx_plus_b)
+        return self.output
 
-    def back_propagation(self, gradient):
-        gradient_activation = self.cal_gradient()  # i * i 维
-        gradient = np.asmatrix(np.dot(gradient.T, gradient_activation))
-
-        self._gradient_weight = np.asmatrix(self.xdata)
-        self._gradient_bias = -1
-        self._gradient_x = self.weight
-
-        self.gradient_weight = np.dot(gradient.T, self._gradient_weight.T)
-        self.gradient_bias = gradient * self._gradient_bias
-        self.gradient = np.dot(gradient, self._gradient_x).T
-        # upgrade: the Negative gradient direction
-        self.weight = self.weight - self.learn_rate * self.gradient_weight
-        self.bias = self.bias - self.learn_rate * self.gradient_bias.T
-        # updates the weights and bias according to learning rate (0.3 if undefined)
-        return self.gradient
+    def back_propagation(self, gradient: np.ndarray) -> np.ndarray:
+        # chain the incoming gradient through the activation (a row vector)
+        gradient = np.dot(gradient.T, self.cal_gradient())
+        # gradient w.r.t. this layer's input, handed on to the previous layer
+        gradient_x = np.dot(gradient, self.weight).T
+        self.weight -= self.learning_rate * np.dot(gradient.T, self.xdata.T)
+        # wx_plus_b = weight @ x - bias, hence the flipped sign for the bias
+        self.bias += self.learning_rate * gradient.T
+        return gradient_x
+
+    def cal_gradient(self) -> np.ndarray:
+        # Jacobian of the activation: sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))
+        if self.activation == sigmoid:
+            return np.diag(np.diag(np.dot(self.output, (1 - self.output).T)))
+        return 1
 
 
 class BPNN:
-    """
-    Back Propagation Neural Network model
-    """
-
     def __init__(self):
         self.layers = []
         self.train_mse = []
         self.fig_loss = plt.figure()
         self.ax_loss = self.fig_loss.add_subplot(1, 1, 1)
 
-    def add_layer(self, layer):
+    def add_layer(self, layer: DenseLayer):
         self.layers.append(layer)
 
     def build(self):
-        for i, layer in enumerate(self.layers[:]):
-            if i < 1:
+        for i, layer in enumerate(self.layers):
+            if i == 0:
                 layer.is_input_layer = True
             else:
                 layer.initializer(self.layers[i - 1].units)
 
     def summary(self):
-        for i, layer in enumerate(self.layers[:]):
+        for i, layer in enumerate(self.layers):
             print(f"------- layer {i} -------")
+            # np.shape() also copes with the input layer, whose weight is None
             print("weight.shape ", np.shape(layer.weight))
             print("bias.shape ", np.shape(layer.bias))
 
-    def train(self, xdata, ydata, train_round, accuracy):
+    def train(
+        self, xdata: np.ndarray, ydata: np.ndarray, train_round: int, accuracy: float
+    ):
         self.train_round = train_round
         self.accuracy = accuracy
 
-        self.ax_loss.hlines(self.accuracy, 0, self.train_round * 1.1)
+        self.ax_loss.hlines(accuracy, 0, train_round * 1.1)
 
-        x_shape = np.shape(xdata)
         for _ in range(train_round):
             all_loss = 0
-            for row in range(x_shape[0]):
-                _xdata = np.asmatrix(xdata[row, :]).T
-                _ydata = np.asmatrix(ydata[row, :]).T
+            for row in range(xdata.shape[0]):
+                _xdata = xdata[row, :].reshape(-1, 1)
+                _ydata = ydata[row, :].reshape(-1, 1)
 
                 # forward propagation
                 for layer in self.layers:
                     _xdata = layer.forward_propagation(_xdata)
 
                 loss, gradient = self.cal_loss(_ydata, _xdata)
-                all_loss = all_loss + loss
+                all_loss += loss
 
-            # back propagation: the input_layer does not upgrade
+            # back propagation: the input layer has no parameters to update
             for layer in self.layers[:0:-1]:
                 gradient = layer.back_propagation(gradient)
 
-            mse = all_loss / x_shape[0]
+            mse = all_loss / xdata.shape[0]
             self.train_mse.append(mse)
 
             self.plot_loss()
 
-            if mse < self.accuracy:
-                print("----达到精度----")
+            if mse < accuracy:
+                print("---- target accuracy reached ----")
                 return mse
         return None
 
-    def cal_loss(self, ydata, ydata_):
-        self.loss = np.sum(np.power((ydata - ydata_), 2))
-        self.loss_gradient = 2 * (ydata_ - ydata)
-        # vector (shape is the same as _ydata.shape)
-        return self.loss, self.loss_gradient
+    def cal_loss(
+        self, ydata: np.ndarray, ydata_: np.ndarray
+    ) -> tuple[float, np.ndarray]:
+        loss = np.sum(np.power(ydata - ydata_, 2))
+        loss_gradient = 2 * (ydata_ - ydata)
+        return loss, loss_gradient
 
     def plot_loss(self):
         if self.ax_loss.lines:
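The layer math here is easy to get wrong (the submitted patch dropped `self.xdata`, removed the `sigmoid` definition it still referenced, flipped the bias sign, and backpropagated through the weightless input layer), so a smoke test is worth having. A minimal sketch, not part of the diff — the seed, shapes, and hyperparameters are my own choices, but the `DenseLayer`/`BPNN` calls are exactly the API shown above:

```python
import numpy as np

# hypothetical smoke test: a 10 -> 20 -> 30 -> 2 network on random inputs
rng = np.random.default_rng(seed=0)
x = rng.normal(size=(10, 10)) * 10          # 10 samples, 10 features
y = np.tile(np.array([0.8, 0.4]), (10, 1))  # targets inside sigmoid's (0, 1) range

model = BPNN()
for units in (10, 20, 30, 2):
    model.add_layer(DenseLayer(units))
model.build()    # marks layer 0 as the input layer, initializes the rest
model.summary()
model.train(xdata=x, ydata=y, train_round=100, accuracy=0.01)
```

If the update signs in `back_propagation` were still wrong, the MSE curve drawn by `plot_loss` would diverge instead of falling toward the `accuracy` line.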
diff --git a/strings/anagrams.py b/strings/anagrams.py
index fb9ac0bd1f45..56a210b05c7a 100644
--- a/strings/anagrams.py
+++ b/strings/anagrams.py
@@ -14,7 +14,7 @@ def signature(word: str) -> str:
     >>> signature("finaltest")
     'aefilnstt'
     """
-    return "".join(sorted(word))
+    return "".join(sorted(word.replace(" ", "").lower()))
 
 
 def anagram(my_word: str) -> list[str]:
@@ -29,16 +29,39 @@ def anagram(my_word: str) -> list[str]:
     return word_by_signature[signature(my_word)]
 
 
-data: str = Path(__file__).parent.joinpath("words.txt").read_text(encoding="utf-8")
-word_list = sorted({word.strip().lower() for word in data.splitlines()})
+def load_word_list(file_path: Path) -> list[str]:
+    """Load a deduplicated, sorted word list from a file"""
+    return sorted(
+        {
+            word.strip().lower()
+            for word in file_path.read_text(encoding="utf-8").splitlines()
+        }
+    )
+
+
+def create_word_by_signature(word_list: list[str]) -> dict[str, list[str]]:
+    """Create a dictionary mapping word signatures to lists of words"""
+    word_by_signature = collections.defaultdict(list)
+    for word in word_list:
+        word_by_signature[signature(word)].append(word)
+    return word_by_signature
+
+
+# anagram() above reads this module-level mapping, so keep it in module scope
+word_by_signature = create_word_by_signature(
+    load_word_list(Path(__file__).parent.joinpath("words.txt"))
+)
 
-word_by_signature = collections.defaultdict(list)
-for word in word_list:
-    word_by_signature[signature(word)].append(word)
 
-if __name__ == "__main__":
-    all_anagrams = {word: anagram(word) for word in word_list if len(anagram(word)) > 1}
+def find_all_anagrams(word_list: list[str]) -> dict[str, list[str]]:
+    """Find all anagrams in a list of words"""
+    signatures = create_word_by_signature(word_list)
+    all_anagrams = {}
+    for word in word_list:
+        matches = signatures[signature(word)]
+        if len(matches) > 1:
+            all_anagrams[word] = matches
+    return all_anagrams
+
+
+def main() -> None:
+    data_file = Path(__file__).parent.joinpath("words.txt")
+    word_list = load_word_list(data_file)
+    all_anagrams = find_all_anagrams(word_list)
     with open("anagrams.txt", "w") as file:
         file.write("all_anagrams = \n ")
         file.write(pprint.pformat(all_anagrams))
+
+
+if __name__ == "__main__":
+    main()
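One trap this refactor almost hit: the untouched `anagram()` reads the module-level `word_by_signature`, which the submitted patch deleted while building only a local inside `find_all_anagrams`, so `anagram()` would raise `NameError`. The version above keeps a module-level mapping built from the new helpers, and `find_all_anagrams` looks up its own mapping directly. A usage sketch, not part of the diff, assuming `words.txt` sits next to the module as the code expects:

```python
from pathlib import Path

words = load_word_list(Path(__file__).parent / "words.txt")
all_anagrams = find_all_anagrams(words)
# each key maps to every word sharing its sorted-letter signature
print(all_anagrams.get("test"))
```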
diff --git a/strings/capitalize.py b/strings/capitalize.py
index c0b45e0d9614..767f8b22da5d 100644
--- a/strings/capitalize.py
+++ b/strings/capitalize.py
@@ -1,6 +1,3 @@
-from string import ascii_lowercase, ascii_uppercase
-
-
 def capitalize(sentence: str) -> str:
     """
     Capitalizes the first letter of a sentence or word.
@@ -16,17 +13,4 @@ def capitalize(sentence: str) -> str:
     >>> capitalize("")
     ''
     """
-    if not sentence:
-        return ""
-
-    # Create a dictionary that maps lowercase letters to uppercase letters
-    # Capitalize the first character if it's a lowercase letter
-    # Concatenate the capitalized character with the rest of the string
-    lower_to_upper = dict(zip(ascii_lowercase, ascii_uppercase))
-    return lower_to_upper.get(sentence[0], sentence[0]) + sentence[1:]
-
-
-if __name__ == "__main__":
-    from doctest import testmod
-
-    testmod()
+    # str.capitalize() would also lowercase every later character; slicing
+    # changes only the first one (and handles the empty string)
+    return sentence[:1].upper() + sentence[1:]
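The slice-based body is deliberate: the submitted one-liner used `str.capitalize()`, which also lowercases everything after the first character, silently changing behavior for mixed-case input even though the visible doctests still pass. A quick REPL comparison:

```python
>>> s = "hello World"
>>> s.capitalize()         # the built-in also lowercases the tail
'Hello world'
>>> s[:1].upper() + s[1:]  # only the first character changes
'Hello World'
```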