datawhalechina
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎codes/ch24/cnn-text-classification.py‎
Lines changed: 39 additions & 40 deletions b/‎codes/ch24/cnn-text-classification.py‎
Lines changed: 39 additions & 40 deletions
diff --git a/‎codes/ch27/bi-lstm-text-classification.py‎
Lines changed: 41 additions & 41 deletions b/‎codes/ch27/bi-lstm-text-classification.py‎
Lines changed: 41 additions & 41 deletions
diff --git a/‎notebook/part03/notes/ch23.ipynb‎
Lines changed: 16 additions & 7 deletions b/‎notebook/part03/notes/ch23.ipynb‎
Lines changed: 16 additions & 7 deletions
@@ -339,3 +339,4 @@ dist
 /notebook/part03/notes/data/
 /docs/.vitepress/cache/
 /codes/summary/
+/codes/ch23/data/
@@ -35,53 +35,52 @@ def to_map_style_dataset(iter_data):
     return list(iter_data)
 
 
-def download_file(filename, filepath):
-    base_urls = [
-        "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/",
-        "https://ghproxy.net/https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/",
-        "https://fastly.jsdelivr.net/gh/mhjabreel/CharCnn_Keras@master/data/ag_news_csv/"
-    ]
-
-    print(f"Attempting to download {filename}...")
-
-    for base_url in base_urls:
-        url = base_url + filename
-        print(f"Trying {url} ...")
-        try:
-            # Try with verification first, then without if it fails with SSLError
-            try:
-                response = requests.get(url, stream=True, timeout=10)
-            except requests.exceptions.SSLError:
-                print(f"SSL Error with {url}, trying without verification...")
-                response = requests.get(url, stream=True, timeout=10, verify=False)
-
-            if response.status_code == 200:
-                with open(filepath, 'wb') as f:
-                    for chunk in response.iter_content(chunk_size=8192):
-                        f.write(chunk)
-                print(f"Downloaded successfully from {url}")
-                return
-            else:
-                print(f"Failed to download from {url}, status code: {response.status_code}")
-        except Exception as e:
-            print(f"Error downloading from {url}: {e}")
-
-    # If all mirrors fail
-    raise RuntimeError(
-        f"Failed to download {filename} from all mirrors.\n"
-        f"Please manually download 'train.csv' and 'test.csv' from "
-        f"https://github.com/mhjabreel/CharCnn_Keras/tree/master/data/ag_news_csv "
-        f"and place them in {os.path.dirname(filepath)}"
-    )
-
-
 def AG_NEWS(root='./data'):
     base_path = os.path.join(root, 'datasets', 'AG_NEWS')
     os.makedirs(base_path, exist_ok=True)
 
     train_path = os.path.join(base_path, 'train.csv')
     test_path = os.path.join(base_path, 'test.csv')
 
+    def download_file(filename, filepath):
+        base_urls = [
+            "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/",
+            "https://ghproxy.net/https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/",
+            "https://fastly.jsdelivr.net/gh/mhjabreel/CharCnn_Keras@master/data/ag_news_csv/"
+        ]
+
+        print(f"Attempting to download {filename}...")
+
+        for base_url in base_urls:
+            url = base_url + filename
+            print(f"Trying {url} ...")
+            try:
+                # Try with verification first, then without if it fails with SSLError
+                try:
+                    response = requests.get(url, stream=True, timeout=10)
+                except requests.exceptions.SSLError:
+                    print(f"SSL Error with {url}, trying without verification...")
+                    response = requests.get(url, stream=True, timeout=10, verify=False)
+
+                if response.status_code == 200:
+                    with open(filepath, 'wb') as f:
+                        for chunk in response.iter_content(chunk_size=8192):
+                            f.write(chunk)
+                    print(f"Downloaded successfully from {url}")
+                    return
+                else:
+                    print(f"Failed to download from {url}, status code: {response.status_code}")
+            except Exception as e:
+                print(f"Error downloading from {url}: {e}")
+
+        # If all mirrors fail
+        raise RuntimeError(
+            f"Failed to download {filename} from all mirrors.\n"
+            f"Please manually download 'train.csv' and 'test.csv' from "
+            f"https://github.com/mhjabreel/CharCnn_Keras/tree/master/data/ag_news_csv "
+            f"and place them in {os.path.dirname(filepath)}"
+        )
+
     if not os.path.exists(train_path):
         download_file("train.csv", train_path)
 
 
@@ -8,6 +8,7 @@
 @desc: 习题27.1 基于双向LSTM的ELMo预训练语言模型，假设下游任务是文本分类
 """
 import csv
+
 import os
 import time
 
@@ -28,52 +29,51 @@ def to_map_style_dataset(iter_data):
     return list(iter_data)
 
 
-def download_file(filename, filepath):
-    base_urls = [
-        "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/",
-        "https://ghproxy.net/https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/",
-        "https://fastly.jsdelivr.net/gh/mhjabreel/CharCnn_Keras@master/data/ag_news_csv/"
-    ]
-
-    print(f"Attempting to download {filename}...")
-
-    for base_url in base_urls:
-        url = base_url + filename
-        print(f"Trying {url} ...")
-        try:
-            try:
-                response = requests.get(url, stream=True, timeout=10)
-            except requests.exceptions.SSLError:
-                print(f"SSL Error with {url}, trying without verification...")
-                response = requests.get(url, stream=True, timeout=10, verify=False)
-
-            if response.status_code == 200:
-                with open(filepath, 'wb') as f:
-                    for chunk in response.iter_content(chunk_size=8192):
-                        f.write(chunk)
-                print(f"Downloaded successfully from {url}")
-                return
-            else:
-                print(f"Failed to download from {url}, status code: {response.status_code}")
-        except Exception as e:
-            print(f"Error downloading from {url}: {e}")
-
-    # If all mirrors fail
-    raise RuntimeError(
-        f"Failed to download {filename} from all mirrors.\n"
-        f"Please manually download 'train.csv' and 'test.csv' from "
-        f"https://github.com/mhjabreel/CharCnn_Keras/tree/master/data/ag_news_csv "
-        f"and place them in {os.path.dirname(filepath)}"
-    )
-
-
 def AG_NEWS(root='./data'):
     base_path = os.path.join(root, 'datasets', 'AG_NEWS')
     os.makedirs(base_path, exist_ok=True)
 
     train_path = os.path.join(base_path, 'train.csv')
     test_path = os.path.join(base_path, 'test.csv')
 
+    def download_file(filename, filepath):
+        base_urls = [
+            "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/",
+            "https://ghproxy.net/https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/",
+            "https://fastly.jsdelivr.net/gh/mhjabreel/CharCnn_Keras@master/data/ag_news_csv/"
+        ]
+
+        print(f"Attempting to download {filename}...")
+
+        for base_url in base_urls:
+            url = base_url + filename
+            print(f"Trying {url} ...")
+            try:
+                try:
+                    response = requests.get(url, stream=True, timeout=10)
+                except requests.exceptions.SSLError:
+                    print(f"SSL Error with {url}, trying without verification...")
+                    response = requests.get(url, stream=True, timeout=10, verify=False)
+
+                if response.status_code == 200:
+                    with open(filepath, 'wb') as f:
+                        for chunk in response.iter_content(chunk_size=8192):
+                            f.write(chunk)
+                    print(f"Downloaded successfully from {url}")
+                    return
+                else:
+                    print(f"Failed to download from {url}, status code: {response.status_code}")
+            except Exception as e:
+                print(f"Error downloading from {url}: {e}")
+
+        # If all mirrors fail
+        raise RuntimeError(
+            f"Failed to download {filename} from all mirrors.\n"
+            f"Please manually download 'train.csv' and 'test.csv' from "
+            f"https://github.com/mhjabreel/CharCnn_Keras/tree/master/data/ag_news_csv "
+            f"and place them in {os.path.dirname(filepath)}"
+        )
+
     if not os.path.exists(train_path):
         download_file("train.csv", train_path)
 
@@ -101,10 +101,10 @@ def get_elmo_model():
     elmo_options_file = './data/elmo_2x1024_128_2048cnn_1xhighway_options.json'
     elmo_weight_file = './data/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5'
     url = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json"
-    if (not os.path.exists(elmo_options_file)):
+    if not os.path.exists(elmo_options_file):
         wget.download(url, elmo_options_file)
     url = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5"
-    if (not os.path.exists(elmo_weight_file)):
+    if not os.path.exists(elmo_weight_file):
         wget.download(url, elmo_weight_file)
 
     print("Initializing ELMo model...")
 
@@ -3,7 +3,10 @@
   {
    "cell_type": "markdown",
    "metadata": {
-    "collapsed": true
+    "collapsed": true,
+    "jupyter": {
+     "outputs_hidden": true
+    }
    },
    "source": [
     "# 第23章前馈神经网络"
@@ -12,7 +15,10 @@
   {
    "cell_type": "markdown",
    "metadata": {
-    "collapsed": true
+    "collapsed": true,
+    "jupyter": {
+     "outputs_hidden": true
+    }
    },
    "source": [
     "## 习题23.1"
@@ -21,7 +27,10 @@
   {
    "cell_type": "markdown",
    "metadata": {
-    "collapsed": true
+    "collapsed": true,
+    "jupyter": {
+     "outputs_hidden": true
+    }
    },
    "source": [
     "&emsp;&emsp;构造前馈神经网络实现逻辑表达式XNOR，使用S型函数为激活函数。"
@@ -571,7 +580,7 @@
    "outputs": [],
    "source": [
     "# 加载MNIST手写数字数据集\n",
-    "mnist = fetch_openml('mnist_784', parser='auto')\n",
+    "mnist = fetch_openml('mnist_784', parser='auto', data_home='./data')\n",
     "X = mnist.data.astype('float32') / 255.0\n",
     "y = mnist.target.astype('int')"
    ]
@@ -599,7 +608,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [02:21<00:00, 14.20s/it]\n"
+      "100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [01:17<00:00,  7.79s/it]\n"
      ]
     }
    ],
@@ -1322,7 +1331,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.5"
+   "version": "3.12.10"
   },
   "toc": {
    "base_numbering": 1,
@@ -1349,5 +1358,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 1
+ "nbformat_minor": 4
 }