Skip to content

Commit 3458804

Browse files
committed
👌 校对完成第3篇的notebook
1 parent fecd40a commit 3458804

File tree

9 files changed

+487
-195
lines changed

9 files changed

+487
-195
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,3 +339,4 @@ dist
339339
/notebook/part03/notes/data/
340340
/docs/.vitepress/cache/
341341
/codes/summary/
342+
/codes/ch23/data/

codes/ch24/cnn-text-classification.py

Lines changed: 39 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -35,53 +35,52 @@ def to_map_style_dataset(iter_data):
3535
return list(iter_data)
3636

3737

38-
def download_file(filename, filepath):
39-
base_urls = [
40-
"https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/",
41-
"https://ghproxy.net/https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/",
42-
"https://fastly.jsdelivr.net/gh/mhjabreel/CharCnn_Keras@master/data/ag_news_csv/"
43-
]
44-
45-
print(f"Attempting to download {filename}...")
46-
47-
for base_url in base_urls:
48-
url = base_url + filename
49-
print(f"Trying {url} ...")
50-
try:
51-
# Try with verification first, then without if it fails with SSLError
52-
try:
53-
response = requests.get(url, stream=True, timeout=10)
54-
except requests.exceptions.SSLError:
55-
print(f"SSL Error with {url}, trying without verification...")
56-
response = requests.get(url, stream=True, timeout=10, verify=False)
57-
58-
if response.status_code == 200:
59-
with open(filepath, 'wb') as f:
60-
for chunk in response.iter_content(chunk_size=8192):
61-
f.write(chunk)
62-
print(f"Downloaded successfully from {url}")
63-
return
64-
else:
65-
print(f"Failed to download from {url}, status code: {response.status_code}")
66-
except Exception as e:
67-
print(f"Error downloading from {url}: {e}")
68-
69-
# If all mirrors fail
70-
raise RuntimeError(
71-
f"Failed to download {filename} from all mirrors.\n"
72-
f"Please manually download 'train.csv' and 'test.csv' from "
73-
f"https://github.com/mhjabreel/CharCnn_Keras/tree/master/data/ag_news_csv "
74-
f"and place them in {os.path.dirname(filepath)}"
75-
)
76-
77-
7838
def AG_NEWS(root='./data'):
7939
base_path = os.path.join(root, 'datasets', 'AG_NEWS')
8040
os.makedirs(base_path, exist_ok=True)
8141

8242
train_path = os.path.join(base_path, 'train.csv')
8343
test_path = os.path.join(base_path, 'test.csv')
8444

45+
def download_file(filename, filepath):
46+
base_urls = [
47+
"https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/",
48+
"https://ghproxy.net/https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/",
49+
"https://fastly.jsdelivr.net/gh/mhjabreel/CharCnn_Keras@master/data/ag_news_csv/"
50+
]
51+
52+
print(f"Attempting to download {filename}...")
53+
54+
for base_url in base_urls:
55+
url = base_url + filename
56+
print(f"Trying {url} ...")
57+
try:
58+
# Try with verification first, then without if it fails with SSLError
59+
try:
60+
response = requests.get(url, stream=True, timeout=10)
61+
except requests.exceptions.SSLError:
62+
print(f"SSL Error with {url}, trying without verification...")
63+
response = requests.get(url, stream=True, timeout=10, verify=False)
64+
65+
if response.status_code == 200:
66+
with open(filepath, 'wb') as f:
67+
for chunk in response.iter_content(chunk_size=8192):
68+
f.write(chunk)
69+
print(f"Downloaded successfully from {url}")
70+
return
71+
else:
72+
print(f"Failed to download from {url}, status code: {response.status_code}")
73+
except Exception as e:
74+
print(f"Error downloading from {url}: {e}")
75+
76+
# If all mirrors fail
77+
raise RuntimeError(
78+
f"Failed to download {filename} from all mirrors.\n"
79+
f"Please manually download 'train.csv' and 'test.csv' from "
80+
f"https://github.com/mhjabreel/CharCnn_Keras/tree/master/data/ag_news_csv "
81+
f"and place them in {os.path.dirname(filepath)}"
82+
)
83+
8584
if not os.path.exists(train_path):
8685
download_file("train.csv", train_path)
8786

codes/ch27/bi-lstm-text-classification.py

Lines changed: 41 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
@desc: 习题27.1 基于双向LSTM的ELMo预训练语言模型,假设下游任务是文本分类
99
"""
1010
import csv
11+
1112
import os
1213
import time
1314

@@ -28,52 +29,51 @@ def to_map_style_dataset(iter_data):
2829
return list(iter_data)
2930

3031

31-
def download_file(filename, filepath):
32-
base_urls = [
33-
"https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/",
34-
"https://ghproxy.net/https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/",
35-
"https://fastly.jsdelivr.net/gh/mhjabreel/CharCnn_Keras@master/data/ag_news_csv/"
36-
]
37-
38-
print(f"Attempting to download {filename}...")
39-
40-
for base_url in base_urls:
41-
url = base_url + filename
42-
print(f"Trying {url} ...")
43-
try:
44-
try:
45-
response = requests.get(url, stream=True, timeout=10)
46-
except requests.exceptions.SSLError:
47-
print(f"SSL Error with {url}, trying without verification...")
48-
response = requests.get(url, stream=True, timeout=10, verify=False)
49-
50-
if response.status_code == 200:
51-
with open(filepath, 'wb') as f:
52-
for chunk in response.iter_content(chunk_size=8192):
53-
f.write(chunk)
54-
print(f"Downloaded successfully from {url}")
55-
return
56-
else:
57-
print(f"Failed to download from {url}, status code: {response.status_code}")
58-
except Exception as e:
59-
print(f"Error downloading from {url}: {e}")
60-
61-
# If all mirrors fail
62-
raise RuntimeError(
63-
f"Failed to download {filename} from all mirrors.\n"
64-
f"Please manually download 'train.csv' and 'test.csv' from "
65-
f"https://github.com/mhjabreel/CharCnn_Keras/tree/master/data/ag_news_csv "
66-
f"and place them in {os.path.dirname(filepath)}"
67-
)
68-
69-
7032
def AG_NEWS(root='./data'):
7133
base_path = os.path.join(root, 'datasets', 'AG_NEWS')
7234
os.makedirs(base_path, exist_ok=True)
7335

7436
train_path = os.path.join(base_path, 'train.csv')
7537
test_path = os.path.join(base_path, 'test.csv')
7638

39+
def download_file(filename, filepath):
40+
base_urls = [
41+
"https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/",
42+
"https://ghproxy.net/https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/",
43+
"https://fastly.jsdelivr.net/gh/mhjabreel/CharCnn_Keras@master/data/ag_news_csv/"
44+
]
45+
46+
print(f"Attempting to download {filename}...")
47+
48+
for base_url in base_urls:
49+
url = base_url + filename
50+
print(f"Trying {url} ...")
51+
try:
52+
try:
53+
response = requests.get(url, stream=True, timeout=10)
54+
except requests.exceptions.SSLError:
55+
print(f"SSL Error with {url}, trying without verification...")
56+
response = requests.get(url, stream=True, timeout=10, verify=False)
57+
58+
if response.status_code == 200:
59+
with open(filepath, 'wb') as f:
60+
for chunk in response.iter_content(chunk_size=8192):
61+
f.write(chunk)
62+
print(f"Downloaded successfully from {url}")
63+
return
64+
else:
65+
print(f"Failed to download from {url}, status code: {response.status_code}")
66+
except Exception as e:
67+
print(f"Error downloading from {url}: {e}")
68+
69+
# If all mirrors fail
70+
raise RuntimeError(
71+
f"Failed to download {filename} from all mirrors.\n"
72+
f"Please manually download 'train.csv' and 'test.csv' from "
73+
f"https://github.com/mhjabreel/CharCnn_Keras/tree/master/data/ag_news_csv "
74+
f"and place them in {os.path.dirname(filepath)}"
75+
)
76+
7777
if not os.path.exists(train_path):
7878
download_file("train.csv", train_path)
7979

@@ -101,10 +101,10 @@ def get_elmo_model():
101101
elmo_options_file = './data/elmo_2x1024_128_2048cnn_1xhighway_options.json'
102102
elmo_weight_file = './data/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5'
103103
url = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json"
104-
if (not os.path.exists(elmo_options_file)):
104+
if not os.path.exists(elmo_options_file):
105105
wget.download(url, elmo_options_file)
106106
url = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5"
107-
if (not os.path.exists(elmo_weight_file)):
107+
if not os.path.exists(elmo_weight_file):
108108
wget.download(url, elmo_weight_file)
109109

110110
print("Initializing ELMo model...")

notebook/part03/notes/ch23.ipynb

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
{
44
"cell_type": "markdown",
55
"metadata": {
6-
"collapsed": true
6+
"collapsed": true,
7+
"jupyter": {
8+
"outputs_hidden": true
9+
}
710
},
811
"source": [
912
"# 第23章前馈神经网络"
@@ -12,7 +15,10 @@
1215
{
1316
"cell_type": "markdown",
1417
"metadata": {
15-
"collapsed": true
18+
"collapsed": true,
19+
"jupyter": {
20+
"outputs_hidden": true
21+
}
1622
},
1723
"source": [
1824
"## 习题23.1"
@@ -21,7 +27,10 @@
2127
{
2228
"cell_type": "markdown",
2329
"metadata": {
24-
"collapsed": true
30+
"collapsed": true,
31+
"jupyter": {
32+
"outputs_hidden": true
33+
}
2534
},
2635
"source": [
2736
"  构造前馈神经网络实现逻辑表达式XNOR,使用S型函数为激活函数。"
@@ -571,7 +580,7 @@
571580
"outputs": [],
572581
"source": [
573582
"# 加载MNIST手写数字数据集\n",
574-
"mnist = fetch_openml('mnist_784', parser='auto')\n",
583+
"mnist = fetch_openml('mnist_784', parser='auto', data_home='./data')\n",
575584
"X = mnist.data.astype('float32') / 255.0\n",
576585
"y = mnist.target.astype('int')"
577586
]
@@ -599,7 +608,7 @@
599608
"name": "stderr",
600609
"output_type": "stream",
601610
"text": [
602-
"100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [02:21<00:00, 14.20s/it]\n"
611+
"100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [01:17<00:00, 7.79s/it]\n"
603612
]
604613
}
605614
],
@@ -1322,7 +1331,7 @@
13221331
"name": "python",
13231332
"nbconvert_exporter": "python",
13241333
"pygments_lexer": "ipython3",
1325-
"version": "3.10.5"
1334+
"version": "3.12.10"
13261335
},
13271336
"toc": {
13281337
"base_numbering": 1,
@@ -1349,5 +1358,5 @@
13491358
}
13501359
},
13511360
"nbformat": 4,
1352-
"nbformat_minor": 1
1361+
"nbformat_minor": 4
13531362
}

0 commit comments

Comments
 (0)