Skip to content

Commit af668b3

Browse files
committed
Rebuild
1 parent 7e3950b commit af668b3

File tree

116 files changed

+238888
-239206
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

116 files changed

+238888
-239206
lines changed

docs/_downloads/462f53ac0f7c6840743ad8655c43102c/torchtext_translation.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,14 @@
3636
#
3737
# ::
3838
#
39-
# python -m spacy download en
40-
# python -m spacy download de
39+
# python -m spacy download en_core_web_sm
40+
# python -m spacy download de_core_news_sm
4141

4242
import torchtext
4343
import torch
4444
from torchtext.data.utils import get_tokenizer
4545
from collections import Counter
46-
from torchtext.vocab import Vocab, vocab
46+
from torchtext.vocab import vocab
4747
from torchtext.utils import download_from_url, extract_archive
4848
import io
4949

@@ -56,8 +56,8 @@
5656
val_filepaths = [extract_archive(download_from_url(url_base + url))[0] for url in val_urls]
5757
test_filepaths = [extract_archive(download_from_url(url_base + url))[0] for url in test_urls]
5858

59-
de_tokenizer = get_tokenizer('spacy', language='de')
60-
en_tokenizer = get_tokenizer('spacy', language='en')
59+
de_tokenizer = get_tokenizer('spacy', language='de_core_news_sm')
60+
en_tokenizer = get_tokenizer('spacy', language='en_core_web_sm')
6161

6262
def build_vocab(filepath, tokenizer):
6363
counter = Counter()

docs/_downloads/6fbbb25a2ddfe5bf93b618f53cf7077e/torchtext_translation.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
"cell_type": "markdown",
2323
"metadata": {},
2424
"source": [
25-
"## \ub370\uc774\ud130 \ucc98\ub9ac\ud558\uae30\n\n``torchtext`` \uc5d0\ub294 \uc5b8\uc5b4 \ubcc0\ud658 \ubaa8\ub378\uc744 \ub9cc\ub4e4 \ub54c \uc27d\uac8c \uc0ac\uc6a9\ud560 \uc218 \uc788\ub294 \ub370\uc774\ud130\uc14b\uc744 \ub9cc\ub4e4\uae30 \uc801\ud569\ud55c \ub2e4\uc591\ud55c \ub3c4\uad6c\uac00 \uc788\uc2b5\ub2c8\ub2e4.\n\uc774 \uc608\uc81c\uc5d0\uc11c\ub294 \uac00\uacf5\ub418\uc9c0 \uc54a\uc740 \ud14d\uc2a4\ud2b8 \ubb38\uc7a5(raw text sentence)\uc744 \ud1a0\ud070\ud654(tokenize)\ud558\uace0, \uc5b4\ud718\uc9d1(vocabulary)\uc744 \ub9cc\ub4e4\uace0,\n\ud1a0\ud070\uc744 \ud150\uc11c\ub85c \uc22b\uc790\ud654(numericalize)\ud558\ub294 \ubc29\ubc95\uc744 \uc54c\uc544\ubcf4\uaca0\uc2b5\ub2c8\ub2e4.\n\n\ucc38\uace0 : \uc774 \ud29c\ud1a0\ub9ac\uc5bc\uc5d0\uc11c\uc758 \ud1a0\ud070\ud654(tokenization)\uc5d0\ub294 [Spacy](https://spacy.io)_ \uac00 \ud544\uc694\ud569\ub2c8\ub2e4.\nSpacy\ub294 \uc601\uc5b4 \uc774 \uc678\uc758 \ub2e4\ub978 \uc5b8\uc5b4\uc5d0 \ub300\ud55c \uac15\ub825\ud55c \ud1a0\ud070\ud654 \uae30\ub2a5\uc744 \uc81c\uacf5\ud558\uae30 \ub54c\ubb38\uc5d0 \uc0ac\uc6a9\ud569\ub2c8\ub2e4. ``torchtext`` \ub294\n`basic_english`` \ud1a0\ud06c\ub098\uc774\uc800\ub97c \uc81c\uacf5\ud560 \ubfd0 \uc544\ub2c8\ub77c \uc601\uc5b4\uc5d0 \uc0ac\uc6a9\ud560 \uc218 \uc788\ub294 \ub2e4\ub978 \ud1a0\ud06c\ub098\uc774\uc800\ub4e4(\uc608\ucee8\ub370\n[Moses](https://bitbucket.org/luismsgomes/mosestokenizer/src/default/)_ )\uc744 \uc9c0\uc6d0\ud569\ub2c8\ub2e4\ub9cc, \uc5b8\uc5b4 \ubc88\uc5ed\uc744 \uc704\ud574\uc11c\ub294 \ub2e4\uc591\ud55c \uc5b8\uc5b4\ub97c\n\ub2e4\ub8e8\uc5b4\uc57c \ud558\uae30 \ub54c\ubb38\uc5d0 Spacy\uac00 \uac00\uc7a5 \uc801\ud569\ud569\ub2c8\ub2e4.\n\n\uc774 \ud29c\ud1a0\ub9ac\uc5bc\uc744 \uc2e4\ud589\ud558\ub824\uba74, \uc6b0\uc120 ``pip`` \ub098 ``conda`` \ub85c ``spacy`` \ub97c \uc124\uce58\ud558\uc138\uc694. 
\uadf8 \ub2e4\uc74c,\nSpacy \ud1a0\ud06c\ub098\uc774\uc800\uac00 \uc4f8 \uc601\uc5b4\uc640 \ub3c5\uc77c\uc5b4\uc5d0 \ub300\ud55c \ub370\uc774\ud130\ub97c \ub2e4\uc6b4\ub85c\ub4dc \ubc1b\uc2b5\ub2c8\ub2e4.\n\n::\n\n python -m spacy download en\n python -m spacy download de\n\n"
25+
"## \ub370\uc774\ud130 \ucc98\ub9ac\ud558\uae30\n\n``torchtext`` \uc5d0\ub294 \uc5b8\uc5b4 \ubcc0\ud658 \ubaa8\ub378\uc744 \ub9cc\ub4e4 \ub54c \uc27d\uac8c \uc0ac\uc6a9\ud560 \uc218 \uc788\ub294 \ub370\uc774\ud130\uc14b\uc744 \ub9cc\ub4e4\uae30 \uc801\ud569\ud55c \ub2e4\uc591\ud55c \ub3c4\uad6c\uac00 \uc788\uc2b5\ub2c8\ub2e4.\n\uc774 \uc608\uc81c\uc5d0\uc11c\ub294 \uac00\uacf5\ub418\uc9c0 \uc54a\uc740 \ud14d\uc2a4\ud2b8 \ubb38\uc7a5(raw text sentence)\uc744 \ud1a0\ud070\ud654(tokenize)\ud558\uace0, \uc5b4\ud718\uc9d1(vocabulary)\uc744 \ub9cc\ub4e4\uace0,\n\ud1a0\ud070\uc744 \ud150\uc11c\ub85c \uc22b\uc790\ud654(numericalize)\ud558\ub294 \ubc29\ubc95\uc744 \uc54c\uc544\ubcf4\uaca0\uc2b5\ub2c8\ub2e4.\n\n\ucc38\uace0 : \uc774 \ud29c\ud1a0\ub9ac\uc5bc\uc5d0\uc11c\uc758 \ud1a0\ud070\ud654(tokenization)\uc5d0\ub294 [Spacy](https://spacy.io)_ \uac00 \ud544\uc694\ud569\ub2c8\ub2e4.\nSpacy\ub294 \uc601\uc5b4 \uc774 \uc678\uc758 \ub2e4\ub978 \uc5b8\uc5b4\uc5d0 \ub300\ud55c \uac15\ub825\ud55c \ud1a0\ud070\ud654 \uae30\ub2a5\uc744 \uc81c\uacf5\ud558\uae30 \ub54c\ubb38\uc5d0 \uc0ac\uc6a9\ud569\ub2c8\ub2e4. ``torchtext`` \ub294\n`basic_english`` \ud1a0\ud06c\ub098\uc774\uc800\ub97c \uc81c\uacf5\ud560 \ubfd0 \uc544\ub2c8\ub77c \uc601\uc5b4\uc5d0 \uc0ac\uc6a9\ud560 \uc218 \uc788\ub294 \ub2e4\ub978 \ud1a0\ud06c\ub098\uc774\uc800\ub4e4(\uc608\ucee8\ub370\n[Moses](https://bitbucket.org/luismsgomes/mosestokenizer/src/default/)_ )\uc744 \uc9c0\uc6d0\ud569\ub2c8\ub2e4\ub9cc, \uc5b8\uc5b4 \ubc88\uc5ed\uc744 \uc704\ud574\uc11c\ub294 \ub2e4\uc591\ud55c \uc5b8\uc5b4\ub97c\n\ub2e4\ub8e8\uc5b4\uc57c \ud558\uae30 \ub54c\ubb38\uc5d0 Spacy\uac00 \uac00\uc7a5 \uc801\ud569\ud569\ub2c8\ub2e4.\n\n\uc774 \ud29c\ud1a0\ub9ac\uc5bc\uc744 \uc2e4\ud589\ud558\ub824\uba74, \uc6b0\uc120 ``pip`` \ub098 ``conda`` \ub85c ``spacy`` \ub97c \uc124\uce58\ud558\uc138\uc694. 
\uadf8 \ub2e4\uc74c,\nSpacy \ud1a0\ud06c\ub098\uc774\uc800\uac00 \uc4f8 \uc601\uc5b4\uc640 \ub3c5\uc77c\uc5b4\uc5d0 \ub300\ud55c \ub370\uc774\ud130\ub97c \ub2e4\uc6b4\ub85c\ub4dc \ubc1b\uc2b5\ub2c8\ub2e4.\n\n::\n\n python -m spacy download en_core_web_sm\n python -m spacy download de_core_news_sm\n\n"
2626
]
2727
},
2828
{
@@ -33,7 +33,7 @@
3333
},
3434
"outputs": [],
3535
"source": [
36-
"import torchtext\nimport torch\nfrom torchtext.data.utils import get_tokenizer\nfrom collections import Counter\nfrom torchtext.vocab import Vocab, vocab\nfrom torchtext.utils import download_from_url, extract_archive\nimport io\n\nurl_base = 'https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/'\ntrain_urls = ('train.de.gz', 'train.en.gz')\nval_urls = ('val.de.gz', 'val.en.gz')\ntest_urls = ('test_2016_flickr.de.gz', 'test_2016_flickr.en.gz')\n\ntrain_filepaths = [extract_archive(download_from_url(url_base + url))[0] for url in train_urls]\nval_filepaths = [extract_archive(download_from_url(url_base + url))[0] for url in val_urls]\ntest_filepaths = [extract_archive(download_from_url(url_base + url))[0] for url in test_urls]\n\nde_tokenizer = get_tokenizer('spacy', language='de')\nen_tokenizer = get_tokenizer('spacy', language='en')\n\ndef build_vocab(filepath, tokenizer):\n counter = Counter()\n with io.open(filepath, encoding=\"utf8\") as f:\n for string_ in f:\n counter.update(tokenizer(string_))\n return vocab(counter, specials=['<unk>', '<pad>', '<bos>', '<eos>'])\n\nde_vocab = build_vocab(train_filepaths[0], de_tokenizer)\nen_vocab = build_vocab(train_filepaths[1], en_tokenizer)\n\ndef data_process(filepaths):\n raw_de_iter = iter(io.open(filepaths[0], encoding=\"utf8\"))\n raw_en_iter = iter(io.open(filepaths[1], encoding=\"utf8\"))\n data = []\n for (raw_de, raw_en) in zip(raw_de_iter, raw_en_iter):\n de_tensor_ = torch.tensor([de_vocab[token] for token in de_tokenizer(raw_de)],\n dtype=torch.long)\n en_tensor_ = torch.tensor([en_vocab[token] for token in en_tokenizer(raw_en)],\n dtype=torch.long)\n data.append((de_tensor_, en_tensor_))\n return data\n\ntrain_data = data_process(train_filepaths)\nval_data = data_process(val_filepaths)\ntest_data = data_process(test_filepaths)"
36+
"import torchtext\nimport torch\nfrom torchtext.data.utils import get_tokenizer\nfrom collections import Counter\nfrom torchtext.vocab import vocab\nfrom torchtext.utils import download_from_url, extract_archive\nimport io\n\nurl_base = 'https://raw.githubusercontent.com/multi30k/dataset/master/data/task1/raw/'\ntrain_urls = ('train.de.gz', 'train.en.gz')\nval_urls = ('val.de.gz', 'val.en.gz')\ntest_urls = ('test_2016_flickr.de.gz', 'test_2016_flickr.en.gz')\n\ntrain_filepaths = [extract_archive(download_from_url(url_base + url))[0] for url in train_urls]\nval_filepaths = [extract_archive(download_from_url(url_base + url))[0] for url in val_urls]\ntest_filepaths = [extract_archive(download_from_url(url_base + url))[0] for url in test_urls]\n\nde_tokenizer = get_tokenizer('spacy', language='de_core_news_sm')\nen_tokenizer = get_tokenizer('spacy', language='en_core_web_sm')\n\ndef build_vocab(filepath, tokenizer):\n counter = Counter()\n with io.open(filepath, encoding=\"utf8\") as f:\n for string_ in f:\n counter.update(tokenizer(string_))\n return vocab(counter, specials=['<unk>', '<pad>', '<bos>', '<eos>'])\n\nde_vocab = build_vocab(train_filepaths[0], de_tokenizer)\nen_vocab = build_vocab(train_filepaths[1], en_tokenizer)\n\ndef data_process(filepaths):\n raw_de_iter = iter(io.open(filepaths[0], encoding=\"utf8\"))\n raw_en_iter = iter(io.open(filepaths[1], encoding=\"utf8\"))\n data = []\n for (raw_de, raw_en) in zip(raw_de_iter, raw_en_iter):\n de_tensor_ = torch.tensor([de_vocab[token] for token in de_tokenizer(raw_de)],\n dtype=torch.long)\n en_tensor_ = torch.tensor([en_vocab[token] for token in en_tokenizer(raw_en)],\n dtype=torch.long)\n data.append((de_tensor_, en_tensor_))\n return data\n\ntrain_data = data_process(train_filepaths)\nval_data = data_process(val_filepaths)\ntest_data = data_process(test_filepaths)"
3737
]
3838
},
3939
{

docs/_downloads/d9398fce39ca80dc4bb8b8ea55b575a8/nn_tutorial.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
},
3434
"outputs": [],
3535
"source": [
36-
"from pathlib import Path\nimport requests\n\nDATA_PATH = Path(\"data\")\nPATH = DATA_PATH / \"mnist\"\n\nPATH.mkdir(parents=True, exist_ok=True)\n\nURL = \"https://github.com/pytorch/tutorials/raw/master/_static/\"\nFILENAME = \"mnist.pkl.gz\"\n\nif not (PATH / FILENAME).exists():\n content = requests.get(URL + FILENAME).content\n (PATH / FILENAME).open(\"wb\").write(content)"
36+
"from pathlib import Path\nimport requests\n\nDATA_PATH = Path(\"data\")\nPATH = DATA_PATH / \"mnist\"\n\nPATH.mkdir(parents=True, exist_ok=True)\n\nURL = \"https://github.com/pytorch/tutorials/raw/main/_static/\"\nFILENAME = \"mnist.pkl.gz\"\n\nif not (PATH / FILENAME).exists():\n content = requests.get(URL + FILENAME).content\n (PATH / FILENAME).open(\"wb\").write(content)"
3737
]
3838
},
3939
{

docs/_downloads/f16255c783f9e487235b8eff6c8792b9/nn_tutorial.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353

5454
PATH.mkdir(parents=True, exist_ok=True)
5555

56-
URL = "https://github.com/pytorch/tutorials/raw/master/_static/"
56+
URL = "https://github.com/pytorch/tutorials/raw/main/_static/"
5757
FILENAME = "mnist.pkl.gz"
5858

5959
if not (PATH / FILENAME).exists():
-277 Bytes
Loading
-135 Bytes
Loading
-15 Bytes
Loading
-699 Bytes
Loading
-2.75 KB
Loading
-350 Bytes
Loading

0 commit comments

Comments
(0)