Skip to content

Commit cff17ae

Browse files
authored
Merge pull request #103 from neutron0831/chapter/03/regular-expression
Chapter 3: Regular Expression
2 parents 5abb560 + 7765dee commit cff17ae

34 files changed

+1348
-65
lines changed

.github/workflows/test.yml

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ jobs:
1414

1515
steps:
1616
- uses: actions/checkout@v3
17+
with:
18+
fetch-depth: 0
1719
- uses: actions/setup-node@v3
1820
with:
1921
node-version: latest
@@ -26,8 +28,7 @@ jobs:
2628
- name: Get pnpm store directory
2729
id: pnpm-cache
2830
shell: bash
29-
run: |
30-
echo "STORE_PATH=$(pnpm store path)" >> $GITHUB_OUTPUT
31+
run: echo "STORE_PATH=$(pnpm store path)" >> $GITHUB_OUTPUT
3132
- uses: actions/cache@v3
3233
name: Setup pnpm cache
3334
with:
@@ -38,4 +39,8 @@ jobs:
3839
- name: Install dependencies
3940
run: pnpm install
4041
- name: Test
41-
run: pnpm test
42+
env:
43+
NODE_OPTIONS: --max_old_space_size=6144
44+
run: |
45+
TEST_FILES=$(git diff --diff-filter=ACMR --name-only origin/${{ github.base_ref }}...origin/${{ github.head_ref }} **/*.test.ts)
46+
pnpm vitest --silent --run $TEST_FILES

.lintstagedrc.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
module.exports = {
22
'*.{js,ts,vue}': ['eslint --fix'],
33
'*.{js,ts,json}': ['prettier --write'],
4-
'*.test.{js,ts}': ['vitest --silent --run'],
4+
'*.test.{js,ts}': [
5+
'cross-env NODE_OPTIONS="--max-old-space-size=6144" vitest --silent --run',
6+
],
57
}

CHANGELOG.md

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,50 @@
11
# Changelog
22

3+
<a name="0.3.0"></a>
4+
## [v0.3.0](https://github.com/neutron0831/nlp100-ts/compare/v0.2.0...v0.3.0) (2023-04-19)
5+
6+
### Added
7+
8+
- 👷‍♂️ Run `test.yml` only for untested tests [[82a2fa9](https://github.com/neutron0831/nlp100-ts/commit/82a2fa9)]
9+
- ✅ Add Exercise 29 test ([#29](https://github.com/neutron0831/nlp100-ts/issues/29)) [[23a5be3](https://github.com/neutron0831/nlp100-ts/commit/23a5be3)]
10+
- ✨ Add Exercise 29 ([#29](https://github.com/neutron0831/nlp100-ts/issues/29)) [[3fd8fc1](https://github.com/neutron0831/nlp100-ts/commit/3fd8fc1)]
11+
- ✅ Add Exercise 28 test ([#28](https://github.com/neutron0831/nlp100-ts/issues/28)) [[3c48f58](https://github.com/neutron0831/nlp100-ts/commit/3c48f58)]
12+
- ✨ Add Exercise 28 ([#28](https://github.com/neutron0831/nlp100-ts/issues/28)) [[ab7468e](https://github.com/neutron0831/nlp100-ts/commit/ab7468e)]
13+
- ✅ Add Exercise 27 test ([#27](https://github.com/neutron0831/nlp100-ts/issues/27)) [[c4ad8bc](https://github.com/neutron0831/nlp100-ts/commit/c4ad8bc)]
14+
- ✨ Add Exercise 27 ([#27](https://github.com/neutron0831/nlp100-ts/issues/27)) [[f7812cc](https://github.com/neutron0831/nlp100-ts/commit/f7812cc)]
15+
- ✅ Add Exercise 26 test ([#26](https://github.com/neutron0831/nlp100-ts/issues/26)) [[56ae4d9](https://github.com/neutron0831/nlp100-ts/commit/56ae4d9)]
16+
- ✨ Add Exercise 26 ([#26](https://github.com/neutron0831/nlp100-ts/issues/26)) [[f0cd6de](https://github.com/neutron0831/nlp100-ts/commit/f0cd6de)]
17+
- ✅ Add Exercise 25 test ([#25](https://github.com/neutron0831/nlp100-ts/issues/25)) [[62a0ccc](https://github.com/neutron0831/nlp100-ts/commit/62a0ccc)]
18+
- ✨ Add Exercise 25 ([#25](https://github.com/neutron0831/nlp100-ts/issues/25)) [[cd9d727](https://github.com/neutron0831/nlp100-ts/commit/cd9d727)]
19+
- ✅ Add Exercise 24 test ([#24](https://github.com/neutron0831/nlp100-ts/issues/24)) [[c87677a](https://github.com/neutron0831/nlp100-ts/commit/c87677a)]
20+
- ✨ Add Exercise 24 ([#24](https://github.com/neutron0831/nlp100-ts/issues/24)) [[5cae007](https://github.com/neutron0831/nlp100-ts/commit/5cae007)]
21+
- ✅ Add Exercise 23 test ([#23](https://github.com/neutron0831/nlp100-ts/issues/23)) [[c49d1c1](https://github.com/neutron0831/nlp100-ts/commit/c49d1c1)]
22+
- ✨ Add Exercise 23 ([#23](https://github.com/neutron0831/nlp100-ts/issues/23)) [[3622d9c](https://github.com/neutron0831/nlp100-ts/commit/3622d9c)]
23+
- ✅ Add Exercise 22 test ([#22](https://github.com/neutron0831/nlp100-ts/issues/22)) [[64df6f2](https://github.com/neutron0831/nlp100-ts/commit/64df6f2)]
24+
- ✨ Add Exercise 22 ([#22](https://github.com/neutron0831/nlp100-ts/issues/22)) [[6a873b3](https://github.com/neutron0831/nlp100-ts/commit/6a873b3)]
25+
- ✅ Add Exercise 21 test ([#21](https://github.com/neutron0831/nlp100-ts/issues/21)) [[d967eed](https://github.com/neutron0831/nlp100-ts/commit/d967eed)]
26+
- ✨ Add Exercise 21 ([#21](https://github.com/neutron0831/nlp100-ts/issues/21)) [[908322f](https://github.com/neutron0831/nlp100-ts/commit/908322f)]
27+
- ✅ Add Exercise 20 test ([#20](https://github.com/neutron0831/nlp100-ts/issues/20)) [[714ba20](https://github.com/neutron0831/nlp100-ts/commit/714ba20)]
28+
- ✨ Add Exercise 20 ([#20](https://github.com/neutron0831/nlp100-ts/issues/20)) [[99f7ac9](https://github.com/neutron0831/nlp100-ts/commit/99f7ac9)]
29+
- ✨ Update `AttachmentPanel.vue` [[0bc47ad](https://github.com/neutron0831/nlp100-ts/commit/0bc47ad)]
30+
31+
### Changed
32+
33+
- 📱 Wrap words when the output is long [[85e3e48](https://github.com/neutron0831/nlp100-ts/commit/85e3e48)]
34+
- 🚸 Scroll to exercises after rendering [[7d1f15a](https://github.com/neutron0831/nlp100-ts/commit/7d1f15a)]
35+
- 🍱 Add `enwiki-country.json.gz` [[72bde9e](https://github.com/neutron0831/nlp100-ts/commit/72bde9e)]
36+
- 👽 Update `pnpm-lock.yaml` [[88373fd](https://github.com/neutron0831/nlp100-ts/commit/88373fd)]
37+
38+
### Fixed
39+
40+
- 🚑 Update a personal access token [[5abb560](https://github.com/neutron0831/nlp100-ts/commit/5abb560)]
41+
42+
### Miscellaneous
43+
44+
- 📝 Add `README.md` of Chapter 3 [[e688799](https://github.com/neutron0831/nlp100-ts/commit/e688799)]
45+
- 🩹 Add `?url` suffix [[3ff2c0a](https://github.com/neutron0831/nlp100-ts/commit/3ff2c0a)]
46+
47+
348
<a name="0.2.0"></a>
449
## [v0.2.0](https://github.com/neutron0831/nlp100-ts/compare/v0.1.1...v0.2.0) (2023-03-26)
550

package.json

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "nlp100-ts",
3-
"version": "0.2.0",
3+
"version": "0.3.0",
44
"description": "NLP 100 Exercise 2020 (Rev 2) in TypeScript",
55
"keywords": [
66
"nlp100",
@@ -9,13 +9,16 @@
99
"license": "MIT",
1010
"author": "NEUTRON <neutron0831@gmail.com> (https://github.com/neutron0831)",
1111
"homepage": "https://nlp100-ts.netlify.app",
12+
"config": {
13+
"MAX_OLD_SPACE_SIZE": "--max-old-space-size=6144"
14+
},
1215
"scripts": {
1316
"postinstall": "husky install && chmod +x .husky/*",
1417
"dev": "vite",
1518
"build": "vue-tsc --noEmit && vite build --target esnext",
1619
"preview": "vite preview",
17-
"test": "vitest --silent",
18-
"test:ch": "vitest src/chapters/$ch/test",
20+
"test": "NODE_OPTIONS=$npm_package_config_MAX_OLD_SPACE_SIZE vitest --silent",
21+
"test:ch": "NODE_OPTIONS=$npm_package_config_MAX_OLD_SPACE_SIZE vitest src/chapters/$ch/test",
1922
"lint": "eslint --ext .js,.ts,.vue --fix --no-error-on-unmatched-pattern src",
2023
"format": "prettier --write --ignore-unknown --no-error-on-unmatched-pattern src",
2124
"commitmsg": "commitlint --config .commitlintrc.js --edit $GIT_PARAMS",
@@ -27,6 +30,7 @@
2730
"core-js": "^3.8.3",
2831
"lodash": "^4.17.21",
2932
"papaparse": "^5.4.1",
33+
"path-browserify": "^1.0.1",
3034
"pinia": "^2.0.23",
3135
"rehype-highlight": "^6.0.0",
3236
"rehype-katex": "^6.0.2",
@@ -59,6 +63,7 @@
5963
"@vitejs/plugin-vue": "^3.0.3",
6064
"@vue/eslint-config-typescript": "^11.0.0",
6165
"commitlint": "^17.4.4",
66+
"cross-env": "^7.0.3",
6267
"eslint": "^8.22.0",
6368
"eslint-config-prettier": "^8.6.0",
6469
"eslint-plugin-vue": "^9.3.0",

pnpm-lock.yaml

Lines changed: 25 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/assets/enwiki-country.json.gz

12 MB
Binary file not shown.

src/chapters/03/README.md

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Chapter 3: Regular Expression
2+
3+
The file [enwiki-country.json.gz](../../assets/enwiki-country.json.gz) stores Wikipedia articles in the format:
4+
5+
- Each line stores a Wikipedia article in JSON format
6+
- Each JSON document has key-value pairs:
7+
- Title of the article as the value for the `title` key
8+
- Body of the article as the value for the `text` key
9+
- The entire file is compressed by gzip
10+
11+
Write code that performs the following tasks.
12+
13+
## 20. [Read JSON documents](./ex20.ts)
14+
15+
Read the JSON documents and output the body of the article about the United Kingdom. Reuse the output in problems 21-29.
16+
17+
## 21. [Lines with category names](./ex21.ts)
18+
19+
Extract lines that define the categories of the article.
20+
21+
## 22. [Category names](./ex22.ts)
22+
23+
Extract the category names of the article.
24+
25+
## 23. [Section structure](./ex23.ts)
26+
27+
Extract section names in the article with their levels. For example, the level of the section is 1 for the MediaWiki markup `"== Section name =="`.
28+
29+
## 24. [Media references](./ex24.ts)
30+
31+
Extract references to media files linked from the article.
32+
33+
## 25. [Infobox](./ex25.ts)
34+
35+
Extract field names and their values in the Infobox "country", and store them in a dictionary object.
36+
37+
## 26. [Remove emphasis markups](./ex26.ts)
38+
39+
In addition to the processing of problem 25, remove MediaWiki emphasis markup from the values. See [Help:Cheatsheet](https://en.wikipedia.org/wiki/Help:Cheatsheet).
40+
41+
## 27. [Remove internal links](./ex27.ts)
42+
43+
In addition to the processing of problem 26, remove internal links from the values. See [Help:Cheatsheet](https://en.wikipedia.org/wiki/Help:Cheatsheet).
44+
45+
## 28. [Remove MediaWiki markups](./ex28.ts)
46+
47+
In addition to the processing of problem 27, remove as much of the remaining MediaWiki markup from the values as you can, and obtain the basic information of the country in plain-text format.
48+
49+
## 29. [Country flag](./ex29.ts)
50+
51+
Obtain the URL of the country flag by using the analysis result of Infobox. (Hint: convert a file reference to a URL by calling [imageinfo](https://www.mediawiki.org/wiki/API:Imageinfo) in [MediaWiki API](https://www.mediawiki.org/wiki/API:Main_page))

src/chapters/03/ex20.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// Using custom plugin [gzip-import.ts](https://gist.github.com/neutron0831/73cfd03ef5bc14cfb132cc32a7445b35)
2+
import enwikiCountryJson from '@/assets/enwiki-country.json.gz?raw'
3+
4+
/**
 * One Wikipedia article record: the shape each parsed line of the
 * `enwiki-country.json.gz` dump is expected to have.
 */
export interface Article {
  /** Title of the article */
  title: string
  /** Body of the article */
  text: string
}
14+
15+
/**
 * ### 20. Read JSON documents
 * Read the JSON documents and output the body of the article about the United Kingdom. Reuse the output in problems 21-29.
 *
 * The imported dump is handled as JSON Lines: the text is trimmed, split on
 * newlines, and every line is parsed as one article record.
 *
 * @returns The article whose `title` is exactly `'United Kingdom'`.
 * @throws {Error} If the dump contains no article with that title.
 * @throws {SyntaxError} If a line of the dump is not valid JSON.
 */
async function ex20(): Promise<Article> {
  const enwikiCountries: Article[] = enwikiCountryJson
    .trim()
    .split('\n')
    .map((line: string) => JSON.parse(line))
  const unitedKingdom = enwikiCountries.find(
    (country) => country.title === 'United Kingdom',
  )

  // `find` yields `undefined` when nothing matches; fail here with a clear
  // message rather than handing callers a value that is not an Article.
  if (unitedKingdom === undefined) {
    throw new Error(
      'Article "United Kingdom" was not found in enwiki-country.json.gz',
    )
  }

  return unitedKingdom
}
32+
33+
export { ex20 }

src/chapters/03/ex21.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import { ex20 } from './ex20'
2+
import { map } from 'lodash'
3+
4+
/**
 * ### 21. Lines with category names
 * Extract lines that define the categories of the article.
 *
 * @returns Every `[[Category:...]]` match found in the text of the article
 *   returned by `ex20`, in order of appearance.
 */
async function ex21(): Promise<string[]> {
  const article = await ex20()
  const categoryLinePattern = /(\[\[Category:.*\]\])/g
  const matches = Array.from(article.text.matchAll(categoryLinePattern))

  // Capture group 1 wraps the whole pattern, so it holds the full
  // `[[Category:...]]` text of each match.
  return map(matches, (match) => match[1])
}
16+
17+
export { ex21 }

src/chapters/03/ex22.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import { ex21 } from './ex21'
2+
import { map } from 'lodash'
3+
4+
/**
 * ### 22. Category names
 * Extract the category names of the article.
 *
 * A category link may carry a sort key after a pipe, e.g.
 * `[[Category:United Kingdom| ]]`. Only the part before the pipe is the
 * category name, so the sort key is matched but not captured.
 *
 * @returns The name of every category link found in the output of `ex21`,
 *   in order of appearance, without sort keys.
 */
async function ex22(): Promise<string[]> {
  const categories = await ex21()
  const categoryNames = map(
    [
      ...categories
        .join('\n')
        // Group 1 stops at the first `|` or `]`, so it cannot absorb a sort
        // key or run on into a second link on the same line.
        .matchAll(/\[\[Category:([^|\]\n]*)(?:\|[^\]\n]*)?\]\]/g),
    ],
    1,
  )

  return categoryNames
}
19+
20+
export { ex22 }

0 commit comments

Comments
 (0)