Skip to content

Commit e7dcc5f

Browse files
committed
update content/practice/advanced.md
1 parent b0bc856 commit e7dcc5f

File tree

1 file changed

+10
-16
lines changed

1 file changed

+10
-16
lines changed

content/practice/advanced.md

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -38,28 +38,22 @@ Some tips:
3838
{% end %}
3939
{% solution() %}
4040
```phel
41-
# Load the full book content from the web into the `full-book` constant
4241
(def book-url "https://gist.githubusercontent.com/Chemaclass/da9a0ba72adee6644193c730d4f307b2/raw/1164593f76ae7157d816bcc8d700937dfb73420e/moby-dick.txt")
4342
(def full-book (php/file_get_contents book-url)) # total length 643063 chars
43+
(def words (re-seq "/\\w+/" full-book))
4444
45-
# Take only a part of the full-book in order to speed the execution example.
46-
(def book (php/substr full-book 0 30000))
45+
# Create a set with common stop-words to filter out (words are lowercased before lookup)
46+
(def stop-words #{"the" "he" "at" "but" "there" "of" "was" "be" "not" "use" "and" "for" "this" "what" "an" "a" "on" "have" "all" "each" "to" "are" "from" "were" "which" "in" "as" "or" "we" "she" "is" "with" "one" "when" "do" "you" "his" "had" "your" "how" "that" "they" "by" "can" "their" "it" "I" "word" "said" "if" "i" "s"})
4747
48-
# Create a vector using all words from the book
49-
(def words (re-seq "/\b\w+\b/" book))
50-
51-
# Create a set with the common words that you want to filter out
52-
(def stop-words (set "the" "he" "at" "but" "there" "of" "was" "be" "not" "use" "and" "for" "this" "what" "an" "a" "on" "have" "all" "each" "to" "are" "from" "were" "which" "in" "as" "or" "we" "she" "is" "with" "ine" "when" "do" "you" "his" "had" "your" "how" "that" "they" "by" "can" "their" "it" "I" "word" "said" "if" "i" "s"))
53-
54-
# To each word
48+
# Process words and find top 5 most frequent (excluding stop-words)
5549
(->> words
56-
# map them as lower case
50+
# map to lowercase for case-insensitive comparison
5751
(map php/strtolower)
58-
# filter out the common words
59-
(filter |(nil? (stop-words $)))
60-
# calculate the frequencies of their appearance
52+
# filter out common stop-words
53+
(filter |(not (contains? stop-words $)))
54+
# calculate frequency of each word
6155
(frequencies)
62-
# and create pairs of `word -> number of occurrences`
56+
# convert to [word count] pairs
6357
(pairs)
6458
# sort by the number of occurrences
6559
(sort-by second)
@@ -71,7 +65,7 @@ Some tips:
7165
(apply println))
7266
7367
# Output:
74-
# [whale 81] [whales 26] [sea 21] [some 19] [up 17]
68+
# [whale 566] [like 323] [then 302] [upon 298] [ye 288]
7569
```
7670
{% end %}
7771

0 commit comments

Comments
 (0)