Skip to content

Commit 0ccf637

Browse files
committed
test wikipedia: use real data
1 parent 235d023 commit 0ccf637

File tree

1 file changed: 25 additions (+25) and 70 deletions (-70).

test/test-wikipedia.rb

Lines changed: 25 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -1,99 +1,54 @@
11
class WikipediaTest < Test::Unit::TestCase
2-
sub_test_case("ja") do
2+
sub_test_case("en") do
33
sub_test_case("articles") do
4-
include Helper::Sandbox
5-
64
def setup
7-
setup_sandbox
8-
@dataset = Datasets::Wikipedia.new(language: :ja,
5+
@dataset = Datasets::Wikipedia.new(language: :en,
96
type: :articles)
10-
def @dataset.cache_dir_path
11-
@cache_dir_path
12-
end
13-
def @dataset.cache_dir_path=(path)
14-
@cache_dir_path = path
15-
end
16-
@dataset.cache_dir_path = @tmp_dir
17-
end
18-
19-
def teardown
20-
teardown_sandbox
217
end
228

239
test("#each") do
24-
data_path = @dataset.__send__(:data_path)
25-
xml_path = data_path.sub_ext("")
26-
xml_path.open("w") do |xml_file|
27-
xml_file.puts(<<-XML)
28-
<mediawiki
29-
xmlns="http://www.mediawiki.org/xml/export-0.10/"
30-
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
31-
xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.10/ http://www.mediawiki.org/xml/export-0.10.xsd"
32-
version="0.10" xml:lang="ja">
33-
<siteinfo>
34-
<sitename>Wikipedia</sitename>
35-
</siteinfo>
36-
<page>
37-
<title>タイトル</title>
38-
<ns>4</ns>
39-
<id>1</id>
40-
<restrictions>sysop</restrictions>
41-
<revision>
42-
<id>3</id>
43-
<parentid>2</parentid>
44-
<timestamp>2004-04-30T14:46:00Z</timestamp>
45-
<contributor>
46-
<username>user</username>
47-
<id>10</id>
48-
</contributor>
49-
<minor />
50-
<comment>コメント</comment>
51-
<model>wikitext</model>
52-
<format>text/x-wiki</format>
53-
<text xml:space="preserve">テキスト</text>
54-
<sha1>a9674b19f8c56f785c91a555d0a144522bb318e6</sha1>
55-
</revision>
56-
</page>
57-
</mediawiki>
58-
XML
59-
end
60-
unless system("bzip2", xml_path.to_s)
61-
raise "failed to run bzip2"
62-
end
63-
64-
contributor = Datasets::Wikipedia::Contributor.new("user", 10)
10+
contributor = Datasets::Wikipedia::Contributor.new("Elli", 20842734)
6511
revision = Datasets::Wikipedia::Revision.new
66-
revision.id = 3
67-
revision.parent_id = 2
68-
revision.timestamp = Time.iso8601("2004-04-30T14:46:00Z")
12+
revision.id = 1002250816
13+
revision.parent_id = 854851586
14+
revision.timestamp = Time.iso8601("2021-01-23T15:15:01Z")
6915
revision.contributor = contributor
70-
revision.comment = "コメント"
16+
revision.comment = "shel"
7117
revision.model = "wikitext"
7218
revision.format = "text/x-wiki"
73-
revision.text = "テキスト"
74-
revision.sha1 = "a9674b19f8c56f785c91a555d0a144522bb318e6"
19+
revision.text = <<-TEXT.chomp
20+
#REDIRECT [[Computer accessibility]]
21+
22+
{{rcat shell|
23+
{{R from move}}
24+
{{R from CamelCase}}
25+
{{R unprintworthy}}
26+
}}
27+
TEXT
28+
revision.sha1 = "kmysdltgexdwkv2xsml3j44jb56dxvn"
7529
page = Datasets::Wikipedia::Page.new
76-
page.title = "タイトル"
77-
page.namespace = 4
78-
page.id = 1
79-
page.restrictions = ["sysop"]
30+
page.title = "AccessibleComputing"
31+
page.namespace = 0
32+
page.id = 10
33+
page.restrictions = nil
34+
page.redirect = "Computer accessibility"
8035
page.revision = revision
8136
assert_equal(page, @dataset.each.first)
8237
end
8338

8439
sub_test_case("#metadata") do
8540
test("#id") do
86-
assert_equal("wikipedia-ja-articles",
41+
assert_equal("wikipedia-en-articles",
8742
@dataset.metadata.id)
8843
end
8944

9045
test("#name") do
91-
assert_equal("Wikipedia articles (ja)",
46+
assert_equal("Wikipedia articles (en)",
9247
@dataset.metadata.name)
9348
end
9449

9550
test("#description") do
96-
assert_equal("Wikipedia articles in ja",
51+
assert_equal("Wikipedia articles in en",
9752
@dataset.metadata.description)
9853
end
9954
end

0 commit comments

Comments (0)