 # -*- coding: utf-8 -*-
 import pytest
 import glob
+import os
 
 from html_text import (extract_text, parse_html, cleaned_selector,
                        selector_to_text, NEWLINE_TAGS, DOUBLE_NEWLINE_TAGS)
 
 
+ROOT = os.path.dirname(os.path.abspath(__file__))
+
+
 @pytest.fixture(params=[
     {'guess_punct_space': True, 'guess_layout': False},
     {'guess_punct_space': False, 'guess_layout': False},
@@ -149,12 +153,16 @@ def test_personalize_newlines_sets(): |
     assert text == 'text\n\nmore\n\nand more text\n\nand some more'
 
 
-def test_webpages():
-    webpages = sorted(glob.glob('./test_webpages/*.html'))
-    extracted = sorted(glob.glob('./test_webpages/*.txt'))
-    for page, extr in zip(webpages, extracted):
-        with open(page, 'r', encoding='utf8') as f_in:
-            html = f_in.read()
-        with open(extr, 'r', encoding='utf8') as f_in:
-            expected = f_in.read()
-        assert extract_text(html) == expected
+def _load_examples():
+    webpages = sorted(glob.glob(os.path.join(ROOT, 'test_webpages', '*.html')))
+    extracted = sorted(glob.glob(os.path.join(ROOT, 'test_webpages', '*.txt')))
+    return list(zip(webpages, extracted))
+
+
+@pytest.mark.parametrize(['page', 'extracted'], _load_examples())
+def test_webpages(page, extracted):
+    with open(page, 'r', encoding='utf8') as f_in:
+        html = f_in.read()
+    with open(extracted, 'r', encoding='utf8') as f_in:
+        expected = f_in.read()
+    assert extract_text(html) == expected