Skip to content

Commit a2e969e

Browse files
fixed tests
1 parent 3111dde commit a2e969e

File tree

8 files changed

+84
-170
lines changed

8 files changed

+84
-170
lines changed

requirements.txt

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,16 @@ dirty-equals
2626

2727
pytest-cov
2828

29-
pytest
29+
pytest
30+
31+
anyio
32+
33+
pytest-asyncio
34+
35+
pytest-tornasync
36+
37+
pytest-trio
38+
39+
pytest-twisted
40+
41+
twisted

src/main.py

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,15 @@
1212

1313

1414
def write_excel(csv_path, xlsx_path):
15-
df = pd.read_csv(csv_path)
15+
import pandas as pd
16+
from openpyxl.styles import Font
17+
18+
df = pd.read_csv(csv_path, dtype=str, keep_default_na=False)
19+
1620
for col in ("emails", "phones"):
1721
if col in df.columns:
1822
df[col] = (
1923
df[col]
20-
.fillna("")
2124
.str.replace(";", " ")
2225
.str.replace(r"\s+", " ", regex=True)
2326
.str.strip()
@@ -26,21 +29,13 @@ def write_excel(csv_path, xlsx_path):
2629
with pd.ExcelWriter(xlsx_path, engine="openpyxl") as w:
2730
df.to_excel(w, index=False, sheet_name="Wyniki")
2831
ws = w.sheets["Wyniki"]
29-
30-
# nagłówki, freeze, filtr
3132
for c in ws[1]:
3233
c.font = Font(bold=True)
3334
ws.freeze_panes = "A2"
3435
ws.auto_filter.ref = ws.dimensions
35-
36-
# auto-szerokość (z limitem)
3736
for col in ws.columns:
3837
length = max(len(str(c.value)) if c.value else 0 for c in col)
39-
ws.column_dimensions[col[0].column_letter].width = min(
40-
max(12, int(length * 0.9)), 60
41-
)
42-
43-
# hiperlinki
38+
ws.column_dimensions[col[0].column_letter].width = min(max(12, int(length * 0.9)), 60)
4439
cols = [c for c in ("url", "contact_url") if c in df.columns]
4540
for row in range(2, ws.max_row + 1):
4641
for name in cols:
@@ -52,6 +47,7 @@ def write_excel(csv_path, xlsx_path):
5247
cell.style = "Hyperlink"
5348

5449

50+
5551
def in_robots(url: str) -> bool:
5652
base = f"{urllib.parse.urlsplit(url).scheme}://{urllib.parse.urlsplit(url).netloc}"
5753
robots = rp.RobotFileParser()

tests/conftest.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
import os, sys
2-
ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src'))
3-
if ROOT not in sys.path:
4-
sys.path.insert(0, ROOT)
1+
import sys
2+
from pathlib import Path
3+
4+
REPO_ROOT = Path(__file__).resolve().parents[1]
5+
if str(REPO_ROOT) not in sys.path:
6+
sys.path.insert(0, str(REPO_ROOT))

tests/test_crawl_run.py

Lines changed: 20 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,61 +1,27 @@
1-
<<<<<<< HEAD
2-
import httpx, pytest, respx
1+
import httpx, respx, pytest
32
from src.main import crawl_one, run
4-
=======
5-
import httpx
6-
import pytest
7-
import respx
83

9-
from main import crawl_one, run
10-
>>>>>>> e772c4167010570baed9b34907b8a8834fe77e81
11-
12-
13-
@pytest.mark.asyncio
144
@respx.mock
5+
@pytest.mark.anyio
156
async def test_crawl_one_merges_contact(monkeypatch):
16-
monkeypatch.setattr("ginio.in_robots", lambda url: True)
17-
respx.get("https://site.test/").mock(
18-
return_value=httpx.Response(
19-
200,
20-
text="""
21-
<a href="/contact">Contact</a>
22-
23-
""",
24-
)
25-
)
26-
respx.get("https://site.test/contact").mock(
27-
return_value=httpx.Response(
28-
200,
29-
text="""
30-
<p>[email protected] 123 456 789</p>
31-
""",
32-
)
33-
)
34-
async with httpx.AsyncClient() as client:
35-
out = await crawl_one("https://site.test/", client)
36-
assert "[email protected]" in out["emails"] and "[email protected]" in out["emails"]
37-
assert any("123" in p for p in out["phones"])
38-
7+
monkeypatch.setattr("src.main.in_robots", lambda u: True)
8+
respx.get("https://site.test/").mock(return_value=httpx.Response(200, text="""
9+
<a href="/contact">Contact</a><p>[email protected]</p>"""))
10+
respx.get("https://site.test/contact").mock(return_value=httpx.Response(200, text="""
11+
<p>[email protected] 123 456 789</p>"""))
12+
async with httpx.AsyncClient() as c:
13+
out = await crawl_one("https://site.test/", c)
14+
assert {"[email protected]","[email protected]"} <= set(out["emails"])
3915

40-
@pytest.mark.asyncio
4116
@respx.mock
17+
@pytest.mark.anyio
4218
async def test_run_full(monkeypatch):
43-
respx.get("https://serpapi.com/search").mock(
44-
return_value=httpx.Response(
45-
200,
46-
json={
47-
"organic_results": [{"link": "https://a.pl"}, {"link": "https://b.pl"}]
48-
},
49-
)
50-
)
51-
# stub robots + strony
52-
monkeypatch.setattr("ginio.in_robots", lambda url: True)
53-
respx.get("https://a.pl").mock(
54-
return_value=httpx.Response(200, text="<title>A</title>")
55-
)
56-
respx.get("https://b.pl").mock(
57-
return_value=httpx.Response(200, text="<title>B</title>")
58-
)
59-
monkeypatch.setenv("SERPAPI_KEY", "x")
60-
results = await run("foo")
61-
assert {r["title"] for r in results} == {"A", "B"}
19+
respx.get("https://serpapi.com/search").mock(return_value=httpx.Response(
20+
200, json={"organic_results":[{"link":"https://a.pl"},{"link":"https://b.pl"}]}
21+
))
22+
monkeypatch.setattr("src.main.in_robots", lambda u: True)
23+
respx.get("https://a.pl").mock(return_value=httpx.Response(200, text="<title>A</title>"))
24+
respx.get("https://b.pl").mock(return_value=httpx.Response(200, text="<title>B</title>"))
25+
monkeypatch.setenv("SERPAPI_KEY","x")
26+
out = await run("foo")
27+
assert {r["title"] for r in out} == {"A","B"}

tests/test_excel.py

Lines changed: 10 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,21 @@
11
from pathlib import Path
2-
32
import pandas as pd
4-
<<<<<<< HEAD
5-
from src.main import save_results
6-
=======
7-
8-
from main import save_results
9-
>>>>>>> e772c4167010570baed9b34907b8a8834fe77e81
10-
3+
from src.app_gui import save_results
4+
from src.main import write_excel as real_write_excel
115

126
def test_save_results_creates_files(tmp_path, monkeypatch):
13-
<<<<<<< HEAD
14-
root_out = tmp_path/"wyniki"
15-
data = [{"url":"https://x","title":"X","emails":["a@x"],"phones":["123"],"contact_url":None}]
16-
from src.main import write_excel as real_write_excel
17-
=======
187
root_out = tmp_path / "wyniki"
19-
data = [
20-
{
21-
"url": "https://x",
22-
"title": "X",
23-
"emails": ["a@x"],
24-
"phones": ["123"],
25-
"contact_url": None,
26-
}
27-
]
28-
from main import write_excel as real_write_excel
8+
data = [{"url":"https://x","title":"X","emails":["a@x"],"phones":["123"],"contact_url":None}]
299

30-
>>>>>>> e772c4167010570baed9b34907b8a8834fe77e81
31-
xlsx_called = {}
10+
called = {}
11+
def fake_write(csv_path, xlsx_path):
12+
called["csv"] = csv_path; called["xlsx"] = xlsx_path
13+
return real_write_excel(csv_path, xlsx_path)
3214

33-
def fake_write(csv, xlsx):
34-
xlsx_called["csv"] = csv
35-
xlsx_called["xlsx"] = xlsx
36-
return real_write_excel(csv, xlsx)
15+
monkeypatch.setattr("src.app_gui.write_excel", fake_write)
3716

38-
monkeypatch.setattr("ginio.write_excel", fake_write)
3917
csv_p, xlsx_p = save_results(data, "20250101_000000", root_out)
4018
assert Path(csv_p).exists() and Path(xlsx_p).exists()
4119
df = pd.read_excel(xlsx_p)
42-
assert set(df.columns) >= {"url", "title", "emails", "phones", "contact_url"}
20+
assert {"url","title","emails","phones","contact_url"}.issubset(df.columns)
21+
assert called["csv"] == str(csv_p) and called["xlsx"] == str(xlsx_p)

tests/test_gui.py

Lines changed: 18 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,36 @@
11
from unittest.mock import patch
22

3-
43
def test_ensure_api_key_env(monkeypatch):
5-
<<<<<<< HEAD
64
monkeypatch.setenv("SERPAPI_KEY","secret")
7-
from src.main import ensure_api_key
8-
assert ensure_api_key()=="secret"
9-
10-
def test_ensure_api_key_prompt(tmp_path, monkeypatch):
11-
monkeypatch.setenv("APPDATA", str(tmp_path))
12-
from src.main import ensure_api_key
13-
=======
14-
monkeypatch.setenv("SERPAPI_KEY", "secret")
15-
from main import ensure_api_key
16-
5+
from src.app_gui import ensure_api_key
176
assert ensure_api_key() == "secret"
187

19-
208
def test_ensure_api_key_prompt(tmp_path, monkeypatch):
9+
monkeypatch.delenv("SERPAPI_KEY", raising=False)
2110
monkeypatch.setenv("APPDATA", str(tmp_path))
22-
from main import ensure_api_key
23-
24-
>>>>>>> e772c4167010570baed9b34907b8a8834fe77e81
25-
with patch("ginio.simpledialog.askstring", return_value="abc"):
26-
key = ensure_api_key()
27-
assert key == "abc"
28-
11+
from src.app_gui import ensure_api_key
12+
with patch("src.app_gui.simpledialog.askstring", return_value="abc"):
13+
assert ensure_api_key() == "abc"
2914

3015
def test_start_calls_run_without_threading(monkeypatch):
31-
<<<<<<< HEAD
3216
import src.app_gui as g
33-
=======
34-
import app_gui as g
35-
36-
>>>>>>> e772c4167010570baed9b34907b8a8834fe77e81
3717
g.build_ui()
38-
g.entry_query.delete(0, "end")
39-
g.entry_query.insert(0, "kawa")
40-
monkeypatch.setenv("SERPAPI_KEY", "x")
18+
g.entry_query.delete(0,'end'); g.entry_query.insert(0,"kawa")
19+
monkeypatch.setenv("SERPAPI_KEY","x")
4120

4221
class DummyThread:
43-
def __init__(self, target, daemon):
44-
self.target = target
45-
46-
def start(self):
47-
self.target()
48-
49-
monkeypatch.setattr("ginio_gui.threading.Thread", DummyThread)
50-
monkeypatch.setattr(
51-
"ginio_gui.run",
52-
lambda q: [
53-
{"url": "u", "title": "t", "emails": [], "phones": [], "contact_url": None}
54-
],
55-
)
22+
def __init__(self, target, daemon): self.target = target
23+
def start(self): self.target()
24+
25+
monkeypatch.setattr("src.app_gui.threading.Thread", DummyThread)
26+
monkeypatch.setattr(g.root, "after", lambda ms, fn: fn(), raising=False)
27+
28+
async def fake_run(q):
29+
return [{"url":"u","title":"t","emails":[],"phones":[],"contact_url":None}]
30+
monkeypatch.setattr("src.app_gui.run", fake_run)
31+
5632
with patch.object(g, "messagebox"):
5733
g.start()
5834
assert g.btn_start["state"] == "normal"
5935
assert "OK — zapisano" in g.status.get()
36+

tests/test_net.py

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,18 @@
1-
<<<<<<< HEAD
2-
import httpx, pytest, respx
1+
import httpx, respx, pytest
32
from src.main import fetch
4-
=======
5-
import httpx
6-
import pytest
7-
import respx
83

9-
from main import fetch
10-
>>>>>>> e772c4167010570baed9b34907b8a8834fe77e81
11-
12-
13-
@pytest.mark.asyncio
144
@respx.mock
5+
@pytest.mark.anyio
156
async def test_fetch_ok(monkeypatch):
16-
monkeypatch.setattr("ginio.in_robots", lambda url: True)
17-
respx.get("https://x.test/ok").mock(
18-
return_value=httpx.Response(200, text="<h1>ok</h1>")
19-
)
20-
async with httpx.AsyncClient() as client:
21-
html = await fetch("https://x.test/ok", client)
7+
monkeypatch.setattr("src.main.in_robots", lambda u: True)
8+
respx.get("https://x.test/ok").mock(return_value=httpx.Response(200, text="<h1>ok</h1>"))
9+
async with httpx.AsyncClient() as c:
10+
html = await fetch("https://x.test/ok", c)
2211
assert "ok" in html
2312

24-
25-
@pytest.mark.asyncio
2613
@respx.mock
14+
@pytest.mark.anyio
2715
async def test_fetch_respects_robots(monkeypatch):
28-
monkeypatch.setattr("ginio.in_robots", lambda url: False)
29-
async with httpx.AsyncClient() as client:
30-
assert await fetch("https://x.test/nope", client) is None
16+
monkeypatch.setattr("src.main.in_robots", lambda u: False)
17+
async with httpx.AsyncClient() as c:
18+
assert await fetch("https://x.test/nope", c) is None

tests/test_parse.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,8 @@
1-
<<<<<<< HEAD
21
from src.main import parse_info, absolutize
3-
=======
4-
from main import absolutize, parse_info
5-
6-
>>>>>>> e772c4167010570baed9b34907b8a8834fe77e81
72

83
def test_absolutize():
94
assert absolutize("https://ex.com/dir/", "../a") == "https://ex.com/a"
105

11-
126
def test_parse_info_extracts_contact():
137
html = """
148
<html><head><title> ACME </title></head>

0 commit comments

Comments (0)