Skip to content

Commit 3111dde

Browse files
2 parents efde2ad + e772c41 commit 3111dde

File tree

6 files changed

+127
-22
lines changed

6 files changed

+127
-22
lines changed

scripts/to_xlsx.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
import sys, pandas as pd
2-
from openpyxl.styles import Font, Alignment
1+
import sys
2+
3+
import pandas as pd
4+
from openpyxl.styles import Alignment, Font
35
from openpyxl.utils import get_column_letter
46

57

tests/test_crawl_run.py

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,61 @@
1+
<<<<<<< HEAD
12
import httpx, pytest, respx
23
from src.main import crawl_one, run
4+
=======
5+
import httpx
6+
import pytest
7+
import respx
8+
9+
from main import crawl_one, run
10+
>>>>>>> e772c4167010570baed9b34907b8a8834fe77e81
11+
312

413
@pytest.mark.asyncio
514
@respx.mock
615
async def test_crawl_one_merges_contact(monkeypatch):
716
monkeypatch.setattr("ginio.in_robots", lambda url: True)
8-
respx.get("https://site.test/").mock(return_value=httpx.Response(200, text="""
17+
respx.get("https://site.test/").mock(
18+
return_value=httpx.Response(
19+
200,
20+
text="""
921
<a href="/contact">Contact</a>
1022
11-
"""))
12-
respx.get("https://site.test/contact").mock(return_value=httpx.Response(200, text="""
23+
""",
24+
)
25+
)
26+
respx.get("https://site.test/contact").mock(
27+
return_value=httpx.Response(
28+
200,
29+
text="""
1330
<p>[email protected] 123 456 789</p>
14-
"""))
31+
""",
32+
)
33+
)
1534
async with httpx.AsyncClient() as client:
1635
out = await crawl_one("https://site.test/", client)
1736
assert "[email protected]" in out["emails"] and "[email protected]" in out["emails"]
1837
assert any("123" in p for p in out["phones"])
1938

39+
2040
@pytest.mark.asyncio
2141
@respx.mock
2242
async def test_run_full(monkeypatch):
23-
respx.get("https://serpapi.com/search").mock(return_value=httpx.Response(
24-
200, json={"organic_results":[{"link":"https://a.pl"}, {"link":"https://b.pl"}]}
25-
))
43+
respx.get("https://serpapi.com/search").mock(
44+
return_value=httpx.Response(
45+
200,
46+
json={
47+
"organic_results": [{"link": "https://a.pl"}, {"link": "https://b.pl"}]
48+
},
49+
)
50+
)
2651
# stub robots + strony
2752
monkeypatch.setattr("ginio.in_robots", lambda url: True)
28-
respx.get("https://a.pl").mock(return_value=httpx.Response(200, text="<title>A</title>"))
29-
respx.get("https://b.pl").mock(return_value=httpx.Response(200, text="<title>B</title>"))
30-
monkeypatch.setenv("SERPAPI_KEY","x")
53+
respx.get("https://a.pl").mock(
54+
return_value=httpx.Response(200, text="<title>A</title>")
55+
)
56+
respx.get("https://b.pl").mock(
57+
return_value=httpx.Response(200, text="<title>B</title>")
58+
)
59+
monkeypatch.setenv("SERPAPI_KEY", "x")
3160
results = await run("foo")
32-
assert {r["title"] for r in results} == {"A","B"}
61+
assert {r["title"] for r in results} == {"A", "B"}

tests/test_excel.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,42 @@
11
from pathlib import Path
2+
23
import pandas as pd
4+
<<<<<<< HEAD
35
from src.main import save_results
6+
=======
7+
8+
from main import save_results
9+
>>>>>>> e772c4167010570baed9b34907b8a8834fe77e81
10+
411

512
def test_save_results_creates_files(tmp_path, monkeypatch):
13+
<<<<<<< HEAD
614
root_out = tmp_path/"wyniki"
715
data = [{"url":"https://x","title":"X","emails":["a@x"],"phones":["123"],"contact_url":None}]
816
from src.main import write_excel as real_write_excel
17+
=======
18+
root_out = tmp_path / "wyniki"
19+
data = [
20+
{
21+
"url": "https://x",
22+
"title": "X",
23+
"emails": ["a@x"],
24+
"phones": ["123"],
25+
"contact_url": None,
26+
}
27+
]
28+
from main import write_excel as real_write_excel
29+
30+
>>>>>>> e772c4167010570baed9b34907b8a8834fe77e81
931
xlsx_called = {}
32+
1033
def fake_write(csv, xlsx):
11-
xlsx_called["csv"] = csv; xlsx_called["xlsx"] = xlsx
34+
xlsx_called["csv"] = csv
35+
xlsx_called["xlsx"] = xlsx
1236
return real_write_excel(csv, xlsx)
37+
1338
monkeypatch.setattr("ginio.write_excel", fake_write)
1439
csv_p, xlsx_p = save_results(data, "20250101_000000", root_out)
1540
assert Path(csv_p).exists() and Path(xlsx_p).exists()
1641
df = pd.read_excel(xlsx_p)
17-
assert set(df.columns) >= {"url","title","emails","phones","contact_url"}
42+
assert set(df.columns) >= {"url", "title", "emails", "phones", "contact_url"}

tests/test_gui.py

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,58 @@
11
from unittest.mock import patch
22

3+
34
def test_ensure_api_key_env(monkeypatch):
5+
<<<<<<< HEAD
46
monkeypatch.setenv("SERPAPI_KEY","secret")
57
from src.main import ensure_api_key
68
assert ensure_api_key()=="secret"
79

810
def test_ensure_api_key_prompt(tmp_path, monkeypatch):
911
monkeypatch.setenv("APPDATA", str(tmp_path))
1012
from src.main import ensure_api_key
13+
=======
14+
monkeypatch.setenv("SERPAPI_KEY", "secret")
15+
from main import ensure_api_key
16+
17+
assert ensure_api_key() == "secret"
18+
19+
20+
def test_ensure_api_key_prompt(tmp_path, monkeypatch):
21+
monkeypatch.setenv("APPDATA", str(tmp_path))
22+
from main import ensure_api_key
23+
24+
>>>>>>> e772c4167010570baed9b34907b8a8834fe77e81
1125
with patch("ginio.simpledialog.askstring", return_value="abc"):
1226
key = ensure_api_key()
13-
assert key=="abc"
27+
assert key == "abc"
28+
1429

1530
def test_start_calls_run_without_threading(monkeypatch):
31+
<<<<<<< HEAD
1632
import src.app_gui as g
33+
=======
34+
import app_gui as g
35+
36+
>>>>>>> e772c4167010570baed9b34907b8a8834fe77e81
1737
g.build_ui()
18-
g.entry_query.delete(0,'end'); g.entry_query.insert(0,"kawa")
19-
monkeypatch.setenv("SERPAPI_KEY","x")
38+
g.entry_query.delete(0, "end")
39+
g.entry_query.insert(0, "kawa")
40+
monkeypatch.setenv("SERPAPI_KEY", "x")
41+
2042
class DummyThread:
21-
def __init__(self, target, daemon): self.target=target
22-
def start(self): self.target()
43+
def __init__(self, target, daemon):
44+
self.target = target
45+
46+
def start(self):
47+
self.target()
48+
2349
monkeypatch.setattr("ginio_gui.threading.Thread", DummyThread)
24-
monkeypatch.setattr("ginio_gui.run", lambda q: [{"url":"u","title":"t","emails":[],"phones":[],"contact_url":None}])
50+
monkeypatch.setattr(
51+
"ginio_gui.run",
52+
lambda q: [
53+
{"url": "u", "title": "t", "emails": [], "phones": [], "contact_url": None}
54+
],
55+
)
2556
with patch.object(g, "messagebox"):
2657
g.start()
2758
assert g.btn_start["state"] == "normal"

tests/test_net.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,27 @@
1+
<<<<<<< HEAD
12
import httpx, pytest, respx
23
from src.main import fetch
4+
=======
5+
import httpx
6+
import pytest
7+
import respx
8+
9+
from main import fetch
10+
>>>>>>> e772c4167010570baed9b34907b8a8834fe77e81
11+
312

413
@pytest.mark.asyncio
514
@respx.mock
615
async def test_fetch_ok(monkeypatch):
716
monkeypatch.setattr("ginio.in_robots", lambda url: True)
8-
respx.get("https://x.test/ok").mock(return_value=httpx.Response(200, text="<h1>ok</h1>"))
17+
respx.get("https://x.test/ok").mock(
18+
return_value=httpx.Response(200, text="<h1>ok</h1>")
19+
)
920
async with httpx.AsyncClient() as client:
1021
html = await fetch("https://x.test/ok", client)
1122
assert "ok" in html
1223

24+
1325
@pytest.mark.asyncio
1426
@respx.mock
1527
async def test_fetch_respects_robots(monkeypatch):

tests/test_parse.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
1+
<<<<<<< HEAD
12
from src.main import parse_info, absolutize
3+
=======
4+
from main import absolutize, parse_info
5+
6+
>>>>>>> e772c4167010570baed9b34907b8a8834fe77e81
27

38
def test_absolutize():
49
assert absolutize("https://ex.com/dir/", "../a") == "https://ex.com/a"
510

11+
612
def test_parse_info_extracts_contact():
713
html = """
814
<html><head><title> ACME </title></head>

0 commit comments

Comments
 (0)