Skip to content

Commit 8ab24b6

Browse files
committed
distinguish server vs internet issues
1 parent 8831881 commit 8ab24b6

File tree

3 files changed

+130
-10
lines changed

3 files changed

+130
-10
lines changed
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import pytest
2+
from orangecontrib.text.vectorization.document_embedder import DocumentEmbedder
3+
from orangecontrib.text import Corpus
4+
from Orange.misc.utils.embedder_utils import EmbeddingConnectionError
5+
from orangecontrib.text.vectorization.document_embedder import _ServerEmbedder
6+
from urllib.parse import urlparse
7+
import socket
8+
9+
@pytest.fixture
def dummy_corpus():
    """Provide a single-document corpus named "test" for the embedding tests."""
    documents = ["This is a test document."]
    return Corpus.from_documents(documents, name="test")
12+
13+
def test_embedding_valid_server(dummy_corpus):
    """Embedding with the default embedder yields a corpus and skips nothing.

    NOTE(review): this appears to go through the real embedding server, so it
    presumably needs network access -- confirm before running it in offline CI.
    """
    result = DocumentEmbedder(language="en", aggregator="Mean")._transform(
        dummy_corpus, None
    )
    embedded, skipped_docs = result
    assert embedded is not None
    # "Nothing skipped" may be reported either as None or as an empty container.
    assert skipped_docs is None or len(skipped_docs) == 0
18+
19+
def test_invalid_server_raises(dummy_corpus):
    """An unreachable embedding server surfaces as EmbeddingConnectionError.

    The test installs a ``DocumentEmbedder`` subclass whose ``_transform``
    probes a host that cannot be reached and converts the resulting socket
    failure into ``EmbeddingConnectionError("The server is not responding")``.

    NOTE(review): because ``_transform`` is overridden, this exercises the
    stub below rather than ``DocumentEmbedder._transform`` itself.
    """

    class BrokenEmbedder(DocumentEmbedder):
        """Embedder stub pointing at a server that can never be reached."""

        def _transform(self, corpus, _, callback=None):
            embedder = _ServerEmbedder(
                aggregator="mean",
                model_name="fasttext-en",
                max_parallel_requests=100,
                # The ``.invalid`` TLD is reserved and never resolves, so the
                # test cannot start passing/failing because someone registers
                # the domain.
                server_url="https://api.invalidserver.invalid",
                embedder_type="text",
            )

            url = urlparse(embedder.server_url)
            host, port = url.hostname, url.port or (443 if url.scheme == "https" else 80)
            try:
                # Context manager guarantees the probe socket is closed if the
                # connection unexpectedly succeeds.
                with socket.create_connection((host, port), timeout=3):
                    pass
            except OSError as e:
                # DNS failures, refusals and timeouts are all OSError
                # subclasses; anything else is a genuine bug and should not be
                # masked as a connection problem.
                raise EmbeddingConnectionError("The server is not responding") from e

            return [], None

    embedder = BrokenEmbedder(language="en", aggregator="Mean")
    with pytest.raises(EmbeddingConnectionError, match="server is not responding"):
        embedder._transform(dummy_corpus, None)
43+
44+
def test_no_internet_raises(dummy_corpus, monkeypatch):
    """A failing socket layer surfaces as EmbeddingConnectionError.

    ``socket.create_connection`` is monkeypatched to raise ``OSError``; the
    stub embedder below turns that failure into
    ``EmbeddingConnectionError("No internet connection")``.
    """

    class NoInternetEmbedder(DocumentEmbedder):
        """Embedder stub whose connectivity probe always fails."""

        def _transform(self, corpus, _, callback=None):
            server = _ServerEmbedder(
                aggregator="mean",
                model_name="fasttext-en",
                max_parallel_requests=100,
                server_url="https://api.garaza.io",
                embedder_type="text",
            )

            def refuse_connection(*args, **kwargs):
                # Stand-in for a machine without any network access.
                raise OSError("Simulated: No internet connection")

            monkeypatch.setattr("socket.create_connection", refuse_connection)

            parsed = urlparse(server.server_url)
            host = parsed.hostname
            # An explicit port wins; otherwise fall back to the scheme default.
            if parsed.port:
                port = parsed.port
            elif parsed.scheme == "https":
                port = 443
            else:
                port = 80

            try:
                socket.create_connection((host, port), timeout=3)
            except Exception as err:
                raise EmbeddingConnectionError("No internet connection") from err

            return [], None

    offline_embedder = NoInternetEmbedder(language="en", aggregator="Mean")
    with pytest.raises(EmbeddingConnectionError, match="No internet connection"):
        offline_embedder._transform(dummy_corpus, None)

orangecontrib/text/vectorization/document_embedder.py

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616
from orangecontrib.text import Corpus
1717
from orangecontrib.text.vectorization.base import BaseVectorizer
1818

19+
from Orange.misc.utils.embedder_utils import EmbeddingConnectionError
20+
import socket
21+
from urllib.parse import urlparse
22+
1923
AGGREGATORS = ["mean", "sum", "max", "min"]
2024
AGGREGATORS_ITEMS = ['Mean', 'Sum', 'Max', 'Min']
2125
# fmt: off
@@ -87,10 +91,36 @@ def _transform(
8791
server_url="https://api.garaza.io",
8892
embedder_type="text",
8993
)
90-
embs = embedder.embedd_data(
91-
list(corpus.ngrams) if isinstance(corpus, Corpus) else corpus,
92-
callback=callback,
93-
)
94+
95+
try:
96+
url = urlparse(embedder.server_url)
97+
host, port = url.hostname, url.port or (443 if url.scheme == "https" else 80)
98+
99+
try:
100+
sock = socket.create_connection((host, port), timeout=3)
101+
sock.close()
102+
except socket.gaierror as e:
103+
try:
104+
socket.gethostbyname("example.com")
105+
raise ConnectionError("The server is not responding (bad hostname)") from e
106+
except socket.gaierror:
107+
raise OSError("No internet connection (DNS failure)") from e
108+
except (ConnectionRefusedError, socket.timeout, OSError):
109+
raise ConnectionError("The server is not responding (socket check)")
110+
111+
embs = embedder.embedd_data(
112+
list(corpus.ngrams) if isinstance(corpus, Corpus) else corpus,
113+
callback=callback,
114+
)
115+
if not embs or all(e is None for e in embs):
116+
raise ConnectionError("The server is not responding (no embeddings returned)")
117+
118+
except OSError as e:
119+
raise EmbeddingConnectionError("No internet connection") from e
120+
except ConnectionError as e:
121+
raise EmbeddingConnectionError("The server is not responding") from e
122+
except Exception as e:
123+
raise EmbeddingConnectionError(f"Unknown network error: {e}") from e
94124

95125
if isinstance(corpus, list):
96126
return embs
@@ -167,4 +197,4 @@ async def _encode_data_instance(self, data_instance: Any) -> Optional[bytes]:
167197
if __name__ == '__main__':
168198
with DocumentEmbedder(language='en', aggregator='Max') as embedder:
169199
embedder.clear_cache()
170-
embedder(Corpus.from_file('deerwester'))
200+
embedder(Corpus.from_file('deerwester'))

orangecontrib/text/widgets/owdocumentembedding.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
from typing import Dict, Optional, Any
2-
32
from AnyQt.QtCore import Qt
43
from AnyQt.QtWidgets import QVBoxLayout, QPushButton, QStyle
54
from Orange.misc.utils.embedder_utils import EmbeddingConnectionError
65
from Orange.widgets import gui
76
from Orange.widgets.settings import Setting
87
from Orange.widgets.widget import Msg, Output, OWWidget
8+
import os
9+
import socket
910

1011
from orangecontrib.text.corpus import Corpus
1112
from orangecontrib.text.language import (
@@ -50,8 +51,10 @@ class Outputs(OWBaseVectorizer.Outputs):
5051

5152
class Error(OWWidget.Error):
5253
no_connection = Msg(
53-
"No internet connection. Please establish a connection or use "
54-
"another vectorizer."
54+
"No internet connection. Please establish a connection or use another vectorizer."
55+
)
56+
server_unresponsive = Msg(
57+
"The server at {server} is not responding. Please check the address or try again later."
5558
)
5659
unexpected_error = Msg("Embedding error: {}")
5760

@@ -150,8 +153,23 @@ def on_done(self, result):
150153
def on_exception(self, ex: Exception):
    """Show the widget error that matches a failed embedding run.

    For an ``EmbeddingConnectionError`` the method probes general internet
    connectivity and reports either an unresponsive server (internet is
    reachable) or a missing internet connection. Any other exception is
    reported through ``unexpected_error``. In every case the cancel button
    is disabled first and ``self.cancel()`` is called at the end.

    Parameters
    ----------
    ex : Exception
        The exception raised by the embedding task.
    """
    self.cancel_button.setDisabled(True)
    if isinstance(ex, EmbeddingConnectionError):
        # Check basic internet connectivity with a short TCP connection to
        # 8.8.8.8:53. The context manager closes the probe socket right away
        # so a successful probe does not leak a file descriptor.
        # NOTE(review): this blocks for up to 3 seconds on the calling
        # thread -- confirm that is acceptable where on_exception runs.
        try:
            with socket.create_connection(("8.8.8.8", 53), timeout=3):
                internet_available = True
        except OSError:
            internet_available = False

        if internet_available:
            # Internet works, so the server may be invalid or unresponsive.
            server_url = os.getenv("ORANGE_EMBEDDING_API_URL", "https://api.garaza.io")
            self.Error.server_unresponsive(server=server_url)
        else:
            # General lack of internet connection.
            self.Error.no_connection()
    else:
        # Everything that is not a connection problem is shown verbatim.
        self.Error.unexpected_error(str(ex))
    self.cancel()
157175

@@ -190,4 +208,4 @@ def send_report(self):
190208
if __name__ == "__main__":
191209
from orangewidget.utils.widgetpreview import WidgetPreview
192210

193-
WidgetPreview(OWDocumentEmbedding).run(Corpus.from_file("book-excerpts"))
211+
WidgetPreview(OWDocumentEmbedding).run(Corpus.from_file("book-excerpts"))

0 commit comments

Comments
 (0)