Skip to content

Commit c4552bd

Browse files
committed
add support for relative file paths
1 parent 8831881 commit c4552bd

File tree

5 files changed

+80
-5
lines changed

5 files changed

+80
-5
lines changed

orangecontrib/text/corpus.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
import scipy.sparse as sp
2424

2525
from orangecontrib.text.language import ISO2LANG
26+
from orangecontrib.text.path import fix_relative_path, fix_absolute_path
2627

2728

2829
def get_sample_corpora_dir():
@@ -594,7 +595,10 @@ def from_table_rows(cls, source, row_indices):
594595
return c
595596

596597
@classmethod
597-
def from_file(cls, filename, sheet=None):
598+
def from_file(cls, filename, sheet=None, relative_to=None):
599+
if relative_to:
600+
filename = fix_absolute_path(filename, relative_to)
601+
598602
if not os.path.exists(filename): # check the default location
599603
abs_path = os.path.join(get_sample_corpora_dir(), filename)
600604
if not abs_path.endswith('.tab'):
@@ -609,6 +613,13 @@ def from_file(cls, filename, sheet=None):
609613
name = table.name
610614
table = cls.from_numpy(table.domain, table.X, table.Y, table.metas, table.W, attributes=table.attributes)
611615
table.name = name
616+
617+
# Save relative path if possible (for reopening later)
618+
if relative_to:
619+
table.attributes["path"] = fix_relative_path(filename, relative_to)
620+
else:
621+
table.attributes["path"] = filename
622+
612623
return table
613624

614625
@staticmethod
@@ -654,7 +665,6 @@ def retain_preprocessing(orig, new, key=...):
654665
new._set_unique_titles()
655666
new._infer_text_features()
656667

657-
658668
@summarize.register(Corpus)
659669
def summarize_corpus(corpus: Corpus) -> PartialSummary:
660670
"""

orangecontrib/text/path.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,14 @@
1+
import os
2+
3+
def fix_relative_path(path, base):
    """Express ``path`` relative to ``base`` when that is possible.

    Falls back to returning ``path`` unchanged whenever
    ``os.path.relpath`` raises ``ValueError``.
    """
    try:
        relative = os.path.relpath(path, start=base)
    except ValueError:
        # No relative form could be computed; keep the caller's value.
        relative = path
    return relative
9+
10+
def fix_absolute_path(path, base):
    """Resolve ``path`` against ``base`` unless it is already absolute.

    An absolute ``path`` is returned untouched; anything else is joined
    onto ``base`` and passed through ``os.path.abspath``.
    """
    if os.path.isabs(path):
        return path
    joined = os.path.join(base, path)
    return os.path.abspath(joined)

orangecontrib/text/widgets/owcorpus.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
migrate_language_name,
2525
)
2626
from orangecontrib.text.widgets.utils import widgets, QSize
27+
from orangecontrib.text.path import fix_relative_path, fix_absolute_path
2728

2829

2930
class CorpusContextHandler(DomainContextHandler):
@@ -369,6 +370,22 @@ def describe(features):
369370
('Other features', describe(domain.attributes)),
370371
('Target', describe(domain.class_vars)),
371372
))
373+
374+
def save_settings(self, settings):
    """Store the widget's corpus path in ``settings["corpus_path"]``.

    Nothing is written when ``corpus_path`` is missing or empty. When a
    ``workflow_file`` is set, the path is stored relative to that file's
    directory; otherwise it is stored as-is.
    """
    corpus_path = getattr(self, "corpus_path", None)
    if not corpus_path:
        return

    workflow_file = getattr(self, "workflow_file", None)
    if workflow_file:
        workflow_dir = os.path.dirname(workflow_file)
        corpus_path = fix_relative_path(corpus_path, workflow_dir)
    settings["corpus_path"] = corpus_path
381+
382+
def load_settings(self, settings):
    """Restore ``self.corpus_path`` from ``settings["corpus_path"]``.

    When a stored path exists and ``workflow_file`` is set, the path is
    resolved against the workflow file's directory. In every other case
    the stored value (possibly ``None``) is assigned unchanged.
    """
    path = settings.get("corpus_path")
    # Check the value, not only the attribute's presence: a widget whose
    # workflow_file is None would otherwise hit os.path.dirname(None) and
    # raise TypeError. This mirrors the guard in save_settings.
    workflow_file = getattr(self, "workflow_file", None)
    if path and workflow_file:
        base = os.path.dirname(workflow_file)
        path = fix_absolute_path(path, base)
    self.corpus_path = path
372389

373390
@classmethod
374391
def migrate_context(cls, context, version):

orangecontrib/text/widgets/tests/test_owcorpus.py

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import os
22
import tempfile
33
import unittest
4+
import shutil
5+
import pickle
46

57
import numpy as np
68
from Orange.data import Table, Domain, StringVariable, ContinuousVariable
@@ -430,6 +432,38 @@ def test_migrate_settings(self):
430432
self.wait_until_finished(widget=widget)
431433
self.assertIsNone(widget.language)
432434

435+
def test_relative_corpus_path_serialization(self):
    """
    Test if relative paths are properly saved and reloaded.

    The value written by ``save_settings`` is checked directly and then
    passed unchanged to ``load_settings`` on a second widget whose
    workflow lives in a different directory.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Create a dummy corpus file next to the simulated workflow file
        corpus = Corpus.from_file("book-excerpts")
        corpus_path = os.path.join(tmp_dir, "test.corpus")
        with open(corpus_path, "wb") as f:
            pickle.dump(corpus, f)

        # Simulate loading the file into widget
        self.widget.workflow_file = os.path.join(tmp_dir, "workflow.ows")
        self.widget.corpus_path = corpus_path

        settings = {}
        self.widget.save_settings(settings)
        # The corpus sits beside the workflow, so only the file name
        # should have been stored.
        self.assertEqual("test.corpus", settings["corpus_path"])

        # Simulate moving workflow and corpus to new directory
        with tempfile.TemporaryDirectory() as new_dir:
            new_corpus = os.path.join(new_dir, "test.corpus")
            new_workflow = os.path.join(new_dir, "workflow.ows")
            shutil.copy2(corpus_path, new_corpus)

            # Simulate loading settings in new widget; the settings dict
            # is deliberately left exactly as save_settings produced it.
            restored = self.create_widget(OWCorpus)
            restored.workflow_file = new_workflow
            restored.load_settings(settings)

            self.assertTrue(os.path.isabs(restored.corpus_path))
            self.assertEqual(
                os.path.abspath(new_corpus), restored.corpus_path
            )
            self.assertTrue(os.path.exists(restored.corpus_path))
433467

434468
if __name__ == "__main__":
435469
unittest.main()

orangecontrib/text/widgets/tests/test_owcorpusviewer.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -122,7 +122,7 @@ def test_search(self):
122122
self.process_events()
123123
out_corpus = self.get_output(self.widget.Outputs.matching_docs)
124124
self.assertEqual(len(out_corpus), 1)
125-
self.assertEqual(self.widget.n_matches, 7)
125+
self.assertEqual(int(self.widget.n_matches), 7)
126126

127127
# first document is selected, when filter with word that is not in
128128
# selected document, first of shown documents is selected
@@ -131,14 +131,14 @@ def test_search(self):
131131
self.process_events()
132132
self.assertEqual(1, len(self.get_output(self.widget.Outputs.matching_docs)))
133133
# word count doesn't depend on selection
134-
self.assertEqual(self.widget.n_matches, 7)
134+
self.assertEqual(int(self.widget.n_matches), 7)
135135

136136
# when filter is removed, matched words is 0
137137
self.widget.regexp_filter = ""
138138
self.widget.refresh_search()
139139
self.process_events()
140140
self.wait_until_finished()
141-
self.assertEqual(self.widget.n_matches, 0)
141+
self.assertEqual(int(self.widget.n_matches), 0)
142142

143143
def test_invalid_regex(self):
144144
# Error is shown when invalid regex is entered

0 commit comments

Comments
 (0)