
Commit f115a06

add the pipelines
1 parent 67568c5 commit f115a06

34 files changed: +13144 additions, -354 deletions

README.md

Lines changed: 1 addition & 1 deletion

@@ -12,7 +12,7 @@
 <br />
 <div align="center">
   <a href="https://github.com/CodexEsto/textpipe">
-    <img src="images/logo.png" alt="Logo" width="120" height="140">
+    <img src="assets/textpipeRB.png" alt="Logo" width="230" height="150">
   </a>

 <h3 align="center">textpipe</h3>

assets/textpipe.jpg

46.8 KB

assets/textpipe.png

7.66 KB

assets/textpipe.svg

Lines changed: 122 additions & 0 deletions

assets/textpipeRB.png

21.5 KB

data/news-article-categories.csv

Lines changed: 12071 additions & 0 deletions
Large diffs are not rendered by default.
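The dataset ships with the commit but isn't rendered here. A minimal sketch of how it could feed the pipeline from test.py below, assuming hypothetical column names ("category" and "body") since the diff doesn't show the CSV header:

# Sketch: fit the pipeline on the bundled dataset. The column names
# ("category", "body") are assumptions -- the diff does not render the
# CSV header, so adjust them to match the actual file.
import pandas as pd

from textpipe.config import Config
from textpipe.pipeline import SuggestionPipeline

df = pd.read_csv("data/news-article-categories.csv")
texts = df["body"].dropna().tolist()  # hypothetical text column

pipeline = SuggestionPipeline(config=Config.get())
pipeline.fit(texts)
print(pipeline.suggest("renewable energy policy", k=3))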

test.py

Lines changed: 69 additions & 0 deletions

from textpipe.data.model_io import (
    save_model,
    load_model,
    save_vectorizer,
    load_vectorizer,
)
from textpipe.core.recommender import ContentRecommender
from textpipe.pipeline import SuggestionPipeline
from textpipe.config import Config

# Load the configuration
config = Config.get()

# Sample text data
sample_texts = [
    # Education
    "Online learning platforms are transforming education.",
    "Teachers use AI tools to personalize lessons.",
    "Distance learning increased during the pandemic.",
    # Politics
    "The government passed a new environmental bill.",
    "Elections will be held next year amid rising tensions.",
    "A new policy was announced by the health minister.",
    # Technology
    "Quantum computing promises exponential speedup.",
    "New AI models are revolutionizing software development.",
    "Cybersecurity threats are growing in the tech world.",
    # Environment
    "Climate change is causing rising sea levels.",
    "Renewable energy is crucial for sustainability.",
    "Deforestation threatens biodiversity globally.",
    # Business
    "Stock markets surged after the merger announcement.",
    "Startups are attracting record venture capital funding.",
    "Remote work is reshaping corporate culture.",
    # Sports
    "The team won the championship after a tough season.",
    "The Olympic Games are scheduled for next summer.",
    "Athletes are training hard for the world cup."
]


# Initialize the pipeline using the loaded config
pipeline = SuggestionPipeline(config=config)

# Fit the pipeline with the sample data
pipeline.fit(sample_texts)

# Save model and vectorizer
# save_model(pipeline.recommender, "models/recommender.pkl")
# save_vectorizer(pipeline.vectorizer, "models/vectorizer.pkl")

# Load saved model and vectorizer
# loaded_recommender = load_model("models/recommender.pkl")
# loaded_vectorizer = load_vectorizer("models/vectorizer.pkl")

# Create a new pipeline with the loaded objects
# new_pipeline = SuggestionPipeline(config=config)
# new_pipeline.recommender = loaded_recommender
# new_pipeline.vectorizer = loaded_vectorizer

# Query for testing
query = "How is AI changing education?"
recommendations = pipeline.suggest(query, k=3)

# Display recommendations
print(f"Recommendations for the query '{query}':")
for rec in recommendations:
    print(rec)
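The persistence flow in test.py is left commented out. A minimal sketch of the full round trip, assuming the same model_io API as those commented lines; creating models/ up front is an addition of this sketch, not something the commit does:

# Sketch: the save/load round trip that test.py leaves commented out.
# Assumes the model_io API shown above; the makedirs call is an
# assumption added here so the pickle paths exist.
import os

from textpipe.config import Config
from textpipe.pipeline import SuggestionPipeline
from textpipe.data.model_io import (
    save_model, load_model, save_vectorizer, load_vectorizer,
)

config = Config.get()
pipeline = SuggestionPipeline(config=config)
pipeline.fit([
    "Online learning platforms are transforming education.",
    "Teachers use AI tools to personalize lessons.",
])

os.makedirs("models", exist_ok=True)
save_model(pipeline.recommender, "models/recommender.pkl")
save_vectorizer(pipeline.vectorizer, "models/vectorizer.pkl")

# Restore into a fresh pipeline without refitting.
new_pipeline = SuggestionPipeline(config=config)
new_pipeline.recommender = load_model("models/recommender.pkl")
new_pipeline.vectorizer = load_vectorizer("models/vectorizer.pkl")
print(new_pipeline.suggest("How is AI changing education?", k=3))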

tests/config/test_nltk.py

Lines changed: 7 additions & 7 deletions (whitespace only: trailing spaces stripped and a final newline added; the test body remains commented out)

@@ -14,10 +14,10 @@
# """
# # Mock user home to temporary path
# monkeypatch.setattr(os.path, 'expanduser', lambda _: str(tmp_path))

# # Clear NLTK's internal paths
# nltk.data.path.clear()

# # Remove any existing test data
# nltk_dir = tmp_path / "nltk_data"
# if nltk_dir.exists():

@@ -29,21 +29,21 @@
# Covers both modern and legacy resource access
# """
# configure_nltk()

# # Verify modern resources
# assert nltk.data.find('tokenizers/punkt'), "Modern punkt resource missing"
# assert nltk.data.find('taggers/averaged_perceptron_tagger'), "POS tagger missing"

# # Verify legacy structure
# nltk_dir = os.path.expanduser("~/nltk_data")

# # Check main symlink
# legacy_root = os.path.join(nltk_dir, 'tokenizers/punkt_tab')
# assert os.path.islink(legacy_root), "Main legacy symlink not created"
# assert os.path.realpath(legacy_root) == os.path.join(nltk_dir, 'tokenizers/punkt')

# # Check language-specific symlink
# legacy_lang = os.path.join(legacy_root, 'english')
# if os.path.exists(legacy_lang):  # Some NLTK versions don't have language subdirs
#     assert os.path.islink(legacy_lang), "Language symlink missing"
#     assert os.path.realpath(legacy_lang) == os.path.join(nltk_dir, 'tokenizers/punkt/english')

tests/data/test_cleaner.py

Lines changed: 28 additions & 28 deletions

The entire test module is commented out in this commit (every line gains a leading "# "); there is no other change. The disabled content was:

# tests/data/test_cleaner.py

import pytest
from textpipe.data.cleaner import clean_text, remove_stopwords


def test_clean_text():
    """Test basic text cleaning functionality."""
    raw_text = "Hello, world! How's it going?"
    cleaned_text = clean_text(raw_text)
    assert cleaned_text == "hello world how's it going", "Basic cleaning failed"


def test_remove_stopwords():
    """Test stopword removal with case insensitivity."""
    text = "This is a sample text"
    stopwords = {"is", "the", "a", "in", "to", "and", "this"}
    cleaned_text = remove_stopwords(text, stopwords)
    assert cleaned_text == "sample text", "Stopword removal failed"


@pytest.mark.parametrize(
    "raw_text, expected_cleaned_text",
    [
        ("Hello!!!", "hello"),
        ("Python is awesome.", "python is awesome"),
        ("  Space and punctuation!  ", "space and punctuation"),
    ],
)
def test_clean_text_various_cases(raw_text, expected_cleaned_text):
    """Test edge cases and various input formats."""
    cleaned_text = clean_text(raw_text)
    assert cleaned_text == expected_cleaned_text, (
        f"Expected '{expected_cleaned_text}', got '{cleaned_text}'"
    )
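For reference, a sketch of cleaner functions consistent with these now-disabled tests; this is not necessarily textpipe.data.cleaner's actual code, just one version that satisfies every assertion in the file:

# Sketch: implementations consistent with the commented-out tests above.
# Not necessarily textpipe's actual cleaner -- one version that passes
# every assertion in the disabled test module.
import re

def clean_text(text: str) -> str:
    text = text.lower()
    # Keep word characters, whitespace, and apostrophes ("how's" survives).
    text = re.sub(r"[^\w\s']", " ", text)
    # Collapse runs of whitespace and trim the ends.
    return re.sub(r"\s+", " ", text).strip()

def remove_stopwords(text: str, stopwords: set[str]) -> str:
    # Case-insensitive membership test ("This" matches "this").
    return " ".join(w for w in text.split() if w.lower() not in stopwords)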

0 commit comments
