diff --git a/scrap/medium_scraping.py b/scrap/medium_scraping.py
new file mode 100644
index 0000000..47405de
--- /dev/null
+++ b/scrap/medium_scraping.py
@@ -0,0 +1,65 @@
+import feedparser
+from datetime import datetime
+from typing import List, Dict, Optional
+import time
+
+SOURCE_SITE = "medium"
+
+RSS_FEEDS = [
+    "https://medium.com/feed/tag/artificial-intelligence",
+    "https://medium.com/feed/tag/machine-learning",
+    "https://medium.com/feed/tag/deep-learning",
+    "https://medium.com/feed/tag/ai",
+]
+
+def normalize_medium_entry(entry: feedparser.FeedParserDict) -> Dict:
+    """Normalize a Medium RSS entry into the unified format."""
+    entry_id = entry.get('link', '')
+
+    published_date = datetime.utcnow().isoformat()
+    if getattr(entry, "published_parsed", None):
+        published_date = datetime.fromtimestamp(time.mktime(entry.published_parsed)).isoformat()
+
+    keywords = [tag.term for tag in entry.get('tags', [])] if 'tags' in entry else []
+
+    return {
+        "id": entry_id,
+        "source_site": SOURCE_SITE,
+        "title": entry.get('title', 'N/A'),
+        "description": entry.get('summary', 'N/A'),
+        "author_info": entry.get('author', 'N/A'),
+        "keywords": ", ".join(keywords),
+        "content_url": entry_id,
+        "published_date": published_date,
+        "item_type": "article",
+    }
+
+def scrape_medium(max_articles_per_feed: int = 10) -> List[Dict]:
+    """Scrape the Medium RSS feeds and return unified items."""
+    all_items = []
+    unique_links = set()
+
+    for feed_url in RSS_FEEDS:
+        print(f"📡 Fetching RSS: {feed_url}")
+        try:
+            feed = feedparser.parse(feed_url)
+
+            for entry in feed.entries[:max_articles_per_feed]:
+                link = entry.get('link')
+                if link and link not in unique_links:
+                    all_items.append(normalize_medium_entry(entry))
+                    unique_links.add(link)
+
+        except Exception as e:
+            print(f"❌ Error fetching {feed_url}: {e}")
+        time.sleep(1)
+
+    return all_items
+
+if __name__ == "__main__":
+    results = scrape_medium(max_articles_per_feed=2)
+    print(f"Total Medium items scraped: {len(results)}")
+    if results:
+        print("\nSample unified item:")
+        import json
+        print(json.dumps(results[0], indent=2))
\ No newline at end of file
diff --git a/scrap/scrap_arxiv.py b/scrap/scrap_arxiv.py
new file mode 100644
index 0000000..868b857
--- /dev/null
+++ b/scrap/scrap_arxiv.py
@@ -0,0 +1,59 @@
+import arxiv
+from datetime import datetime
+from typing import List, Dict
+
+# Tech-watch tool constants
+SOURCE_SITE = "arxiv"
+CATEGORY = "cs.LG"
+
+def normalize_arxiv_result(paper: arxiv.Result) -> Dict:
+    """Normalize an arXiv result into the unified format."""
+
+    authors = ", ".join([a.name for a in paper.authors])
+
+    link = paper.entry_id
+
+    keywords_list = [paper.primary_category]
+    if paper.categories:
+        keywords_list.extend(paper.categories)
+
+    return {
+        "id": link,
+        "source_site": SOURCE_SITE,
+        "title": paper.title.replace('\n', ' '),
+        "description": paper.summary.replace('\n', ' '),
+        "author_info": authors,
+        "keywords": ", ".join(keywords_list),
+        "content_url": link,
+        "published_date": paper.published.isoformat(),
+        "item_type": "paper",
+    }
+
+def scrape_arxiv(category: str = CATEGORY, max_results: int = 10) -> List[Dict]:
+    """Scrape arXiv for a category and return unified items."""
+
+    try:
+        search = arxiv.Search(
+            query=f"cat:{category}",
+            max_results=max_results,
+            sort_by=arxiv.SortCriterion.SubmittedDate,
+            sort_order=arxiv.SortOrder.Descending
+        )
+
+        normalized_results = []
+        for result in search.results():
+            normalized_results.append(normalize_arxiv_result(result))
+
+        return normalized_results
+
+    except Exception as e:
+        print(f"[ERROR] arXiv search: {e}")
+        return []
+
+if __name__ == "__main__":
+    results = scrape_arxiv(max_results=5)
+    print(f"Total arXiv items scraped: {len(results)}")
+    if results:
+        print("\nSample unified item:")
+        import json
+        print(json.dumps(results[0], indent=2))
\ No newline at end of file
diff --git a/scrap/scrap_le_monde.py b/scrap/scrap_le_monde.py
new file mode 100644
index 0000000..baf74d7
--- /dev/null
+++ b/scrap/scrap_le_monde.py
@@ -0,0 +1,71 @@
+import feedparser
+import time
+from datetime import datetime
+from typing import List, Dict
+
+SOURCE_SITE = "le_monde"
+
+FEEDS = [
+    "https://www.lemonde.fr/international/rss_full.xml",
+    "https://www.lemonde.fr/actualite-medias/rss_full.xml",
+    "https://www.lemonde.fr/en_continu/rss_full.xml"
+]
+
+def normalize_lemonde_entry(entry: feedparser.FeedParserDict, feed_url: str) -> Dict:
+    """Normalize a Le Monde RSS entry into the unified format."""
+    entry_id = getattr(entry, "id", None) or getattr(entry, "link", None)
+
+    published_date = datetime.utcnow().isoformat()
+    if getattr(entry, "published_parsed", None):
+        published_date = datetime.fromtimestamp(time.mktime(entry.published_parsed)).isoformat()
+    elif getattr(entry, "updated_parsed", None):
+        published_date = datetime.fromtimestamp(time.mktime(entry.updated_parsed)).isoformat()
+
+    category = "actualité générale"
+    if "international" in feed_url:
+        category = "international"
+    elif "medias" in feed_url:
+        category = "actualité médias"
+    elif "continu" in feed_url:
+        category = "en continu"
+
+    return {
+        "id": entry_id,
+        "source_site": SOURCE_SITE,
+        "title": getattr(entry, "title", ""),
+        "description": getattr(entry, "summary", ""),
+        "author_info": getattr(entry, "author", "Le Monde"),
+        "keywords": category,
+        "content_url": getattr(entry, "link", ""),
+        "published_date": published_date,
+        "item_type": "article",
+    }
+
+def scrape_lemonde(feeds: List[str] = FEEDS) -> List[Dict]:
+    """Scrape the Le Monde RSS feeds and return unified items."""
+    all_items = []
+    unique_ids = set()
+
+    for feed_url in feeds:
+        try:
+            d = feedparser.parse(feed_url)
+
+            for entry in d.entries:
+                entry_id = getattr(entry, "id", None) or getattr(entry, "link", None)
+                if entry_id and entry_id not in unique_ids:
+                    all_items.append(normalize_lemonde_entry(entry, feed_url))
+                    unique_ids.add(entry_id)
+
+        except Exception as e:
+            print(f"[ERROR] fetching feed {feed_url}: {e}")
+        time.sleep(1)
+
+    return all_items
+
+if __name__ == "__main__":
+    results = scrape_lemonde(feeds=FEEDS[:1])
+    print(f"Total Le Monde items scraped: {len(results)}")
+    if results:
+        print("\nSample unified item:")
+        import json
+        print(json.dumps(results[0], indent=2))
\ No newline at end of file
diff --git a/scrap/scrape_github.py b/scrap/scrape_github.py
new file mode 100644
index 0000000..b775dbe
--- /dev/null
+++ b/scrap/scrape_github.py
@@ -0,0 +1,130 @@
+import os
+import requests
+from datetime import datetime, UTC
+from typing import List, Dict
+
+SOURCE_SITE = "github"
+GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
+
+
+THEMES = [
+    "large-language-model", "llm", "transformer", "text-generation", "retrieval-augmented-generation",
+    "rag", "agents", "chatbot", "fine-tuning", "quantization", "lora", "peft",
+    "diffusion", "stable-diffusion", "image-generation", "multimodal",
+    "speech-to-text", "speech-synthesis", "audio", "reinforcement-learning",
+    "computer-vision",
+]
+
+HEADERS = {
+    "Accept": "application/vnd.github+json",
"User-Agent": "github-ai-theme-watcher/1.0" +} +if GITHUB_TOKEN: + HEADERS["Authorization"] = f"Bearer {GITHUB_TOKEN}" + +class RateLimitError(Exception): + def __init__(self, retry_after=None): + self.retry_after = retry_after + super().__init__("Rate limit hit on GitHub API. Retry after: {}".format(retry_after)) + +def sanitize_text(s): + return str(s) if s is not None else "" + +def normalize_github_repo(repo: Dict, theme: str) -> Dict: + full_name = repo.get("full_name") + keywords_list = [theme, repo.get("language") or ""] + if repo.get("topics"): + keywords_list.extend(repo.get("topics")) + updated_at = repo.get("updated_at") or repo.get("pushed_at") or datetime.now(UTC).isoformat() + return { + "id": full_name, "source_site": SOURCE_SITE, "title": repo.get("name"), + "description": sanitize_text(repo.get("description")), "author_info": repo.get("owner", {}).get("login", ""), + "keywords": ", ".join(filter(None, keywords_list)), "content_url": repo.get("html_url") or f"https://github.com/{full_name}", + "published_date": updated_at, "item_type": "repository", + } + +def build_query_for_theme(theme: str) -> str: + theme_token = theme.replace(" ", "+") + q = f"{theme_token} in:name,description,readme stars:>50" + return q + + +def search_github_repos(query: str, per_page: int = 20) -> List[Dict]: + """ + Recherche des repositories GitHub. + Lève RateLimitError ou retourne List[Dict] (vide ou pleine). + """ + url = "https://api.github.com/search/repositories" + params = { + "q": query, + "sort": "stars", + "order": "desc", + "per_page": per_page + } + + try: + resp = requests.get(url, headers=HEADERS, params=params, timeout=20) + + if resp.status_code == 403: + retry_after = resp.headers.get("Retry-After") + raise RateLimitError(retry_after=int(retry_after) if retry_after and retry_after.isdigit() else None) + + if resp.status_code != 200: + print(f"[WARN] HTTP Status {resp.status_code} for query: {query}") + return [] + + data = resp.json() + return data.get("items", []) + + except RateLimitError: + raise + except requests.exceptions.RequestException as e: + print(f"[ERREUR CONNEXION/HTTP] GitHub Search: {e}") + return [] + except Exception as e: + print(f"[ERREUR INCONNUE/JSON] GitHub Search: {e}") + return [] + + +def scrape_github(themes: List[str] = THEMES, limit_per_theme: int = 20) -> List[Dict]: + """Scrape GitHub pour les thèmes donnés et retourne les éléments unifiés.""" + + all_items = [] + stop_scraping = False + + try: + for theme in themes: + if stop_scraping: + break + + q = build_query_for_theme(theme) + print(f"-> Recherche thème '{theme}' (q={q})") + + try: + items = search_github_repos(q, limit_per_theme) + + if not isinstance(items, list): + print(f"[FATAL WARN] search_github_repos a retourné {type(items)} au lieu de list. Arrêt.") + stop_scraping = True + continue + + normalized_items = [normalize_github_repo(repo, theme) for repo in items] + all_items.extend(normalized_items) + + except RateLimitError: + print(f"[RATE LIMIT] Limite atteinte. 
+            stop_scraping = True
+        except Exception as e:
+            print(f"[THEME ERROR] '{theme}': {e}")
+            continue
+
+    return all_items
+
+if __name__ == "__main__":
+    results = scrape_github(themes=["llm"], limit_per_theme=5)
+    print(f"Total GitHub items scraped: {len(results)}")
+    if results:
+        import json
+        print("\nSample unified item:")
+        print(json.dumps(results[0], indent=2))
\ No newline at end of file
diff --git a/scrap/scrape_hf.py b/scrap/scrape_hf.py
new file mode 100644
index 0000000..1c19c55
--- /dev/null
+++ b/scrap/scrape_hf.py
@@ -0,0 +1,93 @@
+import requests
+from datetime import datetime, UTC
+from typing import List, Dict
+
+SOURCE_SITE = "huggingface"
+
+def build_url(item: Dict, item_type: str) -> str:
+    """Build the public URL of the item."""
+    base = "https://huggingface.co"
+    item_id = item.get("id")
+    if item_type == "model":
+        return f"{base}/{item.get('modelId')}"
+    elif item_type in ("dataset", "space", "collection", "paper"):
+        return f"{base}/{item_id}"
+    return base
+
+def normalize_huggingface_item(item: Dict, item_type: str) -> Dict:
+    """Normalize a Hugging Face item into the unified format."""
+    item_name = item.get("name") or item.get("modelId") or item.get("id")
+    item_id = item.get("id") or item.get("modelId") or item.get("name")
+
+    author = item.get("author") or item.get("organization", "")
+
+    description = item.get("description", item_name)
+
+    keywords_list = []
+    if item.get("tags"):
+        keywords_list.extend(item.get("tags"))
+    if item.get("pipeline_tag"):
+        tag = item.get("pipeline_tag")
+        keywords_list.append(tag if isinstance(tag, str) else ", ".join(tag))
+
+    last_modified = item.get("lastModified") or item.get("last_modified") or datetime.now(UTC).isoformat()
+
+    return {
+        "id": item_id,
+        "source_site": SOURCE_SITE,
+        "title": item_name,
+        "description": description,
+        "author_info": author,
+        "keywords": ", ".join(keywords_list),
+        "content_url": build_url(item, item_type),
+        "published_date": last_modified,
+        "item_type": item_type,
+    }
+
+def fetch_huggingface_api(endpoint: str, item_type: str, limit: int = 20) -> List[Dict]:
+    """Fetch data from a specific API endpoint and normalize it."""
+    url = f"https://huggingface.co/api/{endpoint}?sort=lastModified&direction=-1&limit={limit}"
+
+    try:
+        r = requests.get(url, timeout=20)
+
+        if r.status_code == 404:
+            return []
+
+        r.raise_for_status()
+
+        items = r.json()
+
+        normalized_items = [normalize_huggingface_item(item, item_type) for item in items]
+        return normalized_items
+
+    except Exception as e:
+        print(f"[ERROR] HF {item_type}: {e}")
+        return []
+
+def scrape_huggingface(limit_per_type: int = 20) -> List[Dict]:
+    """Scrape the Hugging Face Hub, skipping the 'organizations' endpoint."""
+
+    fetchers = [
+        ("models", "model"),
+        ("datasets", "dataset"),
+        ("spaces", "space"),
+        ("collections", "collection"),
+        ("papers", "paper"),
+    ]
+
+    all_items = []
+
+    for endpoint, item_type in fetchers:
+        items = fetch_huggingface_api(endpoint, item_type, limit_per_type)
+        all_items.extend(items)
+
+    return all_items
+
+if __name__ == "__main__":
+    results = scrape_huggingface(limit_per_type=5)
+    print(f"Total Hugging Face items scraped: {len(results)}")
+    if results:
+        print("\nSample unified item:")
+        import json
+        print(json.dumps(results[0], indent=2))
\ No newline at end of file
diff --git a/scrap/unified_scrapper_pipeline.py b/scrap/unified_scrapper_pipeline.py
new file mode 100644
index 0000000..de08f4b
--- /dev/null
+++ b/scrap/unified_scrapper_pipeline.py
@@ -0,0 +1,145 @@
+import sqlite3
+from datetime import datetime, UTC
+from typing import List, Dict
+import time
+import os
+from scrape_hf import scrape_huggingface
+from scrape_github import scrape_github
+from medium_scraping import scrape_medium
+from scrap_arxiv import scrape_arxiv
+from scrap_le_monde import scrape_lemonde
+
+
+DB_FILE = "veille_technique_unified.db"
+
+def setup_database():
+    """Initialize the database and create the unified table."""
+    conn = sqlite3.connect(DB_FILE)
+    cur = conn.cursor()
+
+    cur.execute("""
+        CREATE TABLE IF NOT EXISTS unified_data (
+            id TEXT PRIMARY KEY,
+            source_site TEXT NOT NULL,
+            title TEXT NOT NULL,
+            description TEXT,
+            author_info TEXT,
+            keywords TEXT,
+            content_url TEXT NOT NULL,
+            published_date TEXT,
+            item_type TEXT,
+            created_at TIMESTAMP
+        )
+    """)
+    conn.commit()
+    conn.close()
+
+def save_unified_item(item: Dict, conn: sqlite3.Connection):
+    """Insert a unified item into the database."""
+    cur = conn.cursor()
+    now = datetime.now(UTC).isoformat()
+
+    cur.execute("""
+        INSERT OR IGNORE INTO unified_data
+        (id, source_site, title, description, author_info, keywords, content_url, published_date, item_type, created_at)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+    """, (
+        item["id"],
+        item["source_site"],
+        item["title"],
+        item["description"],
+        item["author_info"],
+        item["keywords"],
+        item["content_url"],
+        item["published_date"],
+        item["item_type"],
+        now
+    ))
+    conn.commit()
+
+def run_scrapers_and_save():
+    """Run every scraper, collect the data and save it."""
+    print("--- Starting the Tech Watch Pipeline ---")
+    setup_database()
+
+    conn = sqlite3.connect(DB_FILE)
+
+    # Keyword arguments make sure each limit reaches the intended parameter of its scraper.
+    scrapers = [
+        ("Hugging Face", scrape_huggingface, {"limit_per_type": 10}),
+        ("GitHub", scrape_github, {"limit_per_theme": 5}),
+        ("Medium", scrape_medium, {"max_articles_per_feed": 5}),
+        ("arXiv", scrape_arxiv, {"max_results": 10}),
+        ("Le Monde", scrape_lemonde, {}),
+    ]
+
+    total_new_items = 0
+
+    for name, scraper_func, kwargs in scrapers:
+        print(f"\n🚀 Running scraper: **{name}**")
+
+        try:
+            items = scraper_func(**kwargs)
+
+            if items is None:
+                print(f" ❌ **ALERT: scraper {name} returned None. Skipping.**")
+                continue
+
+            try:
+                iter(items)
+            except TypeError:
+                print(f" ❌ **FATAL ERROR (not iterable)**: scraper {name} returned a non-iterable type ({type(items)}). Skipping.")
+                continue
+
+            if not isinstance(items, list):
+                print(f" ⚠️ WARNING: scraper {name} returned an iterable ({type(items)}) that is not a list. Converting to a list.")
+                items = list(items)
+
+            print(f" -> {len(items)} items fetched.")
+
+            count_saved = 0
+            for item in items:
+                save_unified_item(item, conn)
+                count_saved += 1
+
+            print(f" -> {count_saved} items inserted/checked in the database.")
+            total_new_items += count_saved
+
+        except Exception as e:
+            print(f" ❌ **FATAL ERROR** while scraping {name}: {e}")
+
+    conn.close()
+    print(f"\n--- Pipeline finished. {total_new_items} items processed. ---")
---") + print(f"Base de données unifiée : **{DB_FILE}**") + +def check_results(): + """Affiche les 5 premières entrées de la base de données unifiée.""" + conn = sqlite3.connect(DB_FILE) + cur = conn.cursor() + + cur.execute("SELECT * FROM unified_data LIMIT 5") + rows = cur.fetchall() + + print("\n--- Aperçu des Résultats Unifiés (5 premières lignes) ---") + if not rows: + print("La base de données est vide.") + return + + column_names = [description[0] for description in cur.description] + print(f"Colonnes: {column_names}") + print("-" * 120) + + for row in rows: + print(row) + + cur.execute("SELECT COUNT(*) FROM unified_data") + total_count = cur.fetchone()[0] + print(f"\nTotal des éléments dans la DB : **{total_count}**") + + conn.close() + + +if __name__ == "__main__": + run_scrapers_and_save() + check_results() \ No newline at end of file