weaviate
diff --git a/‎CHANGELOG.md‎
Lines changed: 3 additions & 1 deletion b/‎CHANGELOG.md‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎frontend/app/components/Login/GettingStarted.tsx‎
Lines changed: 24 additions & 9 deletions b/‎frontend/app/components/Login/GettingStarted.tsx‎
Lines changed: 24 additions & 9 deletions
diff --git a/‎goldenverba/components/managers.py‎
Lines changed: 0 additions & 1 deletion b/‎goldenverba/components/managers.py‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎goldenverba/components/reader/BasicReader.py‎
Lines changed: 176 additions & 3 deletions b/‎goldenverba/components/reader/BasicReader.py‎
Lines changed: 176 additions & 3 deletions
@@ -2,11 +2,13 @@
 
 All notable changes to this project will be documented in this file.
 
-## [2.1.3] Making Verba stable again!
+## [2.1.3] More data types
 
 ## Added
 
 - Added `OLLAMA_MODEL` and `OLLAMA_EMBED_MODEL` environment variables (https://github.com/weaviate/Verba/pull/372)
+- The `Getting Started` dialog is now hidden after it has been shown once
+- Added support for `csv`, `xlsx`, and `xls` files in the `DefaultReader`
 
 ## [2.1.2] Adding Novita!
 
 
@@ -1,6 +1,6 @@
 "use client";
 
-import React, { useEffect, useRef } from "react";
+import React, { useEffect, useRef, useState } from "react";
 import VerbaButton from "../Navigation/VerbaButton";
 import { FaGithub } from "react-icons/fa";
 import { FaYoutube } from "react-icons/fa";
@@ -18,13 +18,33 @@ const GettingStartedComponent: React.FC<GettingStartedComponentProps> = ({
   addStatusMessage,
 }) => {
   const dialogRef = useRef<HTMLDialogElement>(null);
+  const [shouldShow, setShouldShow] = useState(false);
 
   useEffect(() => {
-    if (dialogRef.current) {
-      dialogRef.current.showModal();
+    // Check if getting_started variable exists in localStorage
+    const gettingStartedSeen = localStorage.getItem("getting_started");
+
+    // Show modal if getting_started doesn't exist or is set to false
+    if (!gettingStartedSeen || gettingStartedSeen === "false") {
+      setShouldShow(true);
+      if (dialogRef.current) {
+        dialogRef.current.showModal();
+      }
     }
   }, []);
 
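+  // Render nothing until the mount effect has decided the dialog should be shown.
+  // NOTE(review): this early return also leaves dialogRef.current null while that
+  // effect runs, and the effect (empty dependency list) does not run again after
+  // setShouldShow(true) — confirm showModal() is actually reached.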
+  if (!shouldShow) {
+    return null;
+  }
+
+  // Click handler for the "Let's get started" button: records that the dialog
+  // has been seen, hides the component, and posts the welcome status message.
+  const handleGetStarted = () => {
+    // Persist the flag so the mount effect skips the dialog on later visits
+    // (localStorage stores strings, hence "true" rather than a boolean).
+    localStorage.setItem("getting_started", "true");
+    // With shouldShow false the component renders null.
+    setShouldShow(false);
+    addStatusMessage("Achievement unlocked: Welcome to Verba!", "SUCCESS");
+  };
+
   return (
     <dialog id={"Getting-Started-Modal"} className="modal" ref={dialogRef}>
       <div className="modal-box w-11/12 max-w-5xl">
@@ -97,12 +117,7 @@ const GettingStartedComponent: React.FC<GettingStartedComponentProps> = ({
               title="Let's get started"
               type="submit"
               selected={true}
-              onClick={() => {
-                addStatusMessage(
-                  "Achievement unlocked: Welcome to Verba!",
-                  "SUCCESS"
-                );
-              }}
+              onClick={handleGetStarted}
               selected_color="bg-primary-verba"
               Icon={FaHeart}
             />
 
@@ -12,7 +12,6 @@
 import asyncio
 import json
 import re
-from urllib.parse import urlparse
 from datetime import datetime
 
 from sklearn.decomposition import PCA
 
@@ -1,6 +1,7 @@
 import base64
 import json
 import io
+import csv
 
 from wasabi import msg
 
@@ -27,17 +28,35 @@
     msg.warn("python-docx not installed, DOCX functionality will be limited.")
     docx = None
 
+try:
+    import pandas as pd
+except ImportError:
+    msg.warn("pandas not installed, Excel functionality will be limited.")
+    pd = None
+
+try:
+    import openpyxl
+except ImportError:
+    msg.warn("openpyxl not installed, Excel functionality will be limited.")
+    openpyxl = None
+
+try:
+    import xlrd
+except ImportError:
+    msg.warn("xlrd not installed, .xls file functionality will be limited.")
+    xlrd = None
+
 
 class BasicReader(Reader):
     """
-    The BasicReader reads text, code, PDF, and DOCX files.
+    The BasicReader reads text, code, PDF, DOCX, CSV, and Excel files.
     """
 
     def __init__(self):
         super().__init__()
         self.name = "Default"
-        self.description = "Ingests text, code, PDF, and DOCX files"
-        self.requires_library = ["pypdf", "docx", "spacy"]
+        self.description = "Ingests text, code, PDF, DOCX, CSV, and Excel files"
+        self.requires_library = ["pypdf", "docx", "spacy", "pandas", "openpyxl"]
         self.extension = [
             ".txt",
             ".py",
@@ -51,6 +70,7 @@ def __init__(self):
             ".docx",
             ".pptx",
             ".xlsx",
+            ".xls",
             ".csv",
             ".ts",
             ".tsx",
@@ -93,6 +113,12 @@ async def load(self, config: dict, fileConfig: FileConfig) -> list[Document]:
                 file_content = await self.load_pdf_file(decoded_bytes)
             elif fileConfig.extension.lower() == "docx":
                 file_content = await self.load_docx_file(decoded_bytes)
+            elif fileConfig.extension.lower() == "csv":
+                file_content = await self.load_csv_file(decoded_bytes)
+            elif fileConfig.extension.lower() in ["xlsx", "xls"]:
+                file_content = await self.load_excel_file(
+                    decoded_bytes, fileConfig.extension.lower()
+                )
             elif fileConfig.extension.lower() in [
                 ext.lstrip(".") for ext in self.extension
             ]:
@@ -150,3 +176,150 @@ async def load_docx_file(self, decoded_bytes: bytes) -> str:
         docx_bytes = io.BytesIO(decoded_bytes)
         reader = docx.Document(docx_bytes)
         return "\n".join(paragraph.text for paragraph in reader.paragraphs)
+
+    async def load_csv_file(self, decoded_bytes: bytes) -> str:
+        """Load and convert CSV file to readable text format."""
+        try:
+            # Try UTF-8 first, fallback to latin-1
+            try:
+                text_content = decoded_bytes.decode("utf-8")
+            except UnicodeDecodeError:
+                text_content = decoded_bytes.decode("latin-1")
+
+            csv_reader = csv.reader(io.StringIO(text_content))
+            rows = list(csv_reader)
+
+            if not rows:
+                return "Empty CSV file"
+
+            # Format as a readable table
+            result = []
+            headers = rows[0] if rows else []
+
+            # Add headers
+            if headers:
+                result.append("Headers: " + " | ".join(headers))
+                result.append(" \n\n")
+
+            # Add data rows
+            for i, row in enumerate(rows[1:], 1):
+                if len(row) == len(headers):
+                    row_data = []
+                    for header, value in zip(headers, row):
+                        row_data.append(f"{header}: {value}")
+                    result.append(f"Row {i}: {' | '.join(row_data)}")
+                else:
+                    # Handle rows with different column counts
+                    result.append(f"Row {i}: {' | '.join(row)}")
+                result.append(" \n\n")
+            return "\n".join(result)
+
+        except Exception as e:
+            raise ValueError(f"Error reading CSV file: {str(e)}")
+
+    async def load_excel_file(self, decoded_bytes: bytes, extension: str) -> str:
+        """Load and convert Excel file to readable text format."""
+        if not pd and not openpyxl:
+            raise ImportError("pandas or openpyxl is required to process Excel files.")
+
+        try:
+            excel_bytes = io.BytesIO(decoded_bytes)
+
+            # Use pandas if available for better support
+            if pd:
+                # Read all sheets
+                if extension == "xlsx":
+                    sheets_dict = pd.read_excel(
+                        excel_bytes, sheet_name=None, engine="openpyxl"
+                    )
+                else:  # xls
+                    try:
+                        sheets_dict = pd.read_excel(
+                            excel_bytes, sheet_name=None, engine="xlrd"
+                        )
+                    except Exception as e:
+                        # Try auto engine detection as fallback
+                        try:
+                            sheets_dict = pd.read_excel(
+                                excel_bytes, sheet_name=None, engine=None
+                            )
+                        except Exception:
+                            raise ImportError(
+                                f"Cannot read .xls file. Please install 'xlrd' for .xls support: pip install xlrd. "
+                                f"Original error: {str(e)}"
+                            )
+
+                result = []
+
+                for sheet_name, df in sheets_dict.items():
+                    result.append(f"\nSheet: {sheet_name}")
+
+                    if df.empty:
+                        result.append("(Empty sheet)")
+                        continue
+
+                    result.append(" \n\n")
+
+                    # Add column headers
+                    headers = df.columns.tolist()
+                    result.append("Headers: " + " | ".join(str(h) for h in headers))
+                    result.append(" \n\n")
+
+                    for idx, (_, row) in enumerate(df.iterrows()):
+                        row_data = []
+                        for header, value in zip(headers, row):
+                            # Handle NaN values
+                            display_value = str(value) if pd.notna(value) else ""
+                            row_data.append(f"{header}: {display_value}")
+                        result.append(f"Row {idx + 1}: {' | '.join(row_data)}")
+                        result.append(" \n\n")
+
+                return "\n".join(result)
+
+            else:
+                # Fallback to openpyxl for basic reading
+                if extension != "xlsx":
+                    raise ImportError(
+                        "openpyxl only supports .xlsx files. Please install pandas for .xls support."
+                    )
+
+                from openpyxl import load_workbook
+
+                workbook = load_workbook(excel_bytes, data_only=True)
+
+                result = []
+
+                for sheet_name in workbook.sheetnames:
+                    sheet = workbook[sheet_name]
+                    result.append(f"\nSheet: {sheet_name}")
+                    result.append(" \n\n")
+
+                    rows_data = []
+                    for row in sheet.iter_rows(values_only=True):
+                        if any(cell is not None for cell in row):  # Skip empty rows
+                            rows_data.append(
+                                [str(cell) if cell is not None else "" for cell in row]
+                            )
+
+                    if not rows_data:
+                        result.append("(Empty sheet)")
+                        continue
+
+                    # Add headers and data
+                    headers = rows_data[0] if rows_data else []
+                    result.append("Headers: " + " | ".join(headers))
+                    result.append(" \n\n")
+
+                    for i, row in enumerate(rows_data[1:], 1):
+                        if len(row) == len(headers):
+                            row_data = [f"{h}: {v}" for h, v in zip(headers, row)]
+                            result.append(f"Row {i}: {' | '.join(row_data)}")
+                            result.append(" \n\n")
+                        else:
+                            result.append(f"Row {i}: {' | '.join(row)}")
+                            result.append(" \n\n")
+
+                return "\n".join(result)
+
+        except Exception as e:
+            raise ValueError(f"Error reading Excel file: {str(e)}")