Prefer the converted CSV over Excel files in file instructions and read_file

Josh-XT · Josh-XT · commit 2ce47c1f58b6 · 2025-12-12T22:20:57.000-05:00
diff --git a/agixt/XT.py b/agixt/XT.py
@@ -1131,13 +1131,27 @@ def _get_file_access_instructions(
             actual_path = converted_file_name if converted_file_name else file_name
 
         # Base info about the file
-        info = f"## Uploaded File: `{file_name}`\n"
-        info += f"- **Size:** {file_size_kb} KB ({file_tokens} tokens)\n"
-        info += f"- **Path for commands:** `{actual_path}`\n"
-        info += f"- **URL:** [{file_name}]({file_url})\n"
+        # For xlsx/xls files that were converted, emphasize the CSV as the primary file to use
+        if (
+            converted_file_name
+            and converted_file_name != file_name
+            and file_type
+            and file_type.lower() in ["xlsx", "xls"]
+        ):
+            info = f"## File Available: `{converted_file_name}` (converted from `{file_name}`)\n"
+            info += f"- **Original file:** `{file_name}` (Excel format - do NOT read this directly)\n"
+            info += f"- **Use this file:** `{actual_path}` (CSV format - use this for all commands)\n"
+            info += f"- **Size:** {file_size_kb} KB ({file_tokens} tokens)\n"
+            info += f"- **URL:** [{file_name}]({file_url})\n"
+            info += f"\n**IMPORTANT:** The Excel file has been converted to CSV. Always use the CSV file (`{actual_path}`) for Read File and pandas operations.\n"
+        else:
+            info = f"## Uploaded File: `{file_name}`\n"
+            info += f"- **Size:** {file_size_kb} KB ({file_tokens} tokens)\n"
+            info += f"- **Path for commands:** `{actual_path}`\n"
+            info += f"- **URL:** [{file_name}]({file_url})\n"
 
-        if converted_file_name and converted_file_name != file_name:
-            info += f"- **Converted to:** `{converted_file_name}` (CSV format)\n"
+            if converted_file_name and converted_file_name != file_name:
+                info += f"- **Converted to:** `{converted_file_name}` (CSV format)\n"
 
         info += "\n"
 
@@ -1228,9 +1242,9 @@ async def learn_spreadsheet(
                         csv_file_path = file_path.replace(f".{file_type}", ".csv")
                         csv_file_name = os.path.basename(csv_file_path)
                         df.to_csv(csv_file_path, index=False)
-                        string_file_content += f"Content from file uploaded named `{file_name}` (also saved as `{csv_file_name}`):\n```csv\n{csv}```\n"
+                        string_file_content += f"Content from uploaded Excel file `{file_name}` (converted and saved as `{csv_file_name}` - use this CSV file for all Read File and pandas operations):\n```csv\n{csv}```\n"
                         return (
-                            f"Converted [{file_name}]({file_path}) and converted to CSV format at [{csv_file_name}]({csv_file_path}).",
+                            f"Converted [{file_name}]({file_path}) to CSV format at [{csv_file_name}]({csv_file_path}). Use `{csv_file_name}` for file operations.",
                             string_file_content,
                         )
                 except Exception as e:
diff --git a/agixt/extensions/essential_abilities.py b/agixt/extensions/essential_abilities.py
@@ -439,6 +439,7 @@ async def read_file(
         - The user can browse the agents workspace by clicking the folder icon in their chat input bar
         - For large files or data analysis, consider using Execute Python Code to extract specific information
         - For CSV/data files, use Execute Python Code with pandas to analyze data efficiently
+        - XLSX/XLS files are automatically converted to CSV format for reading
         """
         MAX_LINES = 100  # Maximum lines to return per read
         try:
@@ -452,6 +453,47 @@ async def read_file(
         try:
             filepath = self.safe_join(filename)
 
+            # Check if this is an Excel file - convert to CSV if needed
+            file_ext = os.path.splitext(filename)[1].lower()
+            csv_notice = ""
+            if file_ext in [".xlsx", ".xls"]:
+                import pandas as pd
+
+                # Check if CSV version already exists
+                base_name = os.path.splitext(filename)[0]
+                csv_filename = f"{base_name}.csv"
+                csv_filepath = self.safe_join(csv_filename)
+
+                if not os.path.exists(csv_filepath):
+                    # Convert Excel to CSV
+                    try:
+                        xl = pd.ExcelFile(filepath)
+                        if len(xl.sheet_names) > 1:
+                            # Multiple sheets - convert each to separate CSV
+                            csv_files = []
+                            for i, sheet_name in enumerate(xl.sheet_names, 1):
+                                df = xl.parse(sheet_name)
+                                sheet_csv_filename = f"{base_name}_{i}.csv"
+                                sheet_csv_filepath = self.safe_join(sheet_csv_filename)
+                                df.to_csv(sheet_csv_filepath, index=False)
+                                csv_files.append(sheet_csv_filename)
+                            csv_notice = f"**Note**: Excel file `{filename}` has {len(xl.sheet_names)} sheets. Converted to: {', '.join(csv_files)}. Reading first sheet (`{csv_files[0]}`).\n\n"
+                            csv_filepath = self.safe_join(csv_files[0])
+                            csv_filename = csv_files[0]
+                        else:
+                            # Single sheet
+                            df = pd.read_excel(filepath)
+                            df.to_csv(csv_filepath, index=False)
+                            csv_notice = f"**Note**: Excel file `{filename}` converted to `{csv_filename}` for reading.\n\n"
+                    except Exception as e:
+                        return f"Error: Failed to convert Excel file to CSV: {str(e)}"
+                else:
+                    csv_notice = f"**Note**: Reading CSV version `{csv_filename}` of Excel file `{filename}`.\n\n"
+
+                # Update filepath to read the CSV version
+                filepath = csv_filepath
+                filename = csv_filename
+
             # Read the file lines
             with open(filepath, "r", encoding="utf-8") as f:
                 lines = f.readlines()
@@ -479,7 +521,8 @@ async def read_file(
             lines_returned = len(selected_lines)
 
             # Build header with line information
-            header = (
+            header = csv_notice  # Include Excel->CSV conversion notice if applicable
+            header += (
                 f"Lines {actual_start}-{actual_end} of {total_lines} total lines:\n"
             )
             header += "=" * 40 + "\n"
diff --git a/agixt/version b/agixt/version
@@ -1 +1 @@
-v1.8.1
+v1.8.2