lanl
diff --git a/‎README.rst‎
Lines changed: 1 addition & 1 deletion b/‎README.rst‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/conf.py‎
Lines changed: 1 addition & 1 deletion b/‎docs/conf.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎dsi/backends/duckdb.py‎
Lines changed: 14 additions & 8 deletions b/‎dsi/backends/duckdb.py‎
Lines changed: 14 additions & 8 deletions
diff --git a/‎dsi/backends/sqlite.py‎
Lines changed: 14 additions & 9 deletions b/‎dsi/backends/sqlite.py‎
Lines changed: 14 additions & 9 deletions
diff --git a/‎dsi/core.py‎
Lines changed: 1 addition & 23 deletions b/‎dsi/core.py‎
Lines changed: 1 addition & 23 deletions
diff --git a/‎dsi/dsi.py‎
Lines changed: 2 additions & 2 deletions b/‎dsi/dsi.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎dsi/plugins/file_reader.py‎
Lines changed: 7 additions & 3 deletions b/‎dsi/plugins/file_reader.py‎
Lines changed: 7 additions & 3 deletions
diff --git a/‎dsi/plugins/file_writer.py‎
Lines changed: 29 additions & 5 deletions b/‎dsi/plugins/file_writer.py‎
Lines changed: 29 additions & 5 deletions
@@ -59,7 +59,7 @@ Copyright and License
 
 This program is open source under the BSD-3 License.
 
-© 2025. Triad National Security, LLC. All rights reserved.
+© 2025. Triad National Security, LLC. All rights reserved. LA-UR-25-29245
 
 Redistribution and use in source and binary forms, with or without modification, are permitted
 provided that the following conditions are met:
 
@@ -8,7 +8,7 @@
 exec(open("../dsi/_version.py").read())
 
 project = 'DSI'
-copyright = '2025, Triad National Security, LLC. All rights reserved.'
+copyright = '2025, Triad National Security, LLC. All rights reserved. LA-UR-25-29248'
 author = 'The DSI Project team'
 release = __version__
 
 
@@ -68,13 +68,19 @@ def sql_type(self, input_list):
         `return`: str
             A string representing the inferred DuckDB data type for the input list.
         """
-        for item in input_list:
-            if isinstance(item, int):
-                return " INTEGER"
-            elif isinstance(item, float):
-                return " FLOAT"
-            elif isinstance(item, str):
-                return " VARCHAR"
+        DUCKDB_BIGINT_MIN = -9223372036854775808
+        DUCKDB_BIGINT_MAX =  9223372036854775807
+        DUCKDB_INT_MIN = -2147483648
+        DUCKDB_INT_MAX =  2147483647
+
+        if all(isinstance(x, int) for x in input_list if x is not None):
+            if any(x < DUCKDB_BIGINT_MIN or x > DUCKDB_BIGINT_MAX for x in input_list if x is not None):
+                return " DOUBLE"
+            elif any(x < DUCKDB_INT_MIN or x > DUCKDB_INT_MAX for x in input_list if x is not None):
+                return " BIGINT"
+            return " INTEGER"
+        elif all(isinstance(x, float) for x in input_list if x is not None):
+            return " DOUBLE"
         return " VARCHAR"
 
     def duckdb_compatible_name(self, name):
@@ -822,7 +828,7 @@ def summary_helper(self, table_name):
         """
         col_info = self.cur.execute(f"PRAGMA table_info({table_name})").fetchall()
 
-        numeric_types = {'INTEGER', 'REAL', 'FLOAT', 'NUMERIC', 'DECIMAL', 'DOUBLE'}
+        numeric_types = {'INTEGER', 'REAL', 'FLOAT', 'NUMERIC', 'DECIMAL', 'DOUBLE', 'BIGINT'}
         headers = ['column', 'type', 'min', 'max', 'avg', 'std_dev']
         rows = []
 
 
@@ -45,12 +45,15 @@ class Sqlite(Filesystem):
     """
     runTable = False
 
-    def __init__(self, filename):
+    def __init__(self, filename, **kwargs):
         """
         Initializes a SQLite backend with a user inputted filename, and creates other internal variables
         """
         self.filename = filename
-        self.con = sqlite3.connect(filename)
+        if 'kwargs' in kwargs:
+            self.con = sqlite3.connect(filename, **kwargs['kwargs'])
+        else:
+            self.con = sqlite3.connect(filename)
         self.cur = self.con.cursor()
         self.runTable = Sqlite.runTable
         self.sqlite_keywords = ["ABORT", "ACTION", "ADD", "AFTER", "ALL", "ALTER", "ALWAYS", "ANALYZE", "AND", "AS", "ASC", "ATTACH", 
@@ -80,14 +83,16 @@ def sql_type(self, input_list):
         `return`: str
             A string representing the inferred SQLite data type for the input list.
         """
-        for item in input_list:
-            if isinstance(item, int):
-                return " INTEGER"
-            elif isinstance(item, float):
+        SQLITE_INT_MIN = -9223372036854775808
+        SQLITE_INT_MAX =  9223372036854775807
+
+        if all(isinstance(x, int) for x in input_list if x is not None):
+            if any(x < SQLITE_INT_MIN or x > SQLITE_INT_MAX for x in input_list if x is not None):
                 return " FLOAT"
-            elif isinstance(item, str):
-                return " VARCHAR"
-        return ""
+            return " INTEGER"
+        elif all(isinstance(x, float) for x in input_list if x is not None):
+            return " FLOAT"
+        return " VARCHAR"
 
     def sqlite_compatible_name(self, name):
         if (name.startswith('"') and name.endswith('"')) or (name.upper() not in self.sqlite_keywords and name.isidentifier()):
 
@@ -872,7 +872,7 @@ def wrap_in_quotes(value):
                 self.logger.error(f"Error finding rows due to {return_object[1]}")
             raise return_object[0](return_object[1])
         elif isinstance(return_object, list) and isinstance(return_object[0], str):
-            err_msg = f"'{column_name}' appeared in more than one table. Can only do a conditional find if '{column_name}' is in one table"
+            err_msg = f"'{column_name}' appeared in more than one table. Can only find if '{column_name}' is in one table"
             if self.debug_level != 0:
                 self.logger.warning(err_msg)
             print(f"WARNING: {err_msg}")
@@ -1479,28 +1479,6 @@ def index(self, local_loc, remote_loc, isVerbose=False):
             with redirect_stdout(fnull):
                 t.load_module('plugin', "Dict", "reader", collection=st_dict, table_name="filesystem")
                 t.artifact_handler(interaction_type='ingest')
-
-            # # Create new filesystem collection with origin and remote locations
-            # # Stage data for ingest
-            # # Transpose the OrderedDict to a list of row dictionaries
-            # num_rows = len(next(iter(st_dict.values())))  # Assume all columns are of equal length
-            # rows = []
-
-            # for i in range(num_rows):
-            #     row = {col: values[i] for col, values in st_dict.items()}
-            #     rows.append(row)
-
-            # # Temporary csv to ingest
-            # output_file = '.fs.csv'
-            # with open(output_file, mode='w', newline='') as csvfile:
-            #     writer = csv.DictWriter(csvfile, fieldnames=st_dict.keys())
-            #     writer.writeheader()
-            #     writer.writerows(rows)
-            
-            # # Add filesystem table
-            # t.load_module('plugin', 'Csv', 'reader', filenames=".fs.csv", table_name="filesystem")
-            # #t.load_module('plugin', 'collection_reader', 'reader', st_dict )
-            # t.artifact_handler(interaction_type='ingest')
 
         t.close()
 
 
@@ -17,7 +17,7 @@ class DSI():
     The DSI Class abstracts Core.Terminal for managing metadata and Core.Sync for data management and movement.
     '''
 
-    def __init__(self, filename = ".temp.db", backend_name = "Sqlite"):
+    def __init__(self, filename = ".temp.db", backend_name = "Sqlite", **kwargs):
         """
         Initializes DSI by activating a backend for data operations; default is a Sqlite backend for temporary data analysis.
         If users specify `filename`, data is saved to a permanent backend file.
@@ -61,7 +61,7 @@ def __init__(self, filename = ".temp.db", backend_name = "Sqlite"):
         try:
             if backend_name.lower() == 'sqlite':
                 with redirect_stdout(fnull):
-                    self.t.load_module('backend','Sqlite','back-write', filename=filename)
+                    self.t.load_module('backend','Sqlite','back-write', filename=filename, kwargs = kwargs)
                     self.backend_name = "sqlite"
             elif backend_name.lower() == 'duckdb':
                 with redirect_stdout(fnull):
 
@@ -746,22 +746,26 @@ def add_rows(self) -> None:
             field_names = []
             for element, val in data.items():
                 if element not in ['authorship', 'data']:
+                    if isinstance(val, list):
+                        val = ",, ".join(val)
                     if element not in temp_data.keys():
                         temp_data[element] = [val]
                     else:
                         temp_data[element].append(val)
                     field_names.append(element)
                 else:
                     for field, val2 in val.items():
+                        if isinstance(val2, list):
+                            val2 = ",, ".join(val2)
                         if field not in temp_data.keys():
                             temp_data[field] = [val2]
                         else:
                             temp_data[field].append(val2)
                         field_names.append(field)
 
-            if sorted(field_names) != sorted(["name", "description", "data_uses", "creators", "creation_date", 
-                                              "la_ur", "owner", "funding", "publisher", "published_date", "origin_location", 
-                                              "num_simulations", "version", "license", "live_dataset"]):
+            if sorted(field_names) != sorted(["title", "description", "keywords", "instructions_of_use", "authors", 
+                                              "release_date", "la_ur", "funding", "rights", "file_types", "num_simulations", 
+                                              "file_size", "num_files", "dataset_size", "version", "doi"]):
                 return (ValueError, f"Error in reading {filename} data card. Please ensure all fields included match the template")
 
         self.datacard_data["oceans11_datacard"] = temp_data
 
@@ -24,7 +24,7 @@ class ER_Diagram(FileWriter):
     """
     DSI Writer that generates an ER Diagram from the current data in the DSI abstraction
     """
-    def __init__(self, filename, target_table_prefix = None, **kwargs):
+    def __init__(self, filename, target_table_prefix = None, max_cols = None, **kwargs):
         """
         Initializes the ER Diagram writer
 
@@ -35,10 +35,15 @@ def __init__(self, filename, target_table_prefix = None, **kwargs):
             If provided, filters the ER Diagram to only include tables whose names begin with this prefix.
 
             - Ex: If prefix = "student", only "student__address", "student__math", "student__physics" tables are included
+
+        `max_cols` : int, optional, default None
+            If provided, limits the number of columns displayed for each table in the ER Diagram.
+            If relational data is included, this must be >= the number of primary and foreign key columns in each table.
         """
         super().__init__(filename, **kwargs)
         self.output_filename = filename
         self.target_table_prefix = target_table_prefix
+        self.max_cols = max_cols
 
     def get_rows(self, collection) -> None:
         """
@@ -99,7 +104,23 @@ def get_rows(self, collection) -> None:
 
             col_list = tableData.keys()
             if tableName == "dsi_units":
-                col_list = ["table_name", "column_and_unit"]
+                col_list = ["table_name", "column_name", "unit"]
+            if self.max_cols is not None:
+                if "dsi_relations" in collection.keys():
+                    fk_cols = [t[1] for t in collection["dsi_relations"]["foreign_key"] if t[0] == tableName]
+                    pk_cols = [t[1] for t in collection["dsi_relations"]["primary_key"] if t[0] == tableName]
+                    rel_cols = set(pk_cols + fk_cols)
+
+                    if rel_cols:
+                        if len(rel_cols) > self.max_cols:
+                            return (ValueError, "'max_cols' must be >= to the number of primary/foreign key columns.")
+                        other_cols = [col for col in col_list if col not in rel_cols]
+                        combined = list(rel_cols) + other_cols[:self.max_cols - len(rel_cols)]
+                        col_list = [k for k in col_list if k in combined]
+                col_list = col_list[:self.max_cols]
+                if len(tableData.keys()) > self.max_cols:
+                    col_list.append("...")
+
             curr_row = 0
             inner_brace = 0
             for col_name in col_list:
@@ -121,9 +142,9 @@ def get_rows(self, collection) -> None:
 
         if "dsi_relations" in collection.keys():
             for f_table, f_col in collection["dsi_relations"]["foreign_key"]:
-                if self.target_table_prefix is not None and self.target_table_prefix not in f_table:
+                if self.target_table_prefix is not None and f_table is not None and self.target_table_prefix not in f_table:
                     continue
-                if f_table != None:
+                if f_table is not None:
                     foreignIndex = collection["dsi_relations"]["foreign_key"].index((f_table, f_col))
                     fk_string = f"{f_table}:{f_col}"
                     pk_string = f"{collection['dsi_relations']['primary_key'][foreignIndex][0]}:{collection['dsi_relations']['primary_key'][foreignIndex][1]}"
@@ -137,7 +158,10 @@ def get_rows(self, collection) -> None:
             subprocess.run(["dot", "-T", file_type[1:], "-o", self.output_filename + file_type, self.output_filename + ".dot"])
             os.remove(self.output_filename + ".dot")
         else:
-            dot.render(self.output_filename, cleanup=True)
+            try:
+                dot.render(self.output_filename, cleanup=True)
+            except:
+                return (EnvironmentError, "Graphviz executable must be downloaded to global environment using sudo or homebrew.")
 
 class Csv_Writer(FileWriter):
     """