Address CSV file data processing: add --csvStartRow and --txyz options for reading CSV input

“Aidan · “Aidan · commit 43230f445a2d · 2025-05-13T12:34:03.000+01:00
diff --git a/src/actinet/actinet.py b/src/actinet/actinet.py
@@ -78,6 +78,18 @@ def main():
         help="Exclude days with wear time below threshold. Pass values as strings, e.g.: '12H', '30min'. Default: None (no exclusion)",
         type=str, 
         default=None)
+    parser.add_argument(
+        "--csvStartRow",
+        help="Row number to start reading a CSV file. Default: 0",
+        type=int,
+        default=0,
+    )
+    parser.add_argument("--txyz",
+                        help=("Use this option to specify the column names for time, x, y, z "
+                              "in the input file, in that order. Use a comma-separated string. "
+                              "Only needed for CSV files, can be ignored for other file types. "
+                              "Default: 'time,x,y,z'"),
+                        type=str, default="time,x,y,z")
     parser.add_argument(
         "--plot-activity",
         "-p",
@@ -129,6 +141,8 @@ def main():
     # Load file
     data, info_read = read(
         args.filepath,
+        args.txyz,
+        args.csvStartRow -1,  # -1 to convert to zero-based index
         resample_hz=None,
         sample_rate=args.sample_rate,
         verbose=verbose,
@@ -258,27 +272,33 @@ def main():
 
 
 def read(
-    filepath, resample_hz="uniform", sample_rate=None, lowpass_hz=None, verbose=True
+    filepath, usecols, skipRows=0, resample_hz="uniform", 
+    sample_rate=None, lowpass_hz=None, verbose=True
 ):
 
     p = pathlib.Path(filepath)
     ftype = p.suffixes[0].lower()
     fsize = round(p.stat().st_size / (1024 * 1024), 1)
 
     if ftype in (".csv", ".pkl"):
-
         if ftype == ".csv":
+            tcol, xcol, ycol, zcol = usecols.split(',')
+            
             data = pd.read_csv(
                 filepath,
-                usecols=["time", "x", "y", "z"],
-                parse_dates=["time"],
-                index_col="time",
-                dtype={"x": "f4", "y": "f4", "z": "f4"},
+                usecols=[tcol, xcol, ycol, zcol],
+                parse_dates=[tcol],
+                index_col=tcol,
+                dtype={xcol: "f4", ycol: "f4", zcol: "f4"},
+                skiprows=skipRows,
             )
+
+            # rename to standard names
+            data = data.rename(columns={xcol: 'x', ycol: 'y', zcol: 'z'})
+            data.index.name = 'time'
+
         elif ftype == ".pkl":
             data = pd.read_pickle(filepath)
-        else:
-            raise ValueError(f"Unknown file format: {ftype}")
 
         if sample_rate in (None, False):
             freq = infer_freq(data.index)
@@ -319,6 +339,9 @@ def read(
             verbose=verbose,
         )
 
+    else:
+        raise ValueError(f"Unknown file format: {ftype}")
+
     if "ResampleRate" not in info:
         info["ResampleRate"] = info["SampleRate"]