signalingsystemslab · SarahPythonista · Aug 26, 2025
diff --git a/Run/readScopeRuns.m b/Run/readScopeRuns.m
@@ -2,8 +2,8 @@
 %- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
 % [data] = readScopeRuns(url, varargin)
 %
-% READSCOPERUNS looks up a published Google Sheet (by URL/ID) and converts all info in 
-% the main table into a cell array, then pulls out data corresponding to specified ID(s)
+% READSCOPERUNS looks up a published Google Sheet (by URL/ID) and requests 
+% it as a CSV. It then pulls out data corresponding to specified ID(s)
 %
 % url         input Google Sheet URL
 % run_rows    IDs corresponding to rows on Google Sheet (1 row per experimental condition)
@@ -14,86 +14,84 @@
     error('ERROR: specify at least 1 ID number from ''Scope Runs'' spreadsheet')
 end
 
+% Find the sheet ID based on the URL so we can build the CSV export query.
+% The regex uses named groups: "id" (sheet ID, stops at "/", "?" or "#") and "remainder" (rest of the URL up to "#")
 
-% Read url's contents into string
-pageString = urlread(url);
+reMatch = regexp(url, '/docs\.google\.com\/spreadsheets\/d\/(?<id>[^\/?#]+)(?<remainder>[^#]*)', 'names');
 
-% Find all tables on page.
-[tables] = regexp(pageString, '(<table[^>]*>(?:(?>[^<]+)|<(?!table[^>]*>))*?</table>)','tokens');
+% Stop early if the URL did not contain a spreadsheet ID
+if isempty(reMatch) || ~isfield(reMatch, 'id') || isempty(reMatch.id)
+    error('ERROR: Couldn''t find the spreadsheet ID in the URL!')
+end
 
-% Build cell aray of table data
+full_url = "https://docs.google.com/spreadsheets/d/" + reMatch.id + "/export?format=csv"; % CSV export request for this sheet
 
-for i = 1:length(tables)
-    table = tables{i};
-    rows = regexpi(table, '<tr.*?>(.*?)</tr>','tokens');
-    table_data = cell(0);
-    % Pull off headers (if present)
-    headers = regexpi(rows{1}{1}, '<th.*?>(.*?)</th>','tokens');
-    if isempty(headers{1})
-        start_mod = 0;
-    else
-        start_mod = 1;
-    end
-
-    % Cycle rows, then columns - pull information into a cell array
-    for j = 1:(numel(rows{1})-start_mod)
-        cols = regexpi(rows{1}{j+start_mod}, '<td.*?>(.*?)</td>','tokens');
-        for k = 1:numel(cols{1})
-            tmp = regexprep(cols{1}{k},'<.*?>', '');
-            table_data{j,k} = tmp{1};
-        end
-    end
-
-    % Stop scanning tables once we get to a valid one
-    if strcmp(table_data{1,1},'#')
-        break
+% Check whether the URL names a specific tab (gid), written as "?gid=", "&gid=" or "#gid="
+if contains(url, 'gid=')
+    % Search the full URL: reMatch.remainder stops at "#", so it never holds a "#gid=" fragment
+    gidMatch = regexp(url, '[?&#]gid=(?<gid>\d+)', 'names');
+    if ~isempty(gidMatch)
+        full_url = full_url + "&gid=" + gidMatch(1).gid; % first match wins if gid appears twice
     end
-
 end
-
 
-% Drop any empty rows/columns
-table_data(:,sum(cellfun(@isempty,table_data),1)==size(table_data,1)) = [];
-table_data(sum(cellfun(@isempty,table_data),2)==size(table_data,2),:) = [];
+% Import the CSV export as a table. Column headers are kept verbatim and
+% datetimes are read as text to avoid incorrect parsing and noisy error messages.
+importOpts = detectImportOptions(full_url,"VariableNamingRule", "preserve", "DatetimeType","text");
+r = readtable(full_url, importOpts);
+
+% Normalize the column headers: strip all whitespace and convert to lowercase
+stripAndLower = @(header) lower(header(~isspace(header)));
+r.Properties.VariableNames = cellfun(stripAndLower, r.Properties.VariableNames, 'UniformOutput', false);
+% The "#" column is used to select rows below; stop here if the sheet has none
+if ~any(strcmp('#', r.Properties.VariableNames))
+    error('ERROR: Could not find a column named "#" in spreadsheet.');
+end
 
+% Keep only the sheet rows whose "#" value is listed in run_rows (sheet order is preserved)
+relevant_rows = r(ismember(r.("#"),run_rows),:);
 
-% Get IDs and pull out corresponding rows
-%ids = cellfun(@str2num,table_data(2:end,strcmpi(table_data(1,:),'#')), 'UniformOutput', false); 
-ids = cellfun(@str2num,table_data(2:end,strcmpi(table_data(1,:),'#'))); 
+missing_requested_rows = setdiff(run_rows, relevant_rows.('#')); % requested IDs that matched no row
 
-% cellfun applies function to each cells in cell array, converts string to
-% number of the google sheet (all rows except 1st, in the column called #
-% (search all columns in row 1 for #)
-if isempty(ids)
-    error('ERROR: Couldn''t find a column named "#" in spreadsheet.')
+% Fail if none of the requested IDs matched a row in the sheet
+if height(relevant_rows) == 0
+    error("ERROR: Could not find any rows in the sheet also listed in the target rows");
 end
 
-[~,locs] = ismember(ids, run_rows);
-locs = find(locs);
-
-data.save_folder = table_data(2:end,strcmpi(table_data(1,:),'folder name'));
-data.image_paths = table_data(2:end,strcmpi(table_data(1,:),'image path'));
-data.xy_ranges = table_data(2:end,strcmpi(table_data(1,:),'xy'));
-data.time_ranges = table_data(2:end,strcmpi(table_data(1,:),'t'));
-data.parameter_files = table_data(2:end,strcmpi(table_data(1,:),'params file'));
-data.save_dir = table_data(2:end,strcmpi(table_data(1,:),'save path'));
-data.modify = table_data(2:end,strcmpi(table_data(1,:),'other params'));
-%20201208 addiiton SL dose info from table
-data.dose = table_data(2:end,strcmpi(table_data(1,:),'dose'));
-% For parameter modifier column: sub in single quotes that are misformatted after google sheet read
-for idx = 1:length(data.modify)
-        tmp_str = data.modify{idx};
-        subs = strfind(tmp_str,'&#39');
-        for i = 1:length(subs); tmp_str(subs(i):subs(i)+4) = '~~''~~'; end
-        tmp_str(strfind(tmp_str,'~')) = [];
-        data.modify{idx} = tmp_str;
+% Fail if any requested ID has no matching row in the spreadsheet
+if ~isempty(missing_requested_rows)
+    error("ERROR: Could not find row(s) " + strjoin(string(missing_requested_rows), ", ") + " in sheet!");
 end
 
-% Double check that all necessary data was found and subset data
+% Expected column names, in normalized form (lowercase, whitespace removed)
+expectedFields = {'#', 'foldername', 'imagepath', 'xy', 't', 'paramsfile', 'savepath', 'otherparams', 'dose'};
+
+% Fail with the full list of any expected columns the sheet does not have
+missingFields = setdiff(expectedFields, relevant_rows.Properties.VariableNames);
+if ~isempty(missingFields)
+    error("ERROR: Missing expected columns: %s", strjoin(missingFields, ", "));
+end
+
+% Collect the selected rows into the output struct, one field per sheet column
+data = struct( ...
+    'save_folder',     {relevant_rows.foldername}, ...
+    'image_paths',     {relevant_rows.imagepath}, ...
+    'xy_ranges',       {relevant_rows.xy}, ...
+    'time_ranges',     {relevant_rows.t}, ...
+    'parameter_files', {relevant_rows.paramsfile}, ...
+    'save_dir',        {relevant_rows.savepath}, ...
+    'modify',          {relevant_rows.otherparams}, ...
+    'dose',            {relevant_rows.dose});
+
+% Sanity check: every output field must be non-empty.
+% NOTE(review): this loop is left over from a prior version. Each field is a
+% column of relevant_rows, which has at least one row by this point, so the
+% check looks unreachable. TODO: decide whether to remove or revise it.
 types = fieldnames(data);
 for i = 1:length(types)
     if isempty(data.(types{i}))
         error(['ERROR: Couldn''t find column "',types{i},'" in spreadsheet.'])
     end
-    data.(types{i}) = data.(types{i})(locs);
+    % Row subsetting already happened when relevant_rows was built, so the
+    % old per-field indexing stays disabled: data.(types{i}) = data.(types{i})(locs);
 end