diff --git a/Run/readScopeRuns.m b/Run/readScopeRuns.m
index b14cef6..7af9887 100644
--- a/Run/readScopeRuns.m
+++ b/Run/readScopeRuns.m
@@ -2,8 +2,8 @@
 %- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 % [data] = readScopeRuns(url, varargin)
 %
-% READSCOPERUNS looks up a published Google Sheet (by URL/ID) and converts all info in
-% the main table into a cell array, then pulls out data corresponding to specified ID(s)
+% READSCOPERUNS looks up a published Google Sheet (by URL/ID) and requests
+% it as a CSV. It then pulls out data corresponding to specified ID(s)
 %
 % url input Google Sheet URL
 % run_rows IDs corresponding to rows on Google Sheet (1 row per experimental condition)
@@ -14,86 +14,84 @@
     error('ERROR: specify at least 1 ID number from ''Scope Runs'' spreadsheet')
 end
 
+% Find the sheet ID based on the URL. We can then do our query
+% The pattern below uses named groups: id (sheet ID) and remainder (rest of the URL)
 
-% Read url's contents into string
-pageString = urlread(url);
+reMatch = regexp(url, '/docs.google.com\/spreadsheets\/d\/(?<id>[^\/]+)(?<remainder>.*)', 'names');
 
-% Find all tables on page.
-[tables] = regexp(pageString, '(]*>(?:(?>[^<]+)|<(?!table[^>]*>))*?)','tokens');
+% Check if we found a match for ID
+if isempty(reMatch) || ~isfield(reMatch, 'id') || isempty(reMatch.id)
+    error("ERROR: Couldn't find the spreadsheet ID in the sheet!")
+end
 
-% Build cell aray of table data
+full_url = "https://docs.google.com/spreadsheets/d/" + reMatch.id + "/export?format=csv";
 
-for i = 1:length(tables)
-    table = tables{i};
-    rows = regexpi(table, '(.*?)','tokens');
-    table_data = cell(0);
-    % Pull off headers (if present)
-    headers = regexpi(rows{1}{1}, '(.*?)','tokens');
-    if isempty(headers{1})
-        start_mod = 0;
-    else
-        start_mod = 1;
-    end
-
-    % Cycle rows, then columns - pull information into a cell array
-    for j = 1:(numel(rows{1})-start_mod)
-        cols = regexpi(rows{1}{j+start_mod}, '(.*?)','tokens');
-        for k = 1:numel(cols{1})
-            tmp = regexprep(cols{1}{k},'<.*?>', '');
-            table_data{j,k} = tmp{1};
-        end
-    end
-
-    % Stop scanning tables once we get to a valid one
-    if strcmp(table_data{1,1},'#')
-        break
+% Check if a gid (a specific tab) was given in the URL query or fragment
+if isfield(reMatch, 'remainder') && ~isempty(reMatch.remainder)
+    gidPattern = '[?&#]gid=(?<gid>\d+)';
+    gidMatch = regexp(reMatch.remainder, gidPattern, 'names', 'once');
+    if ~isempty(gidMatch) && isfield(gidMatch, 'gid') && ~isempty(gidMatch.gid)
+        full_url = full_url + "&gid=" + gidMatch.gid;
     end
-
 end
-
-% Drop any empty rows/columns
-table_data(:,sum(cellfun(@isempty,table_data),1)==size(table_data,1)) = [];
-table_data(sum(cellfun(@isempty,table_data),2)==size(table_data,2),:) = [];
+% Pull it in, preserving the original variable names and keeping datetimes
+% as text to avoid incorrect parsing/getting a bunch of annoying error messages
+opts = detectImportOptions(full_url,"VariableNamingRule", "preserve", "DatetimeType","text");
+r = readtable(full_url, opts);
+
+% REMOVE ALL SPACES AND LOWERCASE ALL LETTERS FROM VARIABLE NAMES
+r.Properties.VariableNames = cellfun(@(data) lower(data(~isspace(data))), r.Properties.VariableNames, 'UniformOutput', false);
+
+% CHECK IF THE COLUMN # EXISTS
+if ~ismember('#', r.Properties.VariableNames)
+    error('ERROR: Could not find a column named "#" in spreadsheet.');
+end
+% Find the rows we want (the ones mentioned in run_rows)
+relevant_rows = r(ismember(r.("#"),run_rows),:);
 
-% Get IDs and pull out corresponding rows
-%ids = cellfun(@str2num,table_data(2:end,strcmpi(table_data(1,:),'#')), 'UniformOutput', false);
-ids = cellfun(@str2num,table_data(2:end,strcmpi(table_data(1,:),'#')));
+missing_requested_rows = setdiff(run_rows, relevant_rows.('#'));
 
-% cellfun applies function to each cells in cell array, converts string to
-% number of the google sheet (all rows except 1st, in the column called #
-% (search all columns in row 1 for #)
-if isempty(ids)
-    error('ERROR: Couldn''t find a column named "#" in spreadsheet.')
+% If we didn't find anything we asked for
+if height(relevant_rows) == 0
+    error("ERROR: Could not find any rows in the sheet also listed in the target rows");
 end
 
-[~,locs] = ismember(ids, run_rows);
-locs = find(locs);
-
-data.save_folder = table_data(2:end,strcmpi(table_data(1,:),'folder name'));
-data.image_paths = table_data(2:end,strcmpi(table_data(1,:),'image path'));
-data.xy_ranges = table_data(2:end,strcmpi(table_data(1,:),'xy'));
-data.time_ranges = table_data(2:end,strcmpi(table_data(1,:),'t'));
-data.parameter_files = table_data(2:end,strcmpi(table_data(1,:),'params file'));
-data.save_dir = table_data(2:end,strcmpi(table_data(1,:),'save path'));
-data.modify = table_data(2:end,strcmpi(table_data(1,:),'other params'));
-%20201208 addiiton SL dose info from table
-data.dose = table_data(2:end,strcmpi(table_data(1,:),'dose'));
-% For parameter modifier column: sub in single quotes that are misformatted after google sheet read
-for idx = 1:length(data.modify)
-    tmp_str = data.modify{idx};
-    subs = strfind(tmp_str,''');
-    for i = 1:length(subs); tmp_str(subs(i):subs(i)+4) = '~~''~~'; end
-    tmp_str(strfind(tmp_str,'~')) = [];
-    data.modify{idx} = tmp_str;
+% If we asked for at least one thing that wasn't in the spreadsheet
+if not(isempty(missing_requested_rows))
+    error(strcat("ERROR: Could not find row(s) " + strjoin(string(missing_requested_rows), ", ") + " in sheet!"));
 end
 
-% Double check that all necessary data was found and subset data
+% Expected columns
+expectedFields = {'#', 'foldername', 'imagepath', 'xy', 't', 'paramsfile', 'savepath', 'otherparams', 'dose'};
+
+% Check for missing columns
+missingFields = setdiff(expectedFields, relevant_rows.Properties.VariableNames);
+if ~isempty(missingFields)
+    error("ERROR: Missing expected columns: %s", strjoin(missingFields, ", "));
+end
+
+data = struct();
+data.save_folder = relevant_rows.foldername;
+data.image_paths = relevant_rows.imagepath;
+data.xy_ranges = relevant_rows.xy;
+data.time_ranges = relevant_rows.t;
+data.parameter_files = relevant_rows.paramsfile;
+data.save_dir = relevant_rows.savepath;
+data.modify = relevant_rows.otherparams;
+
+data.dose = relevant_rows.dose;
+
+% Double check that all necessary data was found and take only selected
+% rows
+% NOTE: THE FOLLOWING IS REMAINING FROM A PRIOR VERSION
+% TODO: Check if it should be removed or revised.
 types = fieldnames(data);
 for i = 1:length(types)
     if isempty(data.(types{i}))
         error(['ERROR: Couldn''t find column "',types{i},'" in spreadsheet.'])
     end
-    data.(types{i}) = data.(types{i})(locs);
+    % Already did the following so I'm commenting it out
+    %data.(types{i}) = data.(types{i})(locs);
 end
 
 