Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 63 additions & 65 deletions Run/readScopeRuns.m
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
%- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
% [data] = readScopeRuns(url, varargin)
%
% READSCOPERUNS looks up a published Google Sheet (by URL/ID) and converts all info in
% the main table into a cell array, then pulls out data corresponding to specified ID(s)
% READSCOPERUNS looks up a published Google Sheet (by URL/ID) and requests
% it as a CSV. It then pulls out data corresponding to specified ID(s)
%
% url input Google Sheet URL
% run_rows IDs corresponding to rows on Google Sheet (1 row per experimental condition)
Expand All @@ -14,86 +14,84 @@
error('ERROR: specify at least 1 ID number from ''Scope Runs'' spreadsheet')
end

% Find the sheet ID based on the URL. We can then do our query
% The regex pattern below uses named groups for the sheet id and the remainder of the URL

% Read url's contents into string
pageString = urlread(url);
reMatch = regexp(url, '/docs.google.com\/spreadsheets\/d\/(?<id>[^\/]+)(?<remainder>[^#]*)', 'names');

% Find all tables on page.
[tables] = regexp(pageString, '(<table[^>]*>(?:(?>[^<]+)|<(?!table[^>]*>))*?</table>)','tokens');
% Check if we found a match for ID.
% reMatch holds the named tokens captured from the URL; stop early when no
% 'id' token was captured, since the export URL cannot be built without it.
if isempty(reMatch) || ~isfield(reMatch, 'id') || isempty(reMatch.id)
    % In a double-quoted string an apostrophe is written once; doubling it
    % (as in a single-quoted char vector) would print two apostrophes.
    error("ERROR: Couldn't find the spreadsheet ID in the sheet!")
end

% Build cell array of table data
full_url = "https://docs.google.com/spreadsheets/d/" + reMatch.id + "/export?format=csv";

for i = 1:length(tables)
table = tables{i};
rows = regexpi(table, '<tr.*?>(.*?)</tr>','tokens');
table_data = cell(0);
% Pull off headers (if present)
headers = regexpi(rows{1}{1}, '<th.*?>(.*?)</th>','tokens');
if isempty(headers{1})
start_mod = 0;
else
start_mod = 1;
end

% Cycle rows, then columns - pull information into a cell array
for j = 1:(numel(rows{1})-start_mod)
cols = regexpi(rows{1}{j+start_mod}, '<td.*?>(.*?)</td>','tokens');
for k = 1:numel(cols{1})
tmp = regexprep(cols{1}{k},'<.*?>', '');
table_data{j,k} = tmp{1};
end
end

% Stop scanning tables once we get to a valid one
if strcmp(table_data{1,1},'#')
break
% check if there was a gid (a tab specified)
if isfield(reMatch, 'remainder') && ~isempty(reMatch.remainder)
gidPattern = '\/[^?]+[?].*(gid=)(?<gid>.*)';
gidMatch = regexp(reMatch.remainder, gidPattern, 'names');
if ~isempty(gidMatch) && isfield(gidMatch, 'gid') && ~isempty(gidMatch.gid)
full_url = full_url + "&gid=" + gidMatch.gid;
end

end


% Drop any empty rows/columns
table_data(:,sum(cellfun(@isempty,table_data),1)==size(table_data,1)) = [];
table_data(sum(cellfun(@isempty,table_data),2)==size(table_data,2),:) = [];
% Pull it in, preserving the original variable names and keeping datetimes
% as text to avoid incorrect parsing/getting a bunch of annoying error messages
opts = detectImportOptions(full_url,"VariableNamingRule", "preserve", "DatetimeType","text");
r = readtable(full_url, opts);

% REMOVE ALL SPACES AND LOWERCASE ALL LETTERS FROM VARIABLE NAMES
r.Properties.VariableNames = cellfun(@(data) lower(data(~isspace(data))), r.Properties.VariableNames, 'UniformOutput', false);

% CHECK IF THE COLUMN # EXISTS
if ~ismember('#', r.Properties.VariableNames)
error('ERROR: Could not find a column named "#" in spreadsheet.');
end

% Find the rows we want (the ones mentioned in run_rows)
relevant_rows = r(ismember(r.("#"),run_rows),:);

% Get IDs and pull out corresponding rows
%ids = cellfun(@str2num,table_data(2:end,strcmpi(table_data(1,:),'#')), 'UniformOutput', false);
ids = cellfun(@str2num,table_data(2:end,strcmpi(table_data(1,:),'#')));
missing_requested_rows = setdiff(run_rows, relevant_rows.('#'));

% cellfun applies a function to each cell in a cell array; here it converts
% the strings in the column called '#' (all rows except the 1st) to numbers.
% The '#' column is located by searching all columns in row 1 for '#'.
if isempty(ids)
error('ERROR: Couldn''t find a column named "#" in spreadsheet.')
% If we didn't find anything we asked for
if height(relevant_rows) == 0
error("ERROR: Could not find any rows in the sheet also listed in the target rows");
end

[~,locs] = ismember(ids, run_rows);
locs = find(locs);

data.save_folder = table_data(2:end,strcmpi(table_data(1,:),'folder name'));
data.image_paths = table_data(2:end,strcmpi(table_data(1,:),'image path'));
data.xy_ranges = table_data(2:end,strcmpi(table_data(1,:),'xy'));
data.time_ranges = table_data(2:end,strcmpi(table_data(1,:),'t'));
data.parameter_files = table_data(2:end,strcmpi(table_data(1,:),'params file'));
data.save_dir = table_data(2:end,strcmpi(table_data(1,:),'save path'));
data.modify = table_data(2:end,strcmpi(table_data(1,:),'other params'));
%20201208 addition SL dose info from table
data.dose = table_data(2:end,strcmpi(table_data(1,:),'dose'));
% For parameter modifier column: sub in single quotes that are misformatted after google sheet read
for idx = 1:length(data.modify)
tmp_str = data.modify{idx};
subs = strfind(tmp_str,'&#39');
for i = 1:length(subs); tmp_str(subs(i):subs(i)+4) = '~~''~~'; end
tmp_str(strfind(tmp_str,'~')) = [];
data.modify{idx} = tmp_str;
% If we asked for at least one thing that wasn't in the spreadsheet
if not(isempty(missing_requested_rows))
error(strcat("ERROR: Could not find row(s) " + strjoin(string(missing_requested_rows), ", ") + " in sheet!"));
end

% Double check that all necessary data was found and subset data
% Expected columns. Names are written without spaces and in lowercase
% because the table's variable names are normalized that way after import.
expectedFields = {'#', 'foldername', 'imagepath', 'xy', 't', 'paramsfile', 'savepath', 'otherparams', 'dose'};

% Check for missing columns: anything in expectedFields that is not a
% variable name of the selected rows.
missingFields = setdiff(expectedFields, relevant_rows.Properties.VariableNames);
if ~isempty(missingFields)
    % Use %s so the joined column names are inserted as text. The previous
    % "%data" was parsed as %d followed by the literal "ata", which printed
    % character codes instead of the column names.
    error("ERROR: Missing expected columns: %s", strjoin(missingFields, ", "));
end

data = struct();
data.save_folder = relevant_rows.foldername;
data.image_paths = relevant_rows.imagepath;
data.xy_ranges = relevant_rows.xy;
data.time_ranges = relevant_rows.t;
data.parameter_files = relevant_rows.paramsfile;
data.save_dir = relevant_rows.savepath;
data.modify = relevant_rows.otherparams;

data.dose = relevant_rows.dose;

% Double check that all necessary data was found and take only selected
% rows
% NOTE: THE FOLLOWING IS REMAINING FROM A PRIOR VERSION
% TODO: Check if it should be removed or revised.
types = fieldnames(data);
for i = 1:length(types)
if isempty(data.(types{i}))
error(['ERROR: Couldn''t find column "',types{i},'" in spreadsheet.'])
end
data.(types{i}) = data.(types{i})(locs);
% Already did the following so I'm commenting it out
%data.(types{i}) = data.(types{i})(locs);
end