This repository was archived by the owner on Mar 28, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGLIMS.py
More file actions
executable file
·219 lines (195 loc) · 8.81 KB
/
GLIMS.py
File metadata and controls
executable file
·219 lines (195 loc) · 8.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
#!/usr/bin/env python
from google.appengine.api import users
#from google.appengine.ext import webapp
import gdata.docs.data
import gdata.docs.client
import gdata.spreadsheet.service
#import logging
import re, os, tempfile, bisect
from collection import *
class Helper:
    """Thin wrapper around the gdata Docs and Spreadsheets clients.

    Holds an authenticated DocsClient (HTTPS forced) plus a
    SpreadsheetsService sharing the same source tag, and exposes
    folder-listing helpers that return plain dicts.
    """

    def __init__(self):
        # Current App Engine user for this request context.
        self.user = users.get_current_user()
        client = gdata.docs.client.DocsClient(source='helper-0r1')
        client.ssl = True  # force every API request through HTTPS
        client.http_client.debug = False  # flip to True to trace HTTP traffic
        self.client = client
        self.spreadsheets_client = gdata.spreadsheet.service.SpreadsheetsService(
            source=client.source)

    def get_studies(self):
        """List the study folders under the fixed BiRG data collection.

        Returns a list of {'entry', 'resource_id', 'name'} dicts.
        """
        uri = '/feeds/default/private/full/-/folder?title=BiRG%20Studies%20Data%20-%20DO%20NOT%20RENAME&title-exact=true'
        return [{'entry': entry,
                 'resource_id': entry.resource_id.text,
                 'name': entry.title.text}
                for entry in self.client.GetDocList(uri=uri).entry]

    def get_collections(self, root_collection_name):
        """List folders whose title exactly matches root_collection_name.

        NOTE(review): the name is interpolated into the feed URI without
        URL-encoding, so names containing spaces or '&' will misbehave —
        confirm callers only pass URL-safe names.
        """
        uri = ('/feeds/default/private/full/-/folder?title=' +
               root_collection_name + '&title-exact=true')
        return [{'entry': entry,
                 'resource_id': entry.resource_id.text,
                 'name': entry.title.text}
                for entry in self.client.GetDocList(uri=uri).entry]

    def get_sub_collections(self, e):
        """Recursively list the folder tree rooted at entry e.

        Each element is {'entry', 'resource_id', 'name', 'sub'} where
        'sub' is the same structure one level down.  Non-folder entries
        (including e itself, when its content src is not a folder feed)
        yield an empty list.
        """
        src = e.content.src
        if not re.search('folder', src):
            return []
        children = self.client.GetDocList(uri=src + '/-/folder?showfolders=true')
        return [{'entry': child,
                 'resource_id': child.resource_id.text,
                 'name': child.title.text,
                 'sub': self.get_sub_collections(child)}
                for child in children.entry
                if child.GetDocumentType() == 'folder']
class Study:
    """One study stored as a tree of Google Docs collections.

    Layout built/read by this class:

        <root collection>
            <metadata field collection>      e.g. "dose"
                <metadata value collection>  e.g. "10mg"

    Data spreadsheets live in the root collection and are additionally
    filed under every metadata value collection they belong to.

    Attributes:
        helper     -- Helper supplying the gdata Docs/Spreadsheets clients.
        root_entry -- gdata entry of the study's root collection.
        entries    -- field name -> gdata entry of that field's collection.
        metadata   -- field name -> {value -> gdata entry of the value
                      collection}.
    """

    def __init__(self, helper, root_collection):
        """Index an existing study tree, or create a fresh root.

        root_collection -- an existing root collection entry, or a str
            name, in which case a brand-new (empty) root collection is
            created and there are no sub-collections to scan.
        """
        self.helper = helper
        self.metadata = {}
        self.entries = {}
        if isinstance(root_collection, str):
            # Fresh study: create the root folder; nothing to index yet.
            # (Scanning a str would crash in get_sub_collections.)
            self.root_entry = self.helper.client.Create(
                gdata.docs.data.FOLDER_LABEL, root_collection)
        else:
            self.root_entry = root_collection
            # Existing study: rebuild the field/value index from the
            # two-level sub-collection tree.
            for field in helper.get_sub_collections(root_collection):
                self.metadata[field['name']] = {}
                self.entries[field['name']] = field['entry']
                for value in field['sub']:
                    self.metadata[field['name']][value['name']] = value['entry']

    def add_metadata_field(self, name):
        """Create a collection for metadata field `name` unless it exists.

        Retries forever on API errors.  (Fix: previously the `try` sat
        OUTSIDE the `while` loop, so a failure printed "Retrying..."
        but aborted instead of retrying.)
        """
        if name in self.metadata:
            return
        while True:
            try:
                self.entries[name] = self.helper.client.Create(
                    gdata.docs.data.FOLDER_LABEL, str(name),
                    folder_or_id=self.root_entry)
                self.metadata[name] = {}
                break
            except Exception:
                print("Create folder error. Retrying...")

    def add_metadata_value(self, name, value):
        """Create a collection for `value` under field `name` unless it
        already exists, retrying forever on API errors."""
        if value in self.metadata[name]:
            return
        while True:
            try:
                self.metadata[name][value] = self.helper.client.Create(
                    gdata.docs.data.FOLDER_LABEL, str(value),
                    folder_or_id=self.entries[name])
                break
            except Exception:
                print("Create folder error. Retrying...")

    def move_into_folder(self, entries, folder):
        """Move one entry (or a list of entries) into `folder`, retrying
        each individual move until it succeeds."""
        if not isinstance(entries, list):
            entries = [entries]
        for doc in entries:
            while True:
                try:
                    self.helper.client.Move(doc, folder)
                    break
                except Exception:
                    print("Move error. Retrying...")

    def get_files_by_value(self, name, value):
        """Return the spreadsheet entries filed under `name` = `value`.

        Returns None when the field or value is unknown; otherwise a
        (possibly empty) list of spreadsheet entries.
        """
        if name not in self.metadata:
            return None
        if value not in self.metadata[name]:
            return None
        src = self.metadata[name][value].content.src
        children_feed = self.helper.client.GetDocList(uri=src + '/-/contents')
        return [ce for ce in children_feed.entry
                if ce.GetDocumentType() == 'spreadsheet']

    def merge_xy_files(self, files):
        """Download each XY spreadsheet as CSV and merge them column-wise.

        Returns (xY, sorted_keys) where xY maps x -> [y_0, y_1, ...]
        (one slot per input file; None when a file has no sample near
        that x) and sorted_keys lists the x keys in the first file's
        original order.

        The x axis of the FIRST file defines the keys; later files'
        points are snapped to the nearest existing key, averaging
        pairwise when several points collapse onto one key.

        NOTE(review): assumes the first file's x column is sorted
        descending (it is reversed once so bisect sees an ascending
        list) -- confirm against the data producer.
        """
        # Export() needs a spreadsheets-service token; swap it in and
        # guarantee the Docs token is restored even on failure (fix:
        # previously an exception left the wrong token installed).
        docs_token = self.helper.client.auth_token
        self.helper.client.auth_token = gdata.gauth.ClientLoginToken(
            self.helper.spreadsheets_client.GetClientLoginToken())
        try:
            xY = {}
            sorted_keys = []
            for i, spreadsheet in enumerate(files):
                fd, path = tempfile.mkstemp()
                os.close(fd)  # fix: the mkstemp fd was leaked before
                self.helper.client.Export(spreadsheet, path + '.csv')
                f = open(path + '.csv', 'r')
                try:
                    xys = [line.split(",") for line in f]
                finally:
                    f.close()
                # fix: remove the temp files instead of leaking them.
                os.remove(path + '.csv')
                os.remove(path)
                if i == 0:
                    # The first file defines the set of x keys.
                    for xy in xys:
                        x = float(xy[0])
                        xY[x] = []
                        sorted_keys.append(x)
                    # bisect below requires ascending order.
                    sorted_keys.reverse()
                # One fresh None slot per key for this file.
                for x in sorted_keys:
                    xY[x].append(None)
                for xy in xys:
                    x = float(xy[0])
                    y = float(xy[1])
                    x_key = x
                    if x not in xY:
                        # Snap to the nearest existing key.
                        position = bisect.bisect(sorted_keys, x)
                        prev_x = sorted_keys[position - 1] if position > 0 else None
                        # fix: the successor at an insertion point is
                        # sorted_keys[position], not position + 1.
                        next_x = (sorted_keys[position]
                                  if position < len(sorted_keys) else None)
                        if prev_x is None and next_x is None:
                            # fix: `raise("...")` raised a bare str,
                            # itself a TypeError at runtime.
                            raise ValueError("Something is incorrect")
                        elif prev_x is None:
                            x_key = next_x
                        elif next_x is None:
                            x_key = prev_x
                        elif abs(next_x - x) < abs(prev_x - x):
                            x_key = next_x
                        else:
                            x_key = prev_x
                    if xY[x_key][i] is None:  # not yet filled for this file
                        xY[x_key][i] = y
                    else:
                        # Several points snapped onto one key: average.
                        xY[x_key][i] = (xY[x_key][i] + y) / 2
        finally:
            self.helper.client.auth_token = docs_token
        # Hand the keys back in the first file's original order.
        sorted_keys.reverse()
        return xY, sorted_keys

    def upload_files(self, c):
        """Upload collection `c`'s rows as XY spreadsheets and file them.

        `c` must expose (per its use here):
            c.x        -- shared x values,
            c.Y        -- one y-row per spreadsheet, same length as c.x,
            c.metadata -- field name -> value, or -> list of one value
                          per spreadsheet.

        Each row becomes a two-column CSV uploaded as a spreadsheet
        named 'XY'; uploads are moved into the study root and then
        filed under the matching metadata value collections.
        """
        new_spreadsheets = []
        total = len(c.Y)
        for i in range(total):
            fd, path = tempfile.mkstemp()
            rows = [str(c.x[j]) + "," + str(c.Y[i][j]) + "\n"
                    for j in range(len(c.x))]
            os.write(fd, ''.join(rows))
            os.close(fd)
            while True:
                try:
                    ms = gdata.data.MediaSource(
                        file_path=path,
                        content_type=gdata.docs.data.MIMETYPES['CSV'])
                    new_spreadsheets.append(self.helper.client.Upload(ms, 'XY'))
                    break
                except Exception:
                    print("Upload error. Retrying...")
            print("Finished %d out of %d" % (i + 1, total))
        self.move_into_folder(new_spreadsheets, self.root_entry)
        # File the uploads under their metadata value collections.
        for name, values in c.metadata.items():
            self.add_metadata_field(name)
            if isinstance(values, list):
                # Per-spreadsheet values: file each sheet individually.
                for i in range(len(values)):
                    self.add_metadata_value(name, str(values[i]))
                    self.move_into_folder(new_spreadsheets[i],
                                          self.metadata[name][str(values[i])])
            else:
                # One shared value: file every sheet under it.
                self.add_metadata_value(name, str(values))
                self.move_into_folder(new_spreadsheets,
                                      self.metadata[name][str(values)])
##
##potential_studies = helper.get_collections('Sample Study')
##for study in potential_studies:
## print study['name']
##
##study = Study(helper,potential_studies[0]['entry'])
##print study.metadata