forked from mikemccabe/cul
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcul.py
More file actions
108 lines (87 loc) · 2.81 KB
/
cul.py
File metadata and controls
108 lines (87 loc) · 2.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env python
import sys
import requests
import redis
import argparse
# Module-level Redis client. Within this file it is referenced only from the
# disabled (``if False:``) block inside main().
colredis = redis.StrictRedis(
    host='redis-current.us.archive.org',
    port=6377,
)
def get_md(id, timeout=30):
    """Fetch and decode the archive.org metadata record for an item.

    Args:
        id: Item identifier; interpolated into the metadata URL as-is.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``
            argument, so a stalled connection raises instead of blocking
            the caller forever.  Pass ``None`` to wait without limit.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.exceptions.RequestException: on connection failure or
            timeout.
        ValueError: if the response body is not valid JSON.
    """
    r = requests.get('http://archive.org/metadata/%s' % (id),
                     timeout=timeout)
    return r.json()
def taglike_collections():
    """Return the collection names that always count as parents.

    A new list is built on every call, so callers may modify the result
    without affecting later calls.
    """
    tag_names = ('stream_only', 'printdisabled')
    return list(tag_names)
def reexpand_ancestry(id, dict, flattened=None):
    """Flatten a parent map into an ordered, duplicate-free ancestor list.

    Starting at ``id``, walks ``dict`` (identifier -> list of parent
    identifiers) depth-first.  Each identifier is appended the first time
    it is reached, before any of its parents, and parents are followed in
    the order they are listed.  An identifier that has no entry in
    ``dict`` is reported on stdout and left out of the result.

    Args:
        id: Identifier to start from; it becomes the first element of the
            result when it has an entry in ``dict``.
        dict: Mapping of identifier -> list of parent identifiers.
        flattened: Optional list to extend in place.  Identifiers already
            in it are not appended again and their parents are not
            followed.

    Returns:
        ``flattened`` (the same object when one was passed in).
    """
    if flattened is None:
        flattened = []
    # An explicit stack replaces recursion so a long parent chain cannot hit
    # the interpreter's recursion limit.  Parents are pushed reversed so they
    # are popped, and therefore visited, in their listed order.
    pending = [id]
    while pending:
        node = pending.pop()
        if node not in dict:
            # Single-argument print is valid on Python 2 and 3; the space in
            # the format reproduces the separator the Python 2 print
            # statement inserted between its two operands.
            print('%s %s' % ('\tDISCONNECTED VALUE? ', node))
            continue
        if node not in flattened:
            flattened.append(node)
            pending.extend(list(dict[node])[::-1])
    return flattened
def find_ancestry(id, dict=None, tags=None, _visiting=None):
    """Build a map from identifiers to their parent collections.

    Fetches the metadata for ``id`` via ``get_md`` and splits its listed
    collections into parents (the first one listed, plus any named in
    ``tags``) and the rest.  The parents are stored under ``dict[id]`` and
    every parent that has no entry yet is looked up recursively.  Listed
    collections that are not parents and have no entry in ``dict`` are
    reported on stdout as possible cruft.

    Args:
        id: Identifier of the item or collection to look up.
        dict: Mapping filled in place (identifier -> list of parents).
            When omitted, a new mapping is created and this call is
            treated as the root, which also prints the raw collection
            list.
        tags: Collection names that always count as parents; defaults to
            ``taglike_collections()``.
        _visiting: Internal.  Identifiers whose parents are currently
            being walked; prevents endless recursion (and endless
            metadata requests) when collection membership is cyclic.

    Returns:
        The mapping.  An identifier whose metadata response is empty or
        lacks a ``'metadata'`` key gets no entry.
    """
    root_item = dict is None
    if root_item:
        dict = {}
    if tags is None:
        tags = taglike_collections()
    if _visiting is None:
        _visiting = set()

    md = get_md(id)
    if not (md and 'metadata' in md):
        print('\t(could not get md)')
        return dict

    # Python 2 has a common base class for str/unicode; Python 3 only has str.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if 'collection' in md['metadata']:
        c = md['metadata']['collection']
        if isinstance(c, string_types):
            # A bare string value is treated as a one-element list.
            c = [c]
    else:
        c = []

    if root_item:
        # No separator: the Python 2 print statement omitted the space after
        # an operand ending in a tab.
        print('%s%s' % ('On disk:\n\t', c))

    # Kept as an explicit loop (not a comprehension) so the non-parent
    # entries are collected too.  ``cand == c[0]`` matches the same entries
    # as ``c.index(cand) == 0`` without rescanning the list each time.
    parents = []
    crufty_ancestors = []
    for cand in c:
        if cand in tags or cand == c[0]:
            parents.append(cand)
        else:
            crufty_ancestors.append(cand)

    # Mark this identifier as in progress while its parents are walked so a
    # parent chain that leads back here stops instead of recursing forever.
    _visiting.add(id)
    for parent in parents:
        if parent not in dict and parent not in _visiting:
            find_ancestry(parent, dict, tags, _visiting)
    _visiting.discard(id)

    dict[id] = parents
    for cruft in crufty_ancestors:
        if cruft not in dict:
            # The space in the format reproduces the separator the Python 2
            # print statement inserted between its two operands.
            print('%s %s' % ('\tCRUFT -or- INTENTIONAL MULTIPLE: ', cruft))
    return dict
def main():
    """Command-line entry point: print the collection ancestry of one item.

    Parses the command line and stores the parsed namespace in the
    module-level ``args``.  Without an identifier, prints the usage text
    and exits with status 0.  Otherwise prints the parent map produced by
    ``find_ancestry`` and the flattened ancestor list produced by
    ``reexpand_ancestry`` with the item itself removed.

    Returns:
        None.
    """
    global args
    parser = argparse.ArgumentParser()
    parser.add_argument('id', nargs='?', default=False)
    parser.add_argument('--foo',
                        help='foo',
                        action='store_true')
    args = parser.parse_args()
    # ``id`` is False when omitted and may be an empty string when given.
    if not args.id:
        parser.print_help()
        sys.exit(0)

    ancestry = find_ancestry(args.id)
    # No separator after the label: the Python 2 print statement omitted the
    # space after an operand ending in a tab.
    print('%s%s' % ('Dictionary representation:\n\t', ancestry))

    flattened = reexpand_ancestry(args.id, ancestry)
    # The item is missing from the list when its own metadata could not be
    # fetched; an unconditional list.remove() would raise ValueError then.
    if args.id in flattened:
        flattened.remove(args.id)
    print('%s%s' % ('Rehydrated flat list:\n\t', flattened))

    if False:
        # Disabled Redis scratch code; never executed.
        hello = {"foo":1, "bar": 2}
        colredis.hmset("hello", hello)
        h = colredis.hgetall("hello")
        print(h)
# Run the command-line tool only when executed as a script, and hand
# main()'s return value to the interpreter as the exit status.
if __name__ == '__main__':
    exit_status = main()
    sys.exit(exit_status)