Skip to content

Commit 0b36389

Browse files
committed
Also load the JSON files lazily (and make them valid JSON)
1 parent 948f915 commit 0b36389

File tree

2 files changed

+90
-33
lines changed

2 files changed

+90
-33
lines changed

udapi/block/write/corefhtml.py

Lines changed: 81 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -87,41 +87,78 @@
8787
$("#main-menu").toggle();
8888
}
8989
90+
// Lazily fetch "<docs_dir>/doc<doc_num>.html", append it to #main and wire up
// the mention listeners and the per-sentence "show tree" buttons of the newly
// added document.
//
// doc_num: 1-based number of the document to load.
//
// loading_now is raised for the duration of the request so that the scroll
// handler does not start a second request in the meantime.
function load_doc(doc_num) {
    var doc_url = docs_dir + "/doc" + doc_num + ".html";
    var doc_selector = "#doc" + doc_num;
    loading_now = true;
    console.log("loading doc" + doc_num + ".html");
    $.get(doc_url, function(data){
        $("#main").append(data);
        add_mention_listeners($(doc_selector + " .m"));
        $(doc_selector + " .sentence").each(add_show_tree_button);
    }).fail(function(){
        // Report the problem only once, further failures stay silent.
        if (! load_fail_reported) {
            load_fail_reported = true;
            alert("Cannot load " + docs_dir + "/doc" + doc_num
                  + ".html\\nLocal files do not support lazy loading. Run a web server 'python -m http.server'");
        }
    }).always(function(){
        // Release the guard after success AND after failure. Resetting it only
        // in the success callback left it raised forever after a single failed
        // request, which blocked every later lazy load.
        loading_now = false;
    });
}
106+
90107
var docs_loaded = 1;
108+
var load_fail_reported = false;
109+
var loading_now = false;
110+
add_show_tree_button = function(index, el){ } // to be redefined later if show_trees=True
91111
$(window).scroll(function () {
92-
if ($(window).scrollTop() >= $(document).height() - $(window).height() - 42 && docs_loaded < all_docs) {
112+
if (!loading_now && $(window).scrollTop() >= $(document).height() - $(window).height() - 42 && docs_loaded < all_docs) {
93113
docs_loaded += 1;
94-
console.log("loading doc" + docs_loaded + ".html");
95-
$.get(docs_dir + "/doc" + docs_loaded + ".html", function(data){
96-
$("#main").append(data);
97-
add_mention_listeners($("#doc" + docs_loaded + " .m"));
98-
});
114+
load_doc(docs_loaded);
99115
}
100116
});
101117
'''
102118

103119
SCRIPT_SHOWTREE = '''
104-
$(".sentence").each(function(index){
105-
var sent_id = this.id;
106-
$(this).prepend(
120+
function show_tree_in_tdiv(tdiv, doc_number, index){
121+
tdiv.treexView([docs_json[doc_number][index]]);
122+
$("<button>", {append:"×", class:"close"}).prependTo(tdiv).on("click", function(){tdiv.remove();});
123+
}
124+
125+
var load_json_fail_reported = false;
126+
// Callback for jQuery .each() over the ".sentence" elements of one document:
// prepends a button which toggles the dependency tree of that sentence.
//
// index: position of the sentence within the iterated set; it is used as the
//        index into docs_json[doc_number].
// el:    the sentence element; its parent element's id is expected to have
//        the form "doc<N>".
//
// The JSON of a document is fetched from "<docs_dir>/doc<N>.json" on the first
// request and cached in docs_json[N].
add_show_tree_button = function(index, el){
    var sent_id = el.id;
    $(el).prepend(
        $("<button>", {append: "🌲", id:"button-"+sent_id, title: "show dependency tree", class: "showtree"}).on("click", function() {
            var tree_div = $("#tree-"+sent_id);
            if (tree_div.length == 0){
                $('#button-'+sent_id).attr('title', 'hide dependency tree');
                var tdiv = $("<div>", {id:"tree-"+sent_id, class:"tree"}).insertAfter($(el));
                // Must be a local variable: the asynchronous callbacks below
                // close over it. As an implicit global it could be overwritten
                // by a click in another document before the JSON arrived, so
                // the data would be cached and shown under the wrong number.
                var doc_number = Number(el.parentElement.id.slice(3));
                if (docs_json[doc_number]){
                    show_tree_in_tdiv(tdiv, doc_number, index);
                } else {
                    $.getJSON(docs_dir + "/doc" + doc_number + ".json", function(data){
                        docs_json[doc_number] = data;
                        show_tree_in_tdiv(tdiv, doc_number, index);
                    }).fail(function(){
                        // Report the problem only once, further failures stay silent.
                        if (! load_json_fail_reported) {
                            load_json_fail_reported = true;
                            alert("Cannot load " + docs_dir + "/doc" + doc_number
                                  + ".json\\nLocal files do not support lazy loading. Run a web server 'python -m http.server'");
                        }
                    });
                }
            } else {tree_div.remove();}
        })
    );
};
153+
154+
$("#doc1 .sentence").each(add_show_tree_button);
118155
'''
119156

120157
WRITE_HTML = udapi.block.write.html.Html()
121158

122159
class CorefHtml(BaseWriter):
123160

124-
def __init__(self, docs_dir='.', show_trees=True, show_eid=False, show_etype=False, colors=7, **kwargs):
161+
def __init__(self, docs_dir='docs', show_trees=True, show_eid=False, show_etype=False, colors=7, **kwargs):
125162
super().__init__(**kwargs)
126163
self.docs_dir = docs_dir
127164
self.show_trees = show_trees
@@ -130,6 +167,8 @@ def __init__(self, docs_dir='.', show_trees=True, show_eid=False, show_etype=Fal
130167
self.colors = colors
131168
if docs_dir != '.' and not os.path.exists(docs_dir):
132169
os.makedirs(docs_dir)
170+
self._mention_ids = {}
171+
self._entity_colors = {}
133172

134173
def _representative_word(self, entity):
135174
# return the first PROPN or NOUN. Or the most frequent one?
@@ -141,18 +180,21 @@ def _representative_word(self, entity):
141180
return lemma_or_form(nodes[0])
142181
return lemma_or_form(heads[0])
143182

144-
def process_ud_doc(self, ud_doc, doc_num, mention_ids, entity_colors):
183+
def process_ud_doc(self, ud_doc, doc_num):
145184
print(f'<div class="doc" id="doc{doc_num}">')
146185
for tree in ud_doc:
147-
self.process_tree(tree, mention_ids, entity_colors)
186+
self.process_tree(tree)
148187
print('</div>')
149188

150189
def process_document(self, doc):
151-
ud_docs = []
190+
ud_docs, doc_num, sent_id2doc = [], 0, {}
152191
for tree in doc.trees:
153192
if tree.newdoc or not ud_docs:
154193
ud_docs.append([])
194+
doc_num += 1
155195
ud_docs[-1].append(tree)
196+
sent_id2doc[tree.sent_id] = doc_num
197+
# TODO: use sent_id2doc
156198

157199
print(HEADER)
158200
if self.show_trees:
@@ -170,16 +212,16 @@ def process_document(self, doc):
170212
print('</style>')
171213
print('</head>\n<body>\n<div id="wrap">')
172214

173-
mention_ids = {}
174-
entity_colors = {}
215+
self._mention_ids = {}
216+
self._entity_colors = {}
175217
entities_of_type = Counter()
176218
for entity in doc.coref_entities:
177219
if self.colors:
178220
count = entities_of_type[entity.etype]
179221
entities_of_type[entity.etype] = count + 1
180-
entity_colors[entity] = f'c{count % self.colors}'
222+
self._entity_colors[entity] = f'c{count % self.colors}'
181223
for idx, mention in enumerate(entity.mentions, 1):
182-
mention_ids[mention] = f'{entity.eid}e{idx}'
224+
self._mention_ids[mention] = f'{entity.eid}e{idx}'
183225

184226
print('<div id="overview">')
185227
print('<table><thead><tr><th title="entity id">eid</th>'
@@ -208,34 +250,43 @@ def process_document(self, doc):
208250
'<button id="menubtn" title="Visualization options" onclick="menuclick(this)"><div class="b1"></div><div class="b2"></div><div class="b3"></div></button>\n')
209251

210252
# The first ud_doc will be printed to the main html file.
211-
self.process_ud_doc(ud_docs[0], 1, mention_ids, entity_colors)
253+
self.process_ud_doc(ud_docs[0], 1)
212254
print('</div>') # id=main
213255

214256
# Other ud_docs will be printed into separate files (so they can be loaded lazily)
215257
orig_stdout = sys.stdout
216258
try:
217259
for i, ud_doc in enumerate(ud_docs[1:], 2):
218260
sys.stdout = open(f"{self.docs_dir}/doc{i}.html", 'wt')
219-
self.process_ud_doc(ud_doc, i, mention_ids, entity_colors)
261+
self.process_ud_doc(ud_doc, i)
220262
sys.stdout.close()
221263
finally:
222264
sys.stdout = orig_stdout
223265

224266
print(f'<script>\nvar all_docs = {len(ud_docs)};\nvar docs_dir = "{self.docs_dir}";')
225267
print(SCRIPT_BASE)
226268
if self.show_trees:
227-
WRITE_HTML.print_doc_json(doc)
269+
print('docs_json = [false, ', end='') # 1-based index, so dummy docs_json[0]
270+
WRITE_HTML.print_doc_json(ud_docs[0])
271+
print('];')
272+
try:
273+
for i, ud_doc in enumerate(ud_docs[1:], 2):
274+
sys.stdout = open(f"{self.docs_dir}/doc{i}.json", 'wt')
275+
WRITE_HTML.print_doc_json(ud_doc)
276+
sys.stdout.close()
277+
finally:
278+
sys.stdout = orig_stdout
228279
print(SCRIPT_SHOWTREE)
229280
print('</script>')
230281
print('</div></body></html>')
231282

232-
def _start_subspan(self, subspan, mention_ids, entity_colors, crossing=False):
283+
def _start_subspan(self, subspan, crossing=False):
233284
m = subspan.mention
234285
e = m.entity
235-
classes = f'{e.eid} {mention_ids[m]} {e.etype or "other"} m'
286+
classes = f'{e.eid} {self._mention_ids[m]} {e.etype or "other"} m'
236287
title = f'eid={subspan.subspan_eid}\netype={e.etype}\nhead={m.head.form}'
237288
if self.colors:
238-
classes += f' {entity_colors[e]}'
289+
classes += f' {self._entity_colors[e]}'
239290
if all(w.is_empty() for w in subspan.words):
240291
classes += ' empty'
241292
if len(e.mentions) == 1:
@@ -252,7 +303,7 @@ def _start_subspan(self, subspan, mention_ids, entity_colors, crossing=False):
252303
f'<span class="labels"><b class="eid">{subspan.subspan_eid}</b>'
253304
f' <i class="etype">{e.etype}</i></span>', end='')
254305

255-
def process_tree(self, tree, mention_ids, entity_colors):
306+
def process_tree(self, tree):
256307
mentions = set()
257308
nodes_and_empty = tree.descendants_and_empty
258309
for node in nodes_and_empty:
@@ -273,7 +324,7 @@ def process_tree(self, tree, mention_ids, entity_colors):
273324
for node in nodes_and_empty:
274325
while subspans and subspans[-1].words[0] == node:
275326
subspan = subspans.pop()
276-
self._start_subspan(subspan, mention_ids, entity_colors)
327+
self._start_subspan(subspan)
277328
opened.append(subspan)
278329

279330
is_head = self._is_head(node)
@@ -311,7 +362,7 @@ def process_tree(self, tree, mention_ids, entity_colors):
311362
opened = new_opened
312363
print('</span>' * (len(endings) + len(brokens)), end='')
313364
for broken in brokens:
314-
self._start_subspan(broken, mention_ids, entity_colors, True)
365+
self._start_subspan(broken, True)
315366
opened.append(subspan)
316367

317368
if not node.no_space_after:

udapi/block/write/html.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,9 @@ def process_document(self, doc):
7979
print('</head>\n<body>')
8080
print('<button style="float:right" type="submit" onclick="saveTree()">'
8181
'<span>Save as SVG</span></button><div id="treex-view"></div><script>')
82+
print('data=', end='')
8283
self.print_doc_json(doc)
84+
print(';')
8385
print("$('#treex-view').treexView(data);")
8486
print('''function saveTree() {
8587
var svg_el = jQuery('svg');
@@ -91,14 +93,18 @@ def process_document(self, doc):
9193
print('</script></body></html>')
9294

9395
def print_doc_json(self, doc):
94-
print('data=[')
96+
print('[')
9597
for (bundle_number, bundle) in enumerate(doc, 1):
9698
if bundle_number != 1:
9799
print(',', end='')
98100
print('{"zones":{', end='')
99101
first_zone = True
100102
desc = ''
101-
for tree in bundle.trees:
103+
try:
104+
trees = bundle.trees
105+
except:
106+
trees = [bundle] # allow to call print_doc_json([tree1, tree2])
107+
for tree in trees:
102108
zone = tree.zone
103109
if first_zone:
104110
first_zone = False
@@ -116,7 +122,7 @@ def print_doc_json(self, doc):
116122
print(']}}}')
117123
# print desc without the extra starting comma
118124
print('},"desc":[%s]}' % desc[1:])
119-
print('];')
125+
print(']')
120126

121127

122128
@staticmethod

0 commit comments

Comments
 (0)