-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathparse-contribution.js
More file actions
145 lines (133 loc) · 5.55 KB
/
parse-contribution.js
File metadata and controls
145 lines (133 loc) · 5.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
// Module-level container, presumably intended to hold parsed essays.
// NOTE(review): it is initialised as an empty object despite the "array" in its
// name, and none of the visible code reads or writes it — confirm it is still needed.
const arrayOfEssays = {}
/**
 * Parses a markup string with the browser's DOMParser.
 * @param {String} string - The markup to parse
 * @param {String} mimeType - MIME type handed to DOMParser#parseFromString
 * @returns {Document} The value returned by DOMParser#parseFromString
 */
function parse(string, mimeType) {
  return new DOMParser().parseFromString(string, mimeType);
}
/**
 * Fetches a resource using XMLHttpRequest and a Promise.
 *
 * A promise settles with exactly one value, so the request object cannot be
 * passed as a second argument to resolve/reject; on failure it is attached to
 * the rejected Error instead.
 *
 * @param {String} url - The URL of the resource to get
 * @param {String|FormData|Blob|File} [postData] - Any data to POST; any falsy value results in a GET
 * @param {String} [responseType] - The type of response to expect (defaults to document)
 * @returns {Promise} Resolves with the request's `response`. Rejects with an
 *   Error carrying `xhr`, `status`, `response` and `event` properties when the
 *   request errors out or the server answers with an HTTP status >= 400.
 */
function fetch(url, postData, responseType) {
  return new Promise((resolve, reject) => {
    const xhr = new XMLHttpRequest();

    // Builds the rejection value; keeps the request reachable for callers.
    const buildError = (message, event) => {
      const error = new Error(`${message}: ${url}`);
      error.xhr = xhr;
      error.status = xhr.status;
      error.response = xhr.response;
      error.event = event;
      return error;
    };

    xhr.responseType = responseType || 'document';

    xhr.addEventListener('load', (event) => {
      // 'load' also fires for 404/500 responses. Status 0 is left alone because
      // non-HTTP schemes (e.g. file://) report it for successful loads.
      if (xhr.status >= 400) {
        reject(buildError(`XHR failed with HTTP status ${xhr.status}`, event));
        return;
      }
      resolve(xhr.response);
    });

    xhr.addEventListener('error', (event) => {
      console.warn('XHR Error', url, event);
      reject(buildError('XHR network error', event));
    });

    if (!postData) {
      xhr.open('GET', url);
      xhr.send();
      return;
    }

    xhr.open('POST', url);
    if (typeof postData === 'string') {
      xhr.setRequestHeader('Content-type', 'text/html');
    }
    xhr.send(postData);
  });
}
/**
 * Runs `querySelectorAll` on `parent` and returns the matches as a real Array.
 * @param {Object} parent - Any node exposing `querySelectorAll`
 * @param {String} tag - Selector to match (callers pass tag names as well as
 *   attribute selectors such as "[typeof][resource]")
 * @returns {Array} A new array holding the matched nodes in result order
 */
function arrayFromCollection(parent, tag) {
  const matches = parent.querySelectorAll(tag);
  return Array.from(matches);
}
/**
 * Converts one child <span> of an entity into a plain time-point object.
 * Every <meta> found under the span contributes one key (its `property`
 * attribute) whose value is its `content` attribute; `time:inXSDgYear` is
 * converted to a number.
 * @param {Object} span - The child span node
 * @param {Number} positionInText - 1-based position of the owning entity in the essay
 * @returns {Object} The time point, including `time:positionInText`
 */
function buildTimePoint(span, positionInText) {
  // The position is recorded on the time point itself; writing it anywhere
  // else would lose it, because only this object ends up in the output.
  const timePoint = { 'time:positionInText': positionInText };
  arrayFromCollection(span, 'meta').forEach((meta) => {
    const property = meta.getAttribute('property');
    const content = meta.getAttribute('content');
    timePoint[property] = property === 'time:inXSDgYear' ? Number(content) : content;
  });
  return timePoint;
}

/**
 * Converts one annotated entity node into its output record.
 * @param {Object} entityNode - Node matched by "[typeof][resource]"
 * @param {Number} positionInText - 1-based position of the entity in the essay
 * @returns {{entityType: String, targets: String[], targetedBy: String[], textLabel: String, timePoints: Object[]}}
 */
function buildEntity(entityNode, positionInText) {
  const targets = [];
  const targetedBy = [];
  // Collects instant-level metadata; only used when the entity has no child span.
  const instant = {};
  // Default label: the node's text with newlines and double spaces removed.
  let textLabel = entityNode.innerText.replace(/ {2}|\n/g, '');

  // NOTE(review): querySelectorAll matches descendants at any depth, so <meta>
  // elements nested inside child spans are visited here too — confirm intended.
  arrayFromCollection(entityNode, 'meta').forEach((meta) => {
    const property = meta.getAttribute('property');
    switch (property) {
      // Linked resources: drop the first character of the reference
      // (presumably a leading '#').
      case 'ac:linksTo':
        targets.push(meta.getAttribute('resource').substring(1));
        break;
      case 'ac:linkedFrom':
        targetedBy.push(meta.getAttribute('resource').substring(1));
        break;
      // An explicit label overrides the text-derived default.
      case 'ac:label':
        textLabel = meta.getAttribute('content');
        break;
      case 'time:inXSDgYear':
        instant[property] = Number(meta.getAttribute('content'));
        break;
      case 'rdfs:label':
      case 'ac:hasIndefiniteness':
        instant[property] = meta.getAttribute('content');
        break;
      default:
        break;
    }
  });

  const spans = arrayFromCollection(entityNode, 'span');
  let timePoints;
  if (spans.length === 0) {
    // No child span: the entity itself is a single instant.
    instant['time:positionInText'] = positionInText;
    timePoints = [instant];
  } else {
    // One time point per child span.
    timePoints = spans.map((span) => buildTimePoint(span, positionInText));
  }

  return {
    entityType: entityNode.getAttribute('typeof'),
    targets,
    targetedBy,
    textLabel,
    timePoints,
  };
}

/**
 * Extracts the annotated entities of an essay document, paragraph by paragraph.
 *
 * The essay body is the direct child of `parsedText.body` whose id is
 * 'EssayBody'. Entities are numbered consecutively across the whole essay
 * (starting at 1), not per paragraph.
 *
 * @param {Object} parsedText - Parsed document exposing `body.children`
 * @returns {Array<{paragraphNumber: Number, entities: Object[]}>} One record per <p>,
 *   with 1-based `paragraphNumber`
 * @throws {TypeError} When no 'EssayBody' element is found
 */
function transformData(parsedText) {
  const textBody = Array.from(parsedText.body.children).find(
    (child) => child.id === 'EssayBody'
  );
  if (!textBody) {
    throw new TypeError("transformData: no element with id 'EssayBody' found in the document body");
  }

  let entityPositionInText = 0;
  return arrayFromCollection(textBody, 'p').map((paragraph, index) => {
    const entities = arrayFromCollection(paragraph, '[typeof][resource]').map((entityNode) => {
      entityPositionInText += 1;
      return buildEntity(entityNode, entityPositionInText);
    });
    return {
      paragraphNumber: index + 1,
      entities,
    };
  });
}
/**
 * Loads the combustion essay as text, parses it as HTML, extracts its
 * annotated entities and logs the result to the console.
 * @returns {Promise<void>} Rejects when loading or transforming the essay fails
 */
async function init() {
  const rawText = await fetch('collaborative_tagging/combustion.html', false, 'text');
  const parsedEssay = parse(rawText, 'text/html');
  const data = transformData(parsedEssay);
  console.log(data);
}

// Handle failures here so the promise is not left floating and errors are
// reported instead of surfacing as an unhandled rejection.
init().catch((error) => {
  console.error('Failed to load or parse the essay', error);
});