Skip to content

Commit cfa66f5

Browse files
committed
LL-1087
1 parent 84ea07e commit cfa66f5

File tree

1 file changed

+39
-30
lines changed

1 file changed

+39
-30
lines changed

serv/jobs/ss-dataimport/ss-dataimport-impl/src/main/java/at/kc/tugraz/ss/serv/dataimport/impl/evernote/SSDataImportEvernoteNoteContentHandler.java

Lines changed: 39 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -165,6 +165,9 @@ public static String reduceXHTMLToTextAndImage(final String path) throws Excepti
165165

166166
BufferedReader br = null;
167167
String result = SSStrU.empty;
168+
String mediaTag;
169+
int mediaStartIndex;
170+
int mediaEndIndex;
168171

169172
try{
170173

@@ -174,10 +177,6 @@ public static String reduceXHTMLToTextAndImage(final String path) throws Excepti
174177
"\n" +
175178
"<html xmlns=\"http://www.w3.org/1999/xhtml\">\n" +
176179
"\n" +
177-
"<head>\n" +
178-
" <title>Title of document</title>\n" +
179-
"</head>\n" +
180-
"\n" +
181180
"<body>\n";
182181

183182
String line, text, tag, tmpTag, href, title;
@@ -205,21 +204,31 @@ public static String reduceXHTMLToTextAndImage(final String path) throws Excepti
205204
text = line.substring(0, tagIndex).replace("&amp;nbsp;", SSStrU.empty).replace("Â", SSStrU.empty).trim();
206205

207206
if(!text.isEmpty()){
208-
result += text + SSStrU.backslashRBackslashN;
207+
result += "<div>" + text + "</div>" + SSStrU.backslashRBackslashN;
209208
}
210209
}
211210

212-
// String mediaTag;
213-
// if(tmpTag.contains("<en-media")){
214-
//
215-
// tagIndex = tmpTag.indexOf("<en-media");
216-
// tagEndIndex = tmpTag.indexOf(">");
217-
//
218-
// if(tagEndIndex != -1){
219-
// mediaTag = tmpTag.substring(tagIndex, tagEndIndex);
220-
// result += mediaTag + "></en-media>";
221-
// }
222-
// }
211+
if(tmpTag.startsWith("<en-media")){
212+
213+
mediaStartIndex = tmpTag.indexOf("<en-media");
214+
mediaEndIndex = tmpTag.indexOf(">");
215+
216+
if(mediaEndIndex != -1){
217+
mediaTag = tmpTag.substring(mediaStartIndex, mediaEndIndex + 1);
218+
219+
if(
220+
!mediaTag.endsWith("/>") &&
221+
mediaTag.length() > 2){
222+
223+
result += mediaTag.substring(0, mediaTag.length() - 1) + "/>";
224+
}else{
225+
result += mediaTag;
226+
}
227+
228+
line = line.replace(mediaTag, SSStrU.empty).replace("&amp;nbsp;", SSStrU.empty).replace("Â", SSStrU.empty).trim();
229+
continue;
230+
}
231+
}
223232

224233
while(tmpTag.contains("href=\"")){
225234

@@ -229,25 +238,23 @@ public static String reduceXHTMLToTextAndImage(final String path) throws Excepti
229238

230239
if(tmpTag.contains("title=\"")){
231240

232-
titleIndex = tmpTag.indexOf("title=\"");
241+
titleIndex = tmpTag.indexOf("title=\"");
233242
titleEndIndex = tmpTag.indexOf("\"", titleIndex + 7);
234-
title = tmpTag.substring(titleIndex + 7, titleEndIndex);
235-
236-
result += title.replace("&amp;nbsp;", SSStrU.empty).replace("Â", SSStrU.empty) + ": " + SSStrU.backslashRBackslashN;
237-
238-
tmpTag = tmpTag.substring(0, titleIndex) + tmpTag.substring(titleEndIndex + 1, tmpTag.length() - 1);
243+
title = tmpTag.substring(titleIndex + 7, titleEndIndex);
244+
title = title.replace("&amp;nbsp;", SSStrU.empty).replace("Â", SSStrU.empty);
239245

240-
hrefIndex = tmpTag.indexOf("href=\"");
246+
tmpTag = tmpTag.substring(0, titleIndex) + tmpTag.substring(titleEndIndex + 1, tmpTag.length() - 1);
247+
hrefIndex = tmpTag.indexOf("href=\"");
241248
hrefEndIndex = tmpTag.indexOf("\"", hrefIndex + 6);
242-
href = tmpTag.substring(hrefIndex + 6, hrefEndIndex);
249+
href = tmpTag.substring(hrefIndex + 6, hrefEndIndex);
243250

244251
tmpTag = tmpTag.substring(0, hrefIndex) + tmpTag.substring(hrefEndIndex + 1, tmpTag.length() - 1);
245252

246-
result += href + SSStrU.backslashRBackslashN;
253+
result += "<div>" + "<a href=\"" + href + "\">" + title + "</a>" + "</div>" + SSStrU.backslashRBackslashN;
247254

248255
}else{
249-
result += "link" + ": " + href + SSStrU.backslashRBackslashN;
250-
256+
result += "<div>" + "<a href=\"" + href + "\">" + href + "</a>" + "</div>" + SSStrU.backslashRBackslashN;
257+
251258
tmpTag = tmpTag.substring(0, hrefIndex) + tmpTag.substring(hrefEndIndex + 1, tmpTag.length() - 1);
252259
}
253260
}
@@ -258,13 +265,12 @@ public static String reduceXHTMLToTextAndImage(final String path) throws Excepti
258265
line = line.replace("&amp;nbsp;", SSStrU.empty).replace("Â", SSStrU.empty).trim();
259266

260267
if(!line.isEmpty()){
261-
result += line + SSStrU.backslashRBackslashN;
268+
result += "<div>" + line + "</div>" + SSStrU.backslashRBackslashN;
262269
}
263270
}
264271

265272
result +=
266273
"</body>\n"
267-
+ "\n"
268274
+ "</html>";
269275

270276
return result;
@@ -333,13 +339,16 @@ private String downnloadNoteResourcesAndFillXHTMLWithLocalImageLinks(
333339
}else{
334340
endIndex = endIndex2;
335341
}
336-
// imageGif
342+
337343
if(//application/pdf //application/vnd.openxmlformats-officedocument.presentationml.presentation //application/msword //application/vnd.openxmlformats-officedocument.wordprocessingml.document
338344
!(tmpLine.contains("type=\"" + SSMimeTypeU.imagePng + "\"") &&
339345
endIndex > tmpLine.indexOf("type=\"" + SSMimeTypeU.imagePng + "\"")
340346
) &&
341347
!(tmpLine.contains("type=\"" + SSMimeTypeU.imageJpeg + "\"") &&
342348
endIndex > tmpLine.indexOf("type=\"" + SSMimeTypeU.imageJpeg + "\"")
349+
) &&
350+
!(tmpLine.contains("type=\"" + SSMimeTypeU.imageGif + "\"") &&
351+
endIndex > tmpLine.indexOf("type=\"" + SSMimeTypeU.imageGif + "\"")
343352
)){
344353

345354
if(endIndex == endIndex1){

0 commit comments

Comments
 (0)