1- /**
2- * Code contributed to the Learning Layers project
3- * http://www.learning-layers.eu
4- * Development is partly funded by the FP7 Programme of the European Commission under
5- * Grant Agreement FP7-ICT-318209.
6- * Copyright (c) 2014, Graz University of Technology - KTI (Knowledge Technologies Institute).
7- * For a list of contributors see the AUTHORS file at the top-level directory of this distribution.
8- *
9- * Licensed under the Apache License, Version 2.0 (the "License");
10- * you may not use this file except in compliance with the License.
11- * You may obtain a copy of the License at
12- *
13- * http://www.apache.org/licenses/LICENSE-2.0
14- *
15- * Unless required by applicable law or agreed to in writing, software
16- * distributed under the License is distributed on an "AS IS" BASIS,
17- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18- * See the License for the specific language governing permissions and
19- * limitations under the License.
20- */
1+ /**
2+ * Code contributed to the Learning Layers project
3+ * http://www.learning-layers.eu
4+ * Development is partly funded by the FP7 Programme of the European Commission under
5+ * Grant Agreement FP7-ICT-318209.
6+ * Copyright (c) 2014, Graz University of Technology - KTI (Knowledge Technologies Institute).
7+ * For a list of contributors see the AUTHORS file at the top-level directory of this distribution.
8+ *
9+ * Licensed under the Apache License, Version 2.0 (the "License");
10+ * you may not use this file except in compliance with the License.
11+ * You may obtain a copy of the License at
12+ *
13+ * http://www.apache.org/licenses/LICENSE-2.0
14+ *
15+ * Unless required by applicable law or agreed to in writing, software
16+ * distributed under the License is distributed on an "AS IS" BASIS,
17+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18+ * See the License for the specific language governing permissions and
19+ * limitations under the License.
20+ */
2121package at .kc .tugraz .ss .serv .dataimport .impl .evernote ;
2222
2323import at .kc .tugraz .socialserver .utils .SSFileExtE ;
2424import at .kc .tugraz .socialserver .utils .SSFileU ;
2525import at .kc .tugraz .socialserver .utils .SSLogU ;
26+ import at .kc .tugraz .socialserver .utils .SSMimeTypeU ;
2627import at .kc .tugraz .socialserver .utils .SSStrU ;
2728import at .kc .tugraz .ss .datatypes .datatypes .entity .SSUri ;
2829import at .kc .tugraz .ss .datatypes .datatypes .enums .SSEntityE ;
@@ -76,18 +77,18 @@ public void handleNoteContent() throws Exception{
7677 pdfFilePath = localWorkPath + SSServCaller .fileIDFromURI (user , fileUri );
7778
7879 SSFileU .writeStr (
79- note .getContent (),
80+ note .getContent (),
8081 xhtmlFilePath );
8182
8283 try {
8384
8485 SSFileU .writeStr (
85- downnloadNoteResourcesAndFillXHTMLWithLocalImageLinks (xhtmlFilePath ),
86+ downnloadNoteResourcesAndFillXHTMLWithLocalImageLinks (xhtmlFilePath ),
8687 xhtmlFilePath );
8788
8889 SSFileU .writePDFFromXHTML (
8990 pdfFilePath ,
90- xhtmlFilePath ,
91+ xhtmlFilePath ,
9192 true );
9293
9394 }catch (Exception error ){
@@ -97,16 +98,16 @@ public void handleNoteContent() throws Exception{
9798
9899 try {
99100 SSFileU .writeStr (
100- reduceXHTMLToTextAndImage (xhtmlFilePath ),
101+ reduceXHTMLToTextAndImage (xhtmlFilePath ),
101102 xhtmlFilePath );
102103
103104 SSFileU .writeStr (
104- downnloadNoteResourcesAndFillXHTMLWithLocalImageLinks (xhtmlFilePath ),
105+ downnloadNoteResourcesAndFillXHTMLWithLocalImageLinks (xhtmlFilePath ),
105106 xhtmlFilePath );
106107
107108 SSFileU .writePDFFromXHTML (
108109 pdfFilePath ,
109- xhtmlFilePath ,
110+ xhtmlFilePath ,
110111 true );
111112
112113 }catch (Exception error1 ){
@@ -208,17 +209,17 @@ public static String reduceXHTMLToTextAndImage(final String path) throws Excepti
208209 }
209210 }
210211
211- String mediaTag ;
212- if (tmpTag .contains ("<en-media" )){
213-
214- tagIndex = tmpTag .indexOf ("<en-media" );
215- tagEndIndex = tmpTag .indexOf (">" );
216-
217- if (tagEndIndex != -1 ){
218- mediaTag = tmpTag .substring (tagIndex , tagEndIndex );
219- result += mediaTag + "></en-media>" ;
220- }
221- }
212+ // String mediaTag;
213+ // if(tmpTag.contains("<en-media")){
214+ //
215+ // tagIndex = tmpTag.indexOf("<en-media");
216+ // tagEndIndex = tmpTag.indexOf(">");
217+ //
218+ // if(tagEndIndex != -1){
219+ // mediaTag = tmpTag.substring(tagIndex, tagEndIndex);
220+ // result += mediaTag + "></en-media>";
221+ // }
222+ // }
222223
223224 while (tmpTag .contains ("href=\" " )){
224225
@@ -261,7 +262,7 @@ public static String reduceXHTMLToTextAndImage(final String path) throws Excepti
261262 }
262263 }
263264
264- result +=
265+ result +=
265266 "</body>\n "
266267 + "\n "
267268 + "</html>" ;
@@ -280,18 +281,20 @@ public static String reduceXHTMLToTextAndImage(final String path) throws Excepti
280281 }
281282
282283 private String downnloadNoteResourcesAndFillXHTMLWithLocalImageLinks (
283- final String path ) throws Exception {
284+ final String path ) throws Exception {
284285
285286 BufferedReader lineReader = null ;
286287 String result = SSStrU .empty ;
287288 Resource resource ;
288289 SSUri fileURI ;
289290 String fileID ;
290291 String line ;
291- String mediaTag ;
292+ String tmpLine ;
292293 String hash ;
293- int tagIndex ;
294- int tagEndIndex ;
294+ int startIndex ;
295+ int endIndex1 ;
296+ int endIndex2 ;
297+ int endIndex ;
295298 int hashIndex ;
296299 int hashEndIndex ;
297300
@@ -303,44 +306,81 @@ private String downnloadNoteResourcesAndFillXHTMLWithLocalImageLinks(
303306
304307 line = line .trim ();
305308
306- if (!line .contains ("<en-media" )){
307- result += line + SSStrU .backslashRBackslashN ;
308- continue ;
309- }
310-
311- if (!line .contains ("</en-media>" )){
312- result += line + SSStrU .backslashRBackslashN ;
313- continue ;
314- //throw new Exception("xhtml invalid"); ///>
315- }
316-
317- tagIndex = line .indexOf ("<en-media" );
318- tagEndIndex = line .indexOf ("</en-media>" );
319- mediaTag = line .substring (tagIndex + 10 , tagEndIndex );
320-
321- if (!line .contains ("type=\" image/png\" " )){ //application/pdf //application/vnd.openxmlformats-officedocument.presentationml.presentation //"image/jpeg" //application/msword //application/vnd.openxmlformats-officedocument.wordprocessingml.document
322- result += line + SSStrU .backslashRBackslashN ;
323- continue ;
324- }
325-
326- if (!line .contains ("hash=\" " )){
327- result += line + SSStrU .backslashRBackslashN ;
328- continue ;
329- }
330-
331- hashIndex = line .indexOf ("hash=\" " );
332- hashEndIndex = line .indexOf ("\" " , hashIndex + 6 );
333- hash = line .substring (hashIndex + 6 , hashEndIndex );
309+ tmpLine = line ;
334310
335- if (hashsPerFileURIs .containsKey (hash )){
336- fileURI = hashsPerFileURIs .get (hash );
311+ while (tmpLine .contains ("<en-media" )){
312+
313+ startIndex = tmpLine .indexOf ("<en-media" );
314+
315+ if (
316+ !tmpLine .contains ("</en-media>" ) &&
317+ !tmpLine .contains ("/>" )){
318+
319+ result += tmpLine ;
320+ break ; //xhtml invalid
321+ }
322+
323+ if (!tmpLine .contains ("hash=\" " )){
324+ result += tmpLine ;
325+ break ;
326+ }
337327
338- fileID = SSServCaller .fileIDFromURI (user , fileURI );
339- }else {
328+ endIndex1 = tmpLine .indexOf ("</en-media>" );
329+ endIndex2 = tmpLine .indexOf ("/>" );
330+
331+ if (endIndex1 != -1 ){
332+ endIndex = endIndex1 ;
333+ }else {
334+ endIndex = endIndex2 ;
335+ }
336+ // imageGif
337+ if (//application/pdf //application/vnd.openxmlformats-officedocument.presentationml.presentation //application/msword //application/vnd.openxmlformats-officedocument.wordprocessingml.document
338+ !(tmpLine .contains ("type=\" " + SSMimeTypeU .imagePng + "\" " ) &&
339+ endIndex > tmpLine .indexOf ("type=\" " + SSMimeTypeU .imagePng + "\" " )
340+ ) &&
341+ !(tmpLine .contains ("type=\" " + SSMimeTypeU .imageJpeg + "\" " ) &&
342+ endIndex > tmpLine .indexOf ("type=\" " + SSMimeTypeU .imageJpeg + "\" " )
343+ )){
344+
345+ if (endIndex == endIndex1 ){
346+ result += tmpLine .substring (0 , endIndex + 11 );
347+ tmpLine = tmpLine .substring (endIndex + 11 );
348+ }else {
349+
350+ result += tmpLine .substring (0 , endIndex + 2 );
351+ tmpLine = tmpLine .substring (endIndex + 2 );
352+ }
353+
354+ continue ;
355+ }
356+
357+ hashIndex = tmpLine .indexOf ("hash=\" " );
358+
359+ if (!(tmpLine .contains ("hash=\" " ) && endIndex > hashIndex )){
360+
361+ if (endIndex == endIndex1 ){
362+ result += tmpLine .substring (0 , endIndex + 11 );
363+ tmpLine = tmpLine .substring (endIndex + 11 );
364+ }else {
365+ result += tmpLine .substring (0 , endIndex + 2 );
366+ tmpLine = tmpLine .substring (endIndex + 2 );
367+ }
368+
369+ continue ;
370+ }
371+
372+ hashEndIndex = tmpLine .indexOf ("\" " , hashIndex + 6 );
373+ hash = tmpLine .substring (hashIndex + 6 , hashEndIndex );
374+
375+ // if(hashsPerFileURIs.containsKey(hash)){
376+ // fileURI = hashsPerFileURIs.get(hash);
377+
378+ // fileID = SSServCaller.fileIDFromURI(user, fileURI);
379+ // }else{
340380 fileURI = SSServCaller .vocURICreate (SSFileExtE .png );
341381 fileID = SSServCaller .fileIDFromURI (user , fileURI );
342382
343- hashsPerFileURIs .put (hash , fileURI );
383+ // hashsPerFileURIs.put(hash, fileURI);
344384
345385 resource =
346386 SSServCaller .evernoteResourceByHashGet (
@@ -353,17 +393,22 @@ private String downnloadNoteResourcesAndFillXHTMLWithLocalImageLinks(
353393 new FileOutputStream (localWorkPath + fileID ),
354394 resource .getData ().getBody (),
355395 resource .getData ().getSize ());
396+ // }
397+
398+ result += tmpLine .substring (0 , startIndex ) + "<img width=\" " + resource .getWidth () + "\" height=\" " + resource .getHeight () + "\" class=\" xmyImagex\" src=\" " + localWorkPath + fileID + "\" />" ;
399+
400+ if (endIndex == endIndex1 ){
401+ tmpLine = tmpLine .substring (endIndex + 11 , tmpLine .length ());
402+ }else {
403+ tmpLine = tmpLine .substring (endIndex + 2 , tmpLine .length ());
404+ }
356405 }
357406
358- line =
359- line .substring (0 , tagIndex ) +
360- "<img class=\" xmyImagex\" src=\" " + localWorkPath + fileID + "\" />" +
361- line .substring (tagEndIndex + 11 , line .length ());
362-
363- result += line + SSStrU .backslashRBackslashN ;
407+ result += tmpLine ;
408+ result += SSStrU .backslashN ;
364409 }
365410
366- return result ;
411+ return result . replace ( "&nbsp;" , SSStrU . empty ). replace ( "Â" , SSStrU . empty ). trim () ;
367412
368413 }catch (Exception error ){
369414 SSServErrReg .regErrThrow (error );
0 commit comments