@@ -165,6 +165,9 @@ public static String reduceXHTMLToTextAndImage(final String path) throws Excepti
165165
166166 BufferedReader br = null ;
167167 String result = SSStrU .empty ;
168+ String mediaTag ;
169+ int mediaStartIndex ;
170+ int mediaEndIndex ;
168171
169172 try {
170173
@@ -174,10 +177,6 @@ public static String reduceXHTMLToTextAndImage(final String path) throws Excepti
174177 "\n " +
175178 "<html xmlns=\" http://www.w3.org/1999/xhtml\" >\n " +
176179 "\n " +
177- "<head>\n " +
178- " <title>Title of document</title>\n " +
179- "</head>\n " +
180- "\n " +
181180 "<body>\n " ;
182181
183182 String line , text , tag , tmpTag , href , title ;
@@ -205,21 +204,31 @@ public static String reduceXHTMLToTextAndImage(final String path) throws Excepti
205204 text = line .substring (0 , tagIndex ).replace ("&nbsp;" , SSStrU .empty ).replace ("Â" , SSStrU .empty ).trim ();
206205
207206 if (!text .isEmpty ()){
208- result += text + SSStrU .backslashRBackslashN ;
207+ result += "<div>" + text + "</div>" + SSStrU .backslashRBackslashN ;
209208 }
210209 }
211210
212- // String mediaTag;
213- // if(tmpTag.contains("<en-media")){
214- //
215- // tagIndex = tmpTag.indexOf("<en-media");
216- // tagEndIndex = tmpTag.indexOf(">");
217- //
218- // if(tagEndIndex != -1){
219- // mediaTag = tmpTag.substring(tagIndex, tagEndIndex);
220- // result += mediaTag + "></en-media>";
221- // }
222- // }
211+ if (tmpTag .startsWith ("<en-media" )){
212+
213+ mediaStartIndex = tmpTag .indexOf ("<en-media" );
214+ mediaEndIndex = tmpTag .indexOf (">" );
215+
216+ if (mediaEndIndex != -1 ){
217+ mediaTag = tmpTag .substring (mediaStartIndex , mediaEndIndex + 1 );
218+
219+ if (
220+ !mediaTag .endsWith ("/>" ) &&
221+ mediaTag .length () > 2 ){
222+
223+ result += mediaTag .substring (0 , mediaTag .length () - 1 ) + "/>" ;
224+ }else {
225+ result += mediaTag ;
226+ }
227+
228+ line = line .replace (mediaTag , SSStrU .empty ).replace ("&nbsp;" , SSStrU .empty ).replace ("Â" , SSStrU .empty ).trim ();
229+ continue ;
230+ }
231+ }
223232
224233 while (tmpTag .contains ("href=\" " )){
225234
@@ -229,25 +238,23 @@ public static String reduceXHTMLToTextAndImage(final String path) throws Excepti
229238
230239 if (tmpTag .contains ("title=\" " )){
231240
232- titleIndex = tmpTag .indexOf ("title=\" " );
241+ titleIndex = tmpTag .indexOf ("title=\" " );
233242 titleEndIndex = tmpTag .indexOf ("\" " , titleIndex + 7 );
234- title = tmpTag .substring (titleIndex + 7 , titleEndIndex );
235-
236- result += title .replace ("&nbsp;" , SSStrU .empty ).replace ("Â" , SSStrU .empty ) + ": " + SSStrU .backslashRBackslashN ;
237-
238- tmpTag = tmpTag .substring (0 , titleIndex ) + tmpTag .substring (titleEndIndex + 1 , tmpTag .length () - 1 );
243+ title = tmpTag .substring (titleIndex + 7 , titleEndIndex );
244+ title = title .replace ("&nbsp;" , SSStrU .empty ).replace ("Â" , SSStrU .empty );
239245
240- hrefIndex = tmpTag .indexOf ("href=\" " );
246+ tmpTag = tmpTag .substring (0 , titleIndex ) + tmpTag .substring (titleEndIndex + 1 , tmpTag .length () - 1 );
247+ hrefIndex = tmpTag .indexOf ("href=\" " );
241248 hrefEndIndex = tmpTag .indexOf ("\" " , hrefIndex + 6 );
242- href = tmpTag .substring (hrefIndex + 6 , hrefEndIndex );
249+ href = tmpTag .substring (hrefIndex + 6 , hrefEndIndex );
243250
244251 tmpTag = tmpTag .substring (0 , hrefIndex ) + tmpTag .substring (hrefEndIndex + 1 , tmpTag .length () - 1 );
245252
246- result += href + SSStrU .backslashRBackslashN ;
253+ result += "<div>" + "<a href= \" " + href + " \" >" + title + "</a>" + "</div>" + SSStrU .backslashRBackslashN ;
247254
248255 }else {
249- result += "link " + ": " + href + SSStrU .backslashRBackslashN ;
250-
256+ result += "<div> " + "<a href= \" " + href + " \" >" + href + "</a>" + "</div>" + SSStrU .backslashRBackslashN ;
257+
251258 tmpTag = tmpTag .substring (0 , hrefIndex ) + tmpTag .substring (hrefEndIndex + 1 , tmpTag .length () - 1 );
252259 }
253260 }
@@ -258,13 +265,12 @@ public static String reduceXHTMLToTextAndImage(final String path) throws Excepti
258265 line = line .replace ("&nbsp;" , SSStrU .empty ).replace ("Â" , SSStrU .empty ).trim ();
259266
260267 if (!line .isEmpty ()){
261- result += line + SSStrU .backslashRBackslashN ;
268+ result += "<div>" + line + "</div>" + SSStrU .backslashRBackslashN ;
262269 }
263270 }
264271
265272 result +=
266273 "</body>\n "
267- + "\n "
268274 + "</html>" ;
269275
270276 return result ;
@@ -333,13 +339,16 @@ private String downnloadNoteResourcesAndFillXHTMLWithLocalImageLinks(
333339 }else {
334340 endIndex = endIndex2 ;
335341 }
336- // imageGif
342+
337343 if (//application/pdf //application/vnd.openxmlformats-officedocument.presentationml.presentation //application/msword //application/vnd.openxmlformats-officedocument.wordprocessingml.document
338344 !(tmpLine .contains ("type=\" " + SSMimeTypeU .imagePng + "\" " ) &&
339345 endIndex > tmpLine .indexOf ("type=\" " + SSMimeTypeU .imagePng + "\" " )
340346 ) &&
341347 !(tmpLine .contains ("type=\" " + SSMimeTypeU .imageJpeg + "\" " ) &&
342348 endIndex > tmpLine .indexOf ("type=\" " + SSMimeTypeU .imageJpeg + "\" " )
349+ ) &&
350+ !(tmpLine .contains ("type=\" " + SSMimeTypeU .imageGif + "\" " ) &&
351+ endIndex > tmpLine .indexOf ("type=\" " + SSMimeTypeU .imageGif + "\" " )
343352 )){
344353
345354 if (endIndex == endIndex1 ){
0 commit comments