Skip to content

Commit cbaa951

Browse files
Merge pull request #479 from gainan/issue_451
Extract charset encoding from HTTP headers or HTML headers.
2 parents 823e401 + 6d99316 commit cbaa951

File tree

2 files changed

+73
-6
lines changed

2 files changed

+73
-6
lines changed

cSploit/src/main/java/org/csploit/android/net/http/RequestParser.java

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
import java.util.ArrayList;
2828
import java.util.Arrays;
2929
import java.util.Iterator;
30+
import java.util.regex.Matcher;
31+
import java.util.regex.Pattern;
3032

3133
public class RequestParser
3234
{
@@ -478,4 +480,48 @@ public static ArrayList<HttpCookie> getCookiesFromHeaders(ArrayList<String> head
478480

479481
return null;
480482
}
483+
484+
/**
485+
* extract the charset encoding from the HTTP response headers.
486+
*
487+
* @param contentType content-type header to be parsed
488+
* @return returns the charset encoding if we've found it, or null.
489+
*/
490+
public static String getCharsetFromHeaders(String contentType){
491+
if (contentType != null && contentType.toLowerCase().trim().contains("charset=")){
492+
String[] parts = contentType.toLowerCase().trim().split("=");
493+
if (parts.length > 0)
494+
return parts[1];
495+
}
496+
497+
return null;
498+
}
499+
500+
/**
501+
* extract the charset encoding of a web site from the <meta> headers.
502+
*
503+
* @param body html body of the site to be parsed
504+
* @return returns the charset encoding if we've found it, or null.
505+
*/
506+
public static String getCharsetFromBody(String body) {
507+
if (body != null) {
508+
int headEnd = body.toLowerCase().trim().indexOf("</head>");
509+
510+
// return null if there's no head tags
511+
if (headEnd == -1)
512+
return null;
513+
514+
String body_head = body.toLowerCase().substring(0, headEnd);
515+
516+
Pattern p = Pattern.compile("charset=([\"\'a-z0-9A-Z-]+)");
517+
Matcher m = p.matcher(body_head);
518+
String str_match = "";
519+
if (m.find()) {
520+
str_match = m.toMatchResult().group(1);
521+
return str_match.replaceAll("[\"']", "");
522+
}
523+
}
524+
525+
return null;
526+
}
481527
}

cSploit/src/main/java/org/csploit/android/net/http/proxy/StreamThread.java

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,17 @@
1818
*/
1919
package org.csploit.android.net.http.proxy;
2020

21-
import java.io.IOException;
22-
import java.io.InputStream;
23-
import java.io.OutputStream;
24-
21+
import org.csploit.android.core.Logger;
2522
import org.csploit.android.core.Profiler;
2623
import org.csploit.android.core.System;
27-
import org.csploit.android.core.Logger;
2824
import org.csploit.android.net.ByteBuffer;
2925
import org.csploit.android.net.http.RequestParser;
3026

27+
import java.io.IOException;
28+
import java.io.InputStream;
29+
import java.io.OutputStream;
30+
import java.io.UnsupportedEncodingException;
31+
3132
public class StreamThread implements Runnable
3233
{
3334
private final static String[] FILTERED_CONTENT_TYPES = new String[]
@@ -159,7 +160,27 @@ public void run(){
159160

160161
headers = patched;
161162

162-
mBuffer.setData((headers + HEAD_SEPARATOR + body).getBytes());
163+
// try to get the charset encoding from the HTTP headers.
164+
String charset = RequestParser.getCharsetFromHeaders(contentType);
165+
166+
// if we haven't found the charset encoding on the HTTP headers, try it out on the body.
167+
if (charset == null) {
168+
charset = RequestParser.getCharsetFromBody(body);
169+
}
170+
171+
if (charset != null) {
172+
try {
173+
mBuffer.setData((headers + HEAD_SEPARATOR + body).getBytes(charset));
174+
}
175+
catch (UnsupportedEncodingException e){
176+
Logger.error("UnsupportedEncoding: " + e.getLocalizedMessage());
177+
mBuffer.setData((headers + HEAD_SEPARATOR + body).getBytes());
178+
}
179+
}
180+
else {
181+
// if we haven't found the charset encoding, just handle it on ByteBuffer()
182+
mBuffer.setData((headers + HEAD_SEPARATOR + body).getBytes());
183+
}
163184

164185
mWriter.write(mBuffer.getData());
165186
mWriter.flush();

0 commit comments

Comments
 (0)