|
| 1 | +/** |
| 2 | + * Jooby https://jooby.io |
| 3 | + * Apache License Version 2.0 https://jooby.io/LICENSE.txt |
| 4 | + * Copyright 2014 Edgar Espina |
| 5 | + */ |
| 6 | +package io.jooby; |
| 7 | + |
| 8 | +import org.unbescape.html.HtmlEscape; |
| 9 | +import org.unbescape.javascript.JavaScriptEscape; |
| 10 | +import org.unbescape.json.JsonEscape; |
| 11 | +import org.unbescape.uri.UriEscape; |
| 12 | +import org.unbescape.xml.XmlEscape; |
| 13 | + |
| 14 | +import javax.annotation.Nullable; |
| 15 | + |
| 16 | +/** |
| 17 | + * Set of escaping routines for fixing cross-site scripting (XSS). |
| 18 | + */ |
| 19 | +public final class XSS { |
| 20 | + private XSS() {} |
| 21 | + |
| 22 | + /** |
| 23 | + * <p> |
| 24 | + * Perform am URI path <strong>escape</strong> operation |
| 25 | + * on a <tt>String</tt> input using <tt>UTF-8</tt> as encoding. |
| 26 | + * </p> |
| 27 | + * <p> |
| 28 | + * The following are the only allowed chars in an URI path (will not be escaped): |
| 29 | + * </p> |
| 30 | + * <ul> |
| 31 | + * <li><tt>A-Z a-z 0-9</tt></li> |
| 32 | + * <li><tt>- . _ ~</tt></li> |
| 33 | + * <li><tt>! $ & ' ( ) * + , ; =</tt></li> |
| 34 | + * <li><tt>: @</tt></li> |
| 35 | + * <li><tt>/</tt></li> |
| 36 | + * </ul> |
| 37 | + * <p> |
| 38 | + * All other chars will be escaped by converting them to the sequence of bytes that |
| 39 | + * represents them in the <tt>UTF-8</tt> and then representing each byte |
| 40 | + * in <tt>%HH</tt> syntax, being <tt>HH</tt> the hexadecimal representation of the byte. |
| 41 | + * </p> |
| 42 | + * <p> |
| 43 | + * This method is <strong>thread-safe</strong>. |
| 44 | + * </p> |
| 45 | + * |
| 46 | + * @param value the <tt>String</tt> to be escaped. |
| 47 | + * @return The escaped result <tt>String</tt>. As a memory-performance improvement, will return the exact |
| 48 | + * same object as the <tt>text</tt> input argument if no escaping modifications were required (and |
| 49 | + * no additional <tt>String</tt> objects will be created during processing). Will |
| 50 | + * return <tt>null</tt> if input is <tt>null</tt>. |
| 51 | + */ |
| 52 | + public static @Nullable String uri(@Nullable String value) { |
| 53 | + return UriEscape.escapeUriPath(value); |
| 54 | + } |
| 55 | + |
| 56 | + /** |
| 57 | + * <p> |
| 58 | + * Perform an HTML5 level 2 (result is ASCII) <strong>escape</strong> operation on a <tt>String</tt> input. |
| 59 | + * </p> |
| 60 | + * <p> |
| 61 | + * <em>Level 2</em> means this method will escape: |
| 62 | + * </p> |
| 63 | + * <ul> |
| 64 | + * <li>The five markup-significant characters: <tt><</tt>, <tt>></tt>, <tt>&</tt>, |
| 65 | + * <tt>"</tt> and <tt>'</tt></li> |
| 66 | + * <li>All non ASCII characters.</li> |
| 67 | + * </ul> |
| 68 | + * <p> |
| 69 | + * This escape will be performed by replacing those chars by the corresponding HTML5 Named Character References |
| 70 | + * (e.g. <tt>'&acute;'</tt>) when such NCR exists for the replaced character, and replacing by a decimal |
| 71 | + * character reference (e.g. <tt>'&#8345;'</tt>) when there there is no NCR for the replaced character. |
| 72 | + * </p> |
| 73 | + * <p> |
| 74 | + * This method is <strong>thread-safe</strong>. |
| 75 | + * </p> |
| 76 | + * |
| 77 | + * @param value the <tt>String</tt> to be escaped. |
| 78 | + * @return The escaped result <tt>String</tt>. As a memory-performance improvement, will return the exact |
| 79 | + * same object as the <tt>text</tt> input argument if no escaping modifications were required (and |
| 80 | + * no additional <tt>String</tt> objects will be created during processing). Will |
| 81 | + * return <tt>null</tt> if input is <tt>null</tt>. |
| 82 | + */ |
| 83 | + public static @Nullable String html(@Nullable String value) { |
| 84 | + return HtmlEscape.escapeHtml5(value); |
| 85 | + } |
| 86 | + |
| 87 | + /** |
| 88 | + * <p> |
| 89 | + * Perform a JavaScript level 2 (basic set and all non-ASCII chars) <strong>escape</strong> operation |
| 90 | + * on a <tt>String</tt> input. |
| 91 | + * </p> |
| 92 | + * <p> |
| 93 | + * <em>Level 2</em> means this method will escape: |
| 94 | + * </p> |
| 95 | + * <ul> |
| 96 | + * <li>The JavaScript basic escape set: |
| 97 | + * <ul> |
| 98 | + * <li>The <em>Single Escape Characters</em>: |
| 99 | + * <tt>\0</tt> (<tt>U+0000</tt>), |
| 100 | + * <tt>\b</tt> (<tt>U+0008</tt>), |
| 101 | + * <tt>\t</tt> (<tt>U+0009</tt>), |
| 102 | + * <tt>\n</tt> (<tt>U+000A</tt>), |
| 103 | + * <tt>\v</tt> (<tt>U+000B</tt>), |
| 104 | + * <tt>\f</tt> (<tt>U+000C</tt>), |
| 105 | + * <tt>\r</tt> (<tt>U+000D</tt>), |
| 106 | + * <tt>\"</tt> (<tt>U+0022</tt>), |
| 107 | + * <tt>\'</tt> (<tt>U+0027</tt>), |
| 108 | + * <tt>\\</tt> (<tt>U+005C</tt>) and |
| 109 | + * <tt>\/</tt> (<tt>U+002F</tt>). |
| 110 | + * Note that <tt>\/</tt> is optional, and will only be used when the <tt>/</tt> |
| 111 | + * symbol appears after <tt><</tt>, as in <tt></</tt>. This is to avoid accidentally |
| 112 | + * closing <tt><script></tt> tags in HTML. Also, note that <tt>\v</tt> |
| 113 | + * (<tt>U+000B</tt>) is actually included as a Single Escape |
| 114 | + * Character in the JavaScript (ECMAScript) specification, but will not be used as it |
| 115 | + * is not supported by Microsoft Internet Explorer versions < 9. |
| 116 | + * </li> |
| 117 | + * <li> |
| 118 | + * Two ranges of non-displayable, control characters (some of which are already part of the |
| 119 | + * <em>single escape characters</em> list): <tt>U+0001</tt> to <tt>U+001F</tt> and |
| 120 | + * <tt>U+007F</tt> to <tt>U+009F</tt>. |
| 121 | + * </li> |
| 122 | + * </ul> |
| 123 | + * </li> |
| 124 | + * <li>All non ASCII characters.</li> |
| 125 | + * </ul> |
| 126 | + * <p> |
| 127 | + * This escape will be performed by using the Single Escape Chars whenever possible. For escaped |
| 128 | + * characters that do not have an associated SEC, default to using <tt>\xFF</tt> Hexadecimal Escapes |
| 129 | + * if possible (characters <= <tt>U+00FF</tt>), then default to <tt>\uFFFF</tt> |
| 130 | + * Hexadecimal Escapes. This type of escape <u>produces the smallest escaped string possible</u>. |
| 131 | + * </p> |
| 132 | + * <p> |
| 133 | + * This method is <strong>thread-safe</strong>. |
| 134 | + * </p> |
| 135 | + * |
| 136 | + * @param value the <tt>String</tt> to be escaped. |
| 137 | + * @return The escaped result <tt>String</tt>. As a memory-performance improvement, will return the exact |
| 138 | + * same object as the <tt>text</tt> input argument if no escaping modifications were required (and |
| 139 | + * no additional <tt>String</tt> objects will be created during processing). Will |
| 140 | + * return <tt>null</tt> if input is <tt>null</tt>. |
| 141 | + */ |
| 142 | + public static @Nullable String javaScript(@Nullable String value) { |
| 143 | + return JavaScriptEscape.escapeJavaScript(value); |
| 144 | + } |
| 145 | + |
| 146 | + /** |
| 147 | + * <p> |
| 148 | + * Perform a JSON level 2 (basic set and all non-ASCII chars) <strong>escape</strong> operation |
| 149 | + * on a <tt>String</tt> input. |
| 150 | + * </p> |
| 151 | + * <p> |
| 152 | + * <em>Level 2</em> means this method will escape: |
| 153 | + * </p> |
| 154 | + * <ul> |
| 155 | + * <li>The JSON basic escape set: |
| 156 | + * <ul> |
| 157 | + * <li>The <em>Single Escape Characters</em>: |
| 158 | + * <tt>\b</tt> (<tt>U+0008</tt>), |
| 159 | + * <tt>\t</tt> (<tt>U+0009</tt>), |
| 160 | + * <tt>\n</tt> (<tt>U+000A</tt>), |
| 161 | + * <tt>\f</tt> (<tt>U+000C</tt>), |
| 162 | + * <tt>\r</tt> (<tt>U+000D</tt>), |
| 163 | + * <tt>\"</tt> (<tt>U+0022</tt>), |
| 164 | + * <tt>\\</tt> (<tt>U+005C</tt>) and |
| 165 | + * <tt>\/</tt> (<tt>U+002F</tt>). |
| 166 | + * Note that <tt>\/</tt> is optional, and will only be used when the <tt>/</tt> |
| 167 | + * symbol appears after <tt><</tt>, as in <tt></</tt>. This is to avoid accidentally |
| 168 | + * closing <tt><script></tt> tags in HTML. |
| 169 | + * </li> |
| 170 | + * <li> |
| 171 | + * Two ranges of non-displayable, control characters (some of which are already part of the |
| 172 | + * <em>single escape characters</em> list): <tt>U+0000</tt> to <tt>U+001F</tt> (required |
| 173 | + * by the JSON spec) and <tt>U+007F</tt> to <tt>U+009F</tt> (additional). |
| 174 | + * </li> |
| 175 | + * </ul> |
| 176 | + * </li> |
| 177 | + * <li>All non ASCII characters.</li> |
| 178 | + * </ul> |
| 179 | + * <p> |
| 180 | + * This escape will be performed by using the Single Escape Chars whenever possible. For escaped |
| 181 | + * characters that do not have an associated SEC, default to <tt>\uFFFF</tt> |
| 182 | + * Hexadecimal Escapes. |
| 183 | + * </p> |
| 184 | + * <p> |
| 185 | + * This method is <strong>thread-safe</strong>. |
| 186 | + * </p> |
| 187 | + * |
| 188 | + * @param value the <tt>String</tt> to be escaped. |
| 189 | + * @return The escaped result <tt>String</tt>. As a memory-performance improvement, will return the exact |
| 190 | + * same object as the <tt>text</tt> input argument if no escaping modifications were required (and |
| 191 | + * no additional <tt>String</tt> objects will be created during processing). Will |
| 192 | + * return <tt>null</tt> if input is <tt>null</tt>. |
| 193 | + */ |
| 194 | + public static @Nullable String json(@Nullable String value) { |
| 195 | + return JsonEscape.escapeJson(value); |
| 196 | + } |
| 197 | + |
| 198 | + /** |
| 199 | + * <p> |
| 200 | + * Perform an XML 1.1 level 2 (markup-significant and all non-ASCII chars) <strong>escape</strong> operation |
| 201 | + * on a <tt>String</tt> input. |
| 202 | + * </p> |
| 203 | + * <p> |
| 204 | + * <em>Level 2</em> means this method will escape: |
| 205 | + * </p> |
| 206 | + * <ul> |
| 207 | + * <li>The five markup-significant characters: <tt><</tt>, <tt>></tt>, <tt>&</tt>, |
| 208 | + * <tt>"</tt> and <tt>'</tt></li> |
| 209 | + * <li>All non ASCII characters.</li> |
| 210 | + * </ul> |
| 211 | + * <p> |
| 212 | + * This escape will be performed by replacing those chars by the corresponding XML Character Entity References |
| 213 | + * (e.g. <tt>'&lt;'</tt>) when such CER exists for the replaced character, and replacing by a hexadecimal |
| 214 | + * character reference (e.g. <tt>'&#x2430;'</tt>) when there there is no CER for the replaced character. |
| 215 | + * </p> |
| 216 | + * <p> |
| 217 | + * This method is <strong>thread-safe</strong>. |
| 218 | + * </p> |
| 219 | + * |
| 220 | + * @param value the <tt>String</tt> to be escaped. |
| 221 | + * @return The escaped result <tt>String</tt>. As a memory-performance improvement, will return the exact |
| 222 | + * same object as the <tt>text</tt> input argument if no escaping modifications were required (and |
| 223 | + * no additional <tt>String</tt> objects will be created during processing). Will |
| 224 | + * return <tt>null</tt> if input is <tt>null</tt>. |
| 225 | + */ |
| 226 | + public static @Nullable String xml(@Nullable String value) { |
| 227 | + return XmlEscape.escapeXml11(value); |
| 228 | + } |
| 229 | +} |
0 commit comments