001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.lang3; 018 019import java.io.IOException; 020import java.io.Writer; 021 022import org.apache.commons.lang3.text.translate.AggregateTranslator; 023import org.apache.commons.lang3.text.translate.CharSequenceTranslator; 024import org.apache.commons.lang3.text.translate.EntityArrays; 025import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper; 026import org.apache.commons.lang3.text.translate.LookupTranslator; 027import org.apache.commons.lang3.text.translate.NumericEntityEscaper; 028import org.apache.commons.lang3.text.translate.NumericEntityUnescaper; 029import org.apache.commons.lang3.text.translate.OctalUnescaper; 030import org.apache.commons.lang3.text.translate.UnicodeUnescaper; 031import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover; 032 033/** 034 * Escapes and unescapes {@link String}s for 035 * Java, Java Script, HTML and XML. 036 * 037 * <p>#ThreadSafe#</p> 038 * @since 2.0 039 * @deprecated As of 3.6, use Apache Commons Text 040 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/StringEscapeUtils.html"> 041 * StringEscapeUtils</a> instead 042 */ 043@Deprecated 044public class StringEscapeUtils { 045 046 /* ESCAPE TRANSLATORS */ 047 048 static class CsvEscaper extends CharSequenceTranslator { 049 050 private static final char CSV_DELIMITER = ','; 051 private static final char CSV_QUOTE = '"'; 052 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); 053 private static final char[] CSV_SEARCH_CHARS = { CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF }; 054 055 @Override 056 public int translate(final CharSequence input, final int index, final Writer out) throws IOException { 057 058 if (index != 0) { 059 throw new IllegalStateException("CsvEscaper should never reach the [1] index"); 060 } 061 062 if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) { 063 out.write(input.toString()); 064 } else { 065 out.write(CSV_QUOTE); 066 out.write(Strings.CS.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR)); 067 out.write(CSV_QUOTE); 068 } 069 return Character.codePointCount(input, 0, input.length()); 070 } 071 } 072 073 static class CsvUnescaper extends CharSequenceTranslator { 074 075 private static final char CSV_DELIMITER = ','; 076 private static final char CSV_QUOTE = '"'; 077 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); 078 private static final char[] CSV_SEARCH_CHARS = {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; 079 080 @Override 081 public int translate(final CharSequence input, final int index, final Writer out) throws IOException { 082 083 if (index != 0) { 084 throw new IllegalStateException("CsvUnescaper should never reach the [1] index"); 085 } 086 087 if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) { 088 out.write(input.toString()); 089 return Character.codePointCount(input, 0, input.length()); 090 } 091 092 // strip quotes 093 final String quoteless = input.subSequence(1, input.length() - 1).toString(); 094 095 if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) { 096 // deal with escaped quotes; ie) "" 097 out.write(Strings.CS.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR)); 098 } else { 099 out.write(input.toString()); 100 } 101 return Character.codePointCount(input, 0, input.length()); 102 } 103 } 104 105 /** 106 * Translator object for escaping Java. 107 * 108 * While {@link #escapeJava(String)} is the expected method of use, this 109 * object allows the Java escaping functionality to be used 110 * as the foundation for a custom translator. 111 * 112 * @since 3.0 113 */ 114 public static final CharSequenceTranslator ESCAPE_JAVA = 115 new LookupTranslator( 116 new String[][] { 117 {"\"", "\\\""}, 118 {"\\", "\\\\"}, 119 }).with( 120 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()) 121 ).with( 122 JavaUnicodeEscaper.outsideOf(32, 0x7f) 123 ); 124 125 /** 126 * Translator object for escaping EcmaScript/JavaScript. 127 * 128 * While {@link #escapeEcmaScript(String)} is the expected method of use, this 129 * object allows the EcmaScript escaping functionality to be used 130 * as the foundation for a custom translator. 131 * 132 * @since 3.0 133 */ 134 public static final CharSequenceTranslator ESCAPE_ECMASCRIPT = 135 new AggregateTranslator( 136 new LookupTranslator( 137 new String[][] { 138 {"'", "\\'"}, 139 {"\"", "\\\""}, 140 {"\\", "\\\\"}, 141 {"/", "\\/"} 142 }), 143 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()), 144 JavaUnicodeEscaper.outsideOf(32, 0x7f) 145 ); 146 147 /** 148 * Translator object for escaping Json. 149 * 150 * While {@link #escapeJson(String)} is the expected method of use, this 151 * object allows the Json escaping functionality to be used 152 * as the foundation for a custom translator. 153 * 154 * @since 3.2 155 */ 156 public static final CharSequenceTranslator ESCAPE_JSON = 157 new AggregateTranslator( 158 new LookupTranslator( 159 new String[][] { 160 {"\"", "\\\""}, 161 {"\\", "\\\\"}, 162 {"/", "\\/"} 163 }), 164 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()), 165 JavaUnicodeEscaper.outsideOf(32, 0x7f) 166 ); 167 168 /** 169 * Translator object for escaping XML. 170 * 171 * While {@link #escapeXml(String)} is the expected method of use, this 172 * object allows the XML escaping functionality to be used 173 * as the foundation for a custom translator. 174 * 175 * @since 3.0 176 * @deprecated use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead. 177 */ 178 @Deprecated 179 public static final CharSequenceTranslator ESCAPE_XML = 180 new AggregateTranslator( 181 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 182 new LookupTranslator(EntityArrays.APOS_ESCAPE()) 183 ); 184 185 /** 186 * Translator object for escaping XML 1.0. 187 * 188 * While {@link #escapeXml10(String)} is the expected method of use, this 189 * object allows the XML escaping functionality to be used 190 * as the foundation for a custom translator. 191 * 192 * @since 3.3 193 */ 194 public static final CharSequenceTranslator ESCAPE_XML10 = 195 new AggregateTranslator( 196 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 197 new LookupTranslator(EntityArrays.APOS_ESCAPE()), 198 new LookupTranslator( 199 new String[][] { 200 { "\u0000", StringUtils.EMPTY }, 201 { "\u0001", StringUtils.EMPTY }, 202 { "\u0002", StringUtils.EMPTY }, 203 { "\u0003", StringUtils.EMPTY }, 204 { "\u0004", StringUtils.EMPTY }, 205 { "\u0005", StringUtils.EMPTY }, 206 { "\u0006", StringUtils.EMPTY }, 207 { "\u0007", StringUtils.EMPTY }, 208 { "\u0008", StringUtils.EMPTY }, 209 { "\u000b", StringUtils.EMPTY }, 210 { "\u000c", StringUtils.EMPTY }, 211 { "\u000e", StringUtils.EMPTY }, 212 { "\u000f", StringUtils.EMPTY }, 213 { "\u0010", StringUtils.EMPTY }, 214 { "\u0011", StringUtils.EMPTY }, 215 { "\u0012", StringUtils.EMPTY }, 216 { "\u0013", StringUtils.EMPTY }, 217 { "\u0014", StringUtils.EMPTY }, 218 { "\u0015", StringUtils.EMPTY }, 219 { "\u0016", StringUtils.EMPTY }, 220 { "\u0017", StringUtils.EMPTY }, 221 { "\u0018", StringUtils.EMPTY }, 222 { "\u0019", StringUtils.EMPTY }, 223 { "\u001a", StringUtils.EMPTY }, 224 { "\u001b", StringUtils.EMPTY }, 225 { "\u001c", StringUtils.EMPTY }, 226 { "\u001d", StringUtils.EMPTY }, 227 { "\u001e", StringUtils.EMPTY }, 228 { "\u001f", StringUtils.EMPTY }, 229 { "\ufffe", StringUtils.EMPTY }, 230 { "\uffff", StringUtils.EMPTY } 231 }), 232 NumericEntityEscaper.between(0x7f, 0x84), 233 NumericEntityEscaper.between(0x86, 0x9f), 234 new UnicodeUnpairedSurrogateRemover() 235 ); 236 237 /** 238 * Translator object for escaping XML 1.1. 239 * 240 * While {@link #escapeXml11(String)} is the expected method of use, this 241 * object allows the XML escaping functionality to be used 242 * as the foundation for a custom translator. 243 * 244 * @since 3.3 245 */ 246 public static final CharSequenceTranslator ESCAPE_XML11 = 247 new AggregateTranslator( 248 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 249 new LookupTranslator(EntityArrays.APOS_ESCAPE()), 250 new LookupTranslator( 251 new String[][] { 252 { "\u0000", StringUtils.EMPTY }, 253 { "\u000b", "" }, 254 { "\u000c", "" }, 255 { "\ufffe", StringUtils.EMPTY }, 256 { "\uffff", StringUtils.EMPTY } 257 }), 258 NumericEntityEscaper.between(0x1, 0x8), 259 NumericEntityEscaper.between(0xe, 0x1f), 260 NumericEntityEscaper.between(0x7f, 0x84), 261 NumericEntityEscaper.between(0x86, 0x9f), 262 new UnicodeUnpairedSurrogateRemover() 263 ); 264 265 /** 266 * Translator object for escaping HTML version 3.0. 267 * 268 * While {@link #escapeHtml3(String)} is the expected method of use, this 269 * object allows the HTML escaping functionality to be used 270 * as the foundation for a custom translator. 271 * 272 * @since 3.0 273 */ 274 public static final CharSequenceTranslator ESCAPE_HTML3 = 275 new AggregateTranslator( 276 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 277 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()) 278 ); 279 280 /** 281 * Translator object for escaping HTML version 4.0. 282 * 283 * While {@link #escapeHtml4(String)} is the expected method of use, this 284 * object allows the HTML escaping functionality to be used 285 * as the foundation for a custom translator. 286 * 287 * @since 3.0 288 */ 289 public static final CharSequenceTranslator ESCAPE_HTML4 = 290 new AggregateTranslator( 291 new LookupTranslator(EntityArrays.BASIC_ESCAPE()), 292 new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()), 293 new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE()) 294 ); 295 296 /* UNESCAPE TRANSLATORS */ 297 298 /** 299 * Translator object for escaping individual Comma Separated Values. 300 * 301 * While {@link #escapeCsv(String)} is the expected method of use, this 302 * object allows the CSV escaping functionality to be used 303 * as the foundation for a custom translator. 304 * 305 * @since 3.0 306 */ 307 public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper(); 308 309 /** 310 * Translator object for unescaping escaped Java. 311 * 312 * While {@link #unescapeJava(String)} is the expected method of use, this 313 * object allows the Java unescaping functionality to be used 314 * as the foundation for a custom translator. 315 * 316 * @since 3.0 317 */ 318 // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)? 319 public static final CharSequenceTranslator UNESCAPE_JAVA = 320 new AggregateTranslator( 321 new OctalUnescaper(), // .between('\1', '\377'), 322 new UnicodeUnescaper(), 323 new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()), 324 new LookupTranslator( 325 new String[][] { 326 {"\\\\", "\\"}, 327 {"\\\"", "\""}, 328 {"\\'", "'"}, 329 {"\\", ""} 330 }) 331 ); 332 333 /** 334 * Translator object for unescaping escaped EcmaScript. 335 * 336 * While {@link #unescapeEcmaScript(String)} is the expected method of use, this 337 * object allows the EcmaScript unescaping functionality to be used 338 * as the foundation for a custom translator. 339 * 340 * @since 3.0 341 */ 342 public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA; 343 344 /** 345 * Translator object for unescaping escaped Json. 346 * 347 * While {@link #unescapeJson(String)} is the expected method of use, this 348 * object allows the Json unescaping functionality to be used 349 * as the foundation for a custom translator. 350 * 351 * @since 3.2 352 */ 353 public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA; 354 355 /** 356 * Translator object for unescaping escaped HTML 3.0. 357 * 358 * While {@link #unescapeHtml3(String)} is the expected method of use, this 359 * object allows the HTML unescaping functionality to be used 360 * as the foundation for a custom translator. 361 * 362 * @since 3.0 363 */ 364 public static final CharSequenceTranslator UNESCAPE_HTML3 = 365 new AggregateTranslator( 366 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 367 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), 368 new NumericEntityUnescaper() 369 ); 370 371 /** 372 * Translator object for unescaping escaped HTML 4.0. 373 * 374 * While {@link #unescapeHtml4(String)} is the expected method of use, this 375 * object allows the HTML unescaping functionality to be used 376 * as the foundation for a custom translator. 377 * 378 * @since 3.0 379 */ 380 public static final CharSequenceTranslator UNESCAPE_HTML4 = 381 new AggregateTranslator( 382 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 383 new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()), 384 new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()), 385 new NumericEntityUnescaper() 386 ); 387 388 /** 389 * Translator object for unescaping escaped XML. 390 * 391 * While {@link #unescapeXml(String)} is the expected method of use, this 392 * object allows the XML unescaping functionality to be used 393 * as the foundation for a custom translator. 394 * 395 * @since 3.0 396 */ 397 public static final CharSequenceTranslator UNESCAPE_XML = 398 new AggregateTranslator( 399 new LookupTranslator(EntityArrays.BASIC_UNESCAPE()), 400 new LookupTranslator(EntityArrays.APOS_UNESCAPE()), 401 new NumericEntityUnescaper() 402 ); 403 404 /** 405 * Translator object for unescaping escaped Comma Separated Value entries. 406 * 407 * While {@link #unescapeCsv(String)} is the expected method of use, this 408 * object allows the CSV unescaping functionality to be used 409 * as the foundation for a custom translator. 410 * 411 * @since 3.0 412 */ 413 public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper(); 414 415 /* Helper functions */ 416 417 /** 418 * Returns a {@link String} value for a CSV column enclosed in double quotes, 419 * if required. 420 * 421 * <p>If the value contains a comma, newline or double quote, then the 422 * String value is returned enclosed in double quotes.</p> 423 * 424 * <p>Any double quote characters in the value are escaped with another double quote.</p> 425 * 426 * <p>If the value does not contain a comma, newline or double quote, then the 427 * String value is returned unchanged.</p> 428 * 429 * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 430 * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>. 431 * 432 * @param input the input CSV column String, may be null 433 * @return the input String, enclosed in double quotes if the value contains a comma, 434 * newline or double quote, {@code null} if null string input 435 * @since 2.4 436 */ 437 public static final String escapeCsv(final String input) { 438 return ESCAPE_CSV.translate(input); 439 } 440 441 /** 442 * Escapes the characters in a {@link String} using EcmaScript String rules. 443 * <p>Escapes any values it finds into their EcmaScript String form. 444 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 445 * 446 * <p>So a tab becomes the characters {@code '\\'} and 447 * {@code 't'}.</p> 448 * 449 * <p>The only difference between Java strings and EcmaScript strings 450 * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p> 451 * 452 * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p> 453 * 454 * <p>Example:</p> 455 * <pre> 456 * input string: He didn't say, "Stop!" 457 * output string: He didn\'t say, \"Stop!\" 458 * </pre> 459 * 460 * @param input String to escape values in, may be null 461 * @return String with escaped values, {@code null} if null string input 462 * @since 3.0 463 */ 464 public static final String escapeEcmaScript(final String input) { 465 return ESCAPE_ECMASCRIPT.translate(input); 466 } 467 468 /** 469 * Escapes the characters in a {@link String} using HTML entities. 470 * <p>Supports only the HTML 3.0 entities.</p> 471 * 472 * @param input the {@link String} to escape, may be null 473 * @return a new escaped {@link String}, {@code null} if null string input 474 * @since 3.0 475 */ 476 public static final String escapeHtml3(final String input) { 477 return ESCAPE_HTML3.translate(input); 478 } 479 480 /** 481 * Escapes the characters in a {@link String} using HTML entities. 482 * 483 * <p> 484 * For example: 485 * </p> 486 * <p>{@code "bread" & "butter"}</p> 487 * becomes: 488 * <p> 489 * {@code &quot;bread&quot; &amp; &quot;butter&quot;}. 490 * </p> 491 * 492 * <p>Supports all known HTML 4.0 entities, including funky accents. 493 * Note that the commonly used apostrophe escape character (&apos;) 494 * is not a legal entity and so is not supported).</p> 495 * 496 * @param input the {@link String} to escape, may be null 497 * @return a new escaped {@link String}, {@code null} if null string input 498 * @see <a href="https://web.archive.org/web/20060225074150/https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> 499 * @see <a href="https://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> 500 * @see <a href="https://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> 501 * @see <a href="https://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> 502 * @see <a href="https://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> 503 * @since 3.0 504 */ 505 public static final String escapeHtml4(final String input) { 506 return ESCAPE_HTML4.translate(input); 507 } 508 509 /** 510 * Escapes the characters in a {@link String} using Java String rules. 511 * 512 * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 513 * 514 * <p>So a tab becomes the characters {@code '\\'} and 515 * {@code 't'}.</p> 516 * 517 * <p>The only difference between Java strings and JavaScript strings 518 * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p> 519 * 520 * <p>Example:</p> 521 * <pre> 522 * input string: He didn't say, "Stop!" 523 * output string: He didn't say, \"Stop!\" 524 * </pre> 525 * 526 * @param input String to escape values in, may be null 527 * @return String with escaped values, {@code null} if null string input 528 */ 529 public static final String escapeJava(final String input) { 530 return ESCAPE_JAVA.translate(input); 531 } 532 533 /** 534 * Escapes the characters in a {@link String} using Json String rules. 535 * <p>Escapes any values it finds into their Json String form. 536 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p> 537 * 538 * <p>So a tab becomes the characters {@code '\\'} and 539 * {@code 't'}.</p> 540 * 541 * <p>The only difference between Java strings and Json strings 542 * is that in Json, forward-slash (/) is escaped.</p> 543 * 544 * <p>See https://www.ietf.org/rfc/rfc4627.txt for further details.</p> 545 * 546 * <p>Example:</p> 547 * <pre> 548 * input string: He didn't say, "Stop!" 549 * output string: He didn't say, \"Stop!\" 550 * </pre> 551 * 552 * @param input String to escape values in, may be null 553 * @return String with escaped values, {@code null} if null string input 554 * @since 3.2 555 */ 556 public static final String escapeJson(final String input) { 557 return ESCAPE_JSON.translate(input); 558 } 559 560 /** 561 * Escapes the characters in a {@link String} using XML entities. 562 * 563 * <p>For example: {@code "bread" & "butter"} => 564 * {@code "bread" & "butter"}. 565 * </p> 566 * 567 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 568 * Does not support DTDs or external entities.</p> 569 * 570 * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer 571 * escaped. If you still wish this functionality, you can achieve it 572 * via the following: 573 * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE));}</p> 574 * 575 * @param input the {@link String} to escape, may be null 576 * @return a new escaped {@link String}, {@code null} if null string input 577 * @see #unescapeXml(String) 578 * @deprecated use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead. 579 */ 580 @Deprecated 581 public static final String escapeXml(final String input) { 582 return ESCAPE_XML.translate(input); 583 } 584 585 /** 586 * Escapes the characters in a {@link String} using XML entities. 587 * <p> 588 * For example: 589 * </p> 590 * 591 * <pre>{@code 592 * "bread" & "butter" 593 * }</pre> 594 * <p> 595 * converts to: 596 * </p> 597 * 598 * <pre> 599 * {@code 600 * "bread" & "butter" 601 * } 602 * </pre> 603 * 604 * <p> 605 * Note that XML 1.0 is a text-only format: it cannot represent control characters or unpaired Unicode surrogate code points, even after escaping. The 606 * method {@code escapeXml10} will remove characters that do not fit in the following ranges: 607 * </p> 608 * 609 * <p> 610 * {@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]} 611 * </p> 612 * 613 * <p> 614 * Though not strictly necessary, {@code escapeXml10} will escape characters in the following ranges: 615 * </p> 616 * 617 * <p> 618 * {@code [#x7F-#x84] | [#x86-#x9F]} 619 * </p> 620 * 621 * <p> 622 * The returned string can be inserted into a valid XML 1.0 or XML 1.1 document. If you want to allow more non-text characters in an XML 1.1 document, use 623 * {@link #escapeXml11(String)}. 624 * </p> 625 * 626 * @param input the {@link String} to escape, may be null 627 * @return a new escaped {@link String}, {@code null} if null string input 628 * @see #unescapeXml(String) 629 * @since 3.3 630 */ 631 public static String escapeXml10(final String input) { 632 return ESCAPE_XML10.translate(input); 633 } 634 635 /** 636 * Escapes the characters in a {@link String} using XML entities. 637 * 638 * <p>For example: {@code "bread" & "butter"} => 639 * {@code "bread" & "butter"}. 640 * </p> 641 * 642 * <p>XML 1.1 can represent certain control characters, but it cannot represent 643 * the null byte or unpaired Unicode surrogate code points, even after escaping. 644 * {@code escapeXml11} will remove characters that do not fit in the following 645 * ranges:</p> 646 * 647 * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p> 648 * 649 * <p>{@code escapeXml11} will escape characters in the following ranges:</p> 650 * 651 * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p> 652 * 653 * <p>The returned string can be inserted into a valid XML 1.1 document. Do not 654 * use it for XML 1.0 documents.</p> 655 * 656 * @param input the {@link String} to escape, may be null 657 * @return a new escaped {@link String}, {@code null} if null string input 658 * @see #unescapeXml(String) 659 * @since 3.3 660 */ 661 public static String escapeXml11(final String input) { 662 return ESCAPE_XML11.translate(input); 663 } 664 665 /** 666 * Returns a {@link String} value for an unescaped CSV column. 667 * 668 * <p>If the value is enclosed in double quotes, and contains a comma, newline 669 * or double quote, then quotes are removed. 670 * </p> 671 * 672 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped 673 * to just one double quote.</p> 674 * 675 * <p>If the value is not enclosed in double quotes, or is and does not contain a 676 * comma, newline or double quote, then the String value is returned unchanged.</p> 677 * 678 * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and 679 * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>. 680 * 681 * @param input the input CSV column String, may be null 682 * @return the input String, with enclosing double quotes removed and embedded double 683 * quotes unescaped, {@code null} if null string input 684 * @since 2.4 685 */ 686 public static final String unescapeCsv(final String input) { 687 return UNESCAPE_CSV.translate(input); 688 } 689 690 /** 691 * Unescapes any EcmaScript literals found in the {@link String}. 692 * 693 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 694 * into a newline character, unless the {@code '\'} is preceded by another 695 * {@code '\'}.</p> 696 * 697 * @see #unescapeJava(String) 698 * @param input the {@link String} to unescape, may be null 699 * @return A new unescaped {@link String}, {@code null} if null string input 700 * @since 3.0 701 */ 702 public static final String unescapeEcmaScript(final String input) { 703 return UNESCAPE_ECMASCRIPT.translate(input); 704 } 705 706 /** 707 * Unescapes a string containing entity escapes to a string 708 * containing the actual Unicode characters corresponding to the 709 * escapes. Supports only HTML 3.0 entities. 710 * 711 * @param input the {@link String} to unescape, may be null 712 * @return a new unescaped {@link String}, {@code null} if null string input 713 * @since 3.0 714 */ 715 public static final String unescapeHtml3(final String input) { 716 return UNESCAPE_HTML3.translate(input); 717 } 718 719 /** 720 * Unescapes a string containing entity escapes to a string 721 * containing the actual Unicode characters corresponding to the 722 * escapes. Supports HTML 4.0 entities. 723 * 724 * <p>For example, the string {@code "<Français>"} 725 * will become {@code "<Français>"}</p> 726 * 727 * <p>If an entity is unrecognized, it is left alone, and inserted 728 * verbatim into the result string. e.g. {@code ">&zzzz;x"} will 729 * become {@code ">&zzzz;x"}.</p> 730 * 731 * @param input the {@link String} to unescape, may be null 732 * @return a new unescaped {@link String}, {@code null} if null string input 733 * @since 3.0 734 */ 735 public static final String unescapeHtml4(final String input) { 736 return UNESCAPE_HTML4.translate(input); 737 } 738 739 /** 740 * Unescapes any Java literals found in the {@link String}. 741 * For example, it will turn a sequence of {@code '\'} and 742 * {@code 'n'} into a newline character, unless the {@code '\'} 743 * is preceded by another {@code '\'}. 744 * 745 * @param input the {@link String} to unescape, may be null 746 * @return a new unescaped {@link String}, {@code null} if null string input 747 */ 748 public static final String unescapeJava(final String input) { 749 return UNESCAPE_JAVA.translate(input); 750 } 751 752 /** 753 * Unescapes any Json literals found in the {@link String}. 754 * 755 * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'} 756 * into a newline character, unless the {@code '\'} is preceded by another 757 * {@code '\'}.</p> 758 * 759 * @see #unescapeJava(String) 760 * @param input the {@link String} to unescape, may be null 761 * @return A new unescaped {@link String}, {@code null} if null string input 762 * @since 3.2 763 */ 764 public static final String unescapeJson(final String input) { 765 return UNESCAPE_JSON.translate(input); 766 } 767 768 /** 769 * Unescapes a string containing XML entity escapes to a string 770 * containing the actual Unicode characters corresponding to the 771 * escapes. 772 * 773 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos). 774 * Does not support DTDs or external entities.</p> 775 * 776 * <p>Note that numerical \\u Unicode codes are unescaped to their respective 777 * Unicode characters. This may change in future releases.</p> 778 * 779 * @param input the {@link String} to unescape, may be null 780 * @return a new unescaped {@link String}, {@code null} if null string input 781 * @see #escapeXml(String) 782 * @see #escapeXml10(String) 783 * @see #escapeXml11(String) 784 */ 785 public static final String unescapeXml(final String input) { 786 return UNESCAPE_XML.translate(input); 787 } 788 789 /** 790 * {@link StringEscapeUtils} instances should NOT be constructed in 791 * standard programming. 792 * 793 * <p>Instead, the class should be used as:</p> 794 * <pre>StringEscapeUtils.escapeJava("foo");</pre> 795 * 796 * <p>This constructor is public to permit tools that require a JavaBean 797 * instance to operate.</p> 798 * 799 * @deprecated TODO Make private in 4.0. 800 */ 801 @Deprecated 802 public StringEscapeUtils() { 803 // empty 804 } 805 806}