001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3;
018
019import java.io.IOException;
020import java.io.Writer;
021
022import org.apache.commons.lang3.text.translate.AggregateTranslator;
023import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
024import org.apache.commons.lang3.text.translate.EntityArrays;
025import org.apache.commons.lang3.text.translate.JavaUnicodeEscaper;
026import org.apache.commons.lang3.text.translate.LookupTranslator;
027import org.apache.commons.lang3.text.translate.NumericEntityEscaper;
028import org.apache.commons.lang3.text.translate.NumericEntityUnescaper;
029import org.apache.commons.lang3.text.translate.OctalUnescaper;
030import org.apache.commons.lang3.text.translate.UnicodeUnescaper;
031import org.apache.commons.lang3.text.translate.UnicodeUnpairedSurrogateRemover;
032
033/**
 * Escapes and unescapes {@link String}s for
 * Java, JavaScript, HTML and XML.
036 *
037 * <p>#ThreadSafe#</p>
038 * @since 2.0
039 * @deprecated As of 3.6, use Apache Commons Text
040 * <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/StringEscapeUtils.html">
041 * StringEscapeUtils</a> instead
042 */
043@Deprecated
044public class StringEscapeUtils {
045
046    /* ESCAPE TRANSLATORS */
047
048    static class CsvEscaper extends CharSequenceTranslator {
049
050        private static final char CSV_DELIMITER = ',';
051        private static final char CSV_QUOTE = '"';
052        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
053        private static final char[] CSV_SEARCH_CHARS = { CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF };
054
055        @Override
056        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
057
058            if (index != 0) {
059                throw new IllegalStateException("CsvEscaper should never reach the [1] index");
060            }
061
062            if (StringUtils.containsNone(input.toString(), CSV_SEARCH_CHARS)) {
063                out.write(input.toString());
064            } else {
065                out.write(CSV_QUOTE);
066                out.write(Strings.CS.replace(input.toString(), CSV_QUOTE_STR, CSV_QUOTE_STR + CSV_QUOTE_STR));
067                out.write(CSV_QUOTE);
068            }
069            return Character.codePointCount(input, 0, input.length());
070        }
071    }
072
073    static class CsvUnescaper extends CharSequenceTranslator {
074
075        private static final char CSV_DELIMITER = ',';
076        private static final char CSV_QUOTE = '"';
077        private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
078        private static final char[] CSV_SEARCH_CHARS = {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
079
080        @Override
081        public int translate(final CharSequence input, final int index, final Writer out) throws IOException {
082
083            if (index != 0) {
084                throw new IllegalStateException("CsvUnescaper should never reach the [1] index");
085            }
086
087            if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) {
088                out.write(input.toString());
089                return Character.codePointCount(input, 0, input.length());
090            }
091
092            // strip quotes
093            final String quoteless = input.subSequence(1, input.length() - 1).toString();
094
095            if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) {
096                // deal with escaped quotes; ie) ""
097                out.write(Strings.CS.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR));
098            } else {
099                out.write(input.toString());
100            }
101            return Character.codePointCount(input, 0, input.length());
102        }
103    }
104
105    /**
106     * Translator object for escaping Java.
107     *
108     * While {@link #escapeJava(String)} is the expected method of use, this
109     * object allows the Java escaping functionality to be used
110     * as the foundation for a custom translator.
111     *
112     * @since 3.0
113     */
114    public static final CharSequenceTranslator ESCAPE_JAVA =
115          new LookupTranslator(
116            new String[][] {
117              {"\"", "\\\""},
118              {"\\", "\\\\"},
119          }).with(
120            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())
121          ).with(
122            JavaUnicodeEscaper.outsideOf(32, 0x7f)
123        );
124
125    /**
126     * Translator object for escaping EcmaScript/JavaScript.
127     *
128     * While {@link #escapeEcmaScript(String)} is the expected method of use, this
129     * object allows the EcmaScript escaping functionality to be used
130     * as the foundation for a custom translator.
131     *
132     * @since 3.0
133     */
134    public static final CharSequenceTranslator ESCAPE_ECMASCRIPT =
135        new AggregateTranslator(
136            new LookupTranslator(
137                      new String[][] {
138                            {"'", "\\'"},
139                            {"\"", "\\\""},
140                            {"\\", "\\\\"},
141                            {"/", "\\/"}
142                      }),
143            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
144            JavaUnicodeEscaper.outsideOf(32, 0x7f)
145        );
146
147    /**
148     * Translator object for escaping Json.
149     *
150     * While {@link #escapeJson(String)} is the expected method of use, this
151     * object allows the Json escaping functionality to be used
152     * as the foundation for a custom translator.
153     *
154     * @since 3.2
155     */
156    public static final CharSequenceTranslator ESCAPE_JSON =
157        new AggregateTranslator(
158            new LookupTranslator(
159                      new String[][] {
160                            {"\"", "\\\""},
161                            {"\\", "\\\\"},
162                            {"/", "\\/"}
163                      }),
164            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()),
165            JavaUnicodeEscaper.outsideOf(32, 0x7f)
166        );
167
168    /**
169     * Translator object for escaping XML.
170     *
171     * While {@link #escapeXml(String)} is the expected method of use, this
172     * object allows the XML escaping functionality to be used
173     * as the foundation for a custom translator.
174     *
175     * @since 3.0
176     * @deprecated use {@link #ESCAPE_XML10} or {@link #ESCAPE_XML11} instead.
177     */
178    @Deprecated
179    public static final CharSequenceTranslator ESCAPE_XML =
180        new AggregateTranslator(
181            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
182            new LookupTranslator(EntityArrays.APOS_ESCAPE())
183        );
184
185    /**
186     * Translator object for escaping XML 1.0.
187     *
188     * While {@link #escapeXml10(String)} is the expected method of use, this
189     * object allows the XML escaping functionality to be used
190     * as the foundation for a custom translator.
191     *
192     * @since 3.3
193     */
194    public static final CharSequenceTranslator ESCAPE_XML10 =
195        new AggregateTranslator(
196            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
197            new LookupTranslator(EntityArrays.APOS_ESCAPE()),
198            new LookupTranslator(
199                    new String[][] {
200                            { "\u0000", StringUtils.EMPTY },
201                            { "\u0001", StringUtils.EMPTY },
202                            { "\u0002", StringUtils.EMPTY },
203                            { "\u0003", StringUtils.EMPTY },
204                            { "\u0004", StringUtils.EMPTY },
205                            { "\u0005", StringUtils.EMPTY },
206                            { "\u0006", StringUtils.EMPTY },
207                            { "\u0007", StringUtils.EMPTY },
208                            { "\u0008", StringUtils.EMPTY },
209                            { "\u000b", StringUtils.EMPTY },
210                            { "\u000c", StringUtils.EMPTY },
211                            { "\u000e", StringUtils.EMPTY },
212                            { "\u000f", StringUtils.EMPTY },
213                            { "\u0010", StringUtils.EMPTY },
214                            { "\u0011", StringUtils.EMPTY },
215                            { "\u0012", StringUtils.EMPTY },
216                            { "\u0013", StringUtils.EMPTY },
217                            { "\u0014", StringUtils.EMPTY },
218                            { "\u0015", StringUtils.EMPTY },
219                            { "\u0016", StringUtils.EMPTY },
220                            { "\u0017", StringUtils.EMPTY },
221                            { "\u0018", StringUtils.EMPTY },
222                            { "\u0019", StringUtils.EMPTY },
223                            { "\u001a", StringUtils.EMPTY },
224                            { "\u001b", StringUtils.EMPTY },
225                            { "\u001c", StringUtils.EMPTY },
226                            { "\u001d", StringUtils.EMPTY },
227                            { "\u001e", StringUtils.EMPTY },
228                            { "\u001f", StringUtils.EMPTY },
229                            { "\ufffe", StringUtils.EMPTY },
230                            { "\uffff", StringUtils.EMPTY }
231                    }),
232            NumericEntityEscaper.between(0x7f, 0x84),
233            NumericEntityEscaper.between(0x86, 0x9f),
234            new UnicodeUnpairedSurrogateRemover()
235        );
236
237    /**
238     * Translator object for escaping XML 1.1.
239     *
240     * While {@link #escapeXml11(String)} is the expected method of use, this
241     * object allows the XML escaping functionality to be used
242     * as the foundation for a custom translator.
243     *
244     * @since 3.3
245     */
246    public static final CharSequenceTranslator ESCAPE_XML11 =
247        new AggregateTranslator(
248            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
249            new LookupTranslator(EntityArrays.APOS_ESCAPE()),
250            new LookupTranslator(
251                    new String[][] {
252                            { "\u0000", StringUtils.EMPTY },
253                            { "\u000b", "&#11;" },
254                            { "\u000c", "&#12;" },
255                            { "\ufffe", StringUtils.EMPTY },
256                            { "\uffff", StringUtils.EMPTY }
257                    }),
258            NumericEntityEscaper.between(0x1, 0x8),
259            NumericEntityEscaper.between(0xe, 0x1f),
260            NumericEntityEscaper.between(0x7f, 0x84),
261            NumericEntityEscaper.between(0x86, 0x9f),
262            new UnicodeUnpairedSurrogateRemover()
263        );
264
265    /**
266     * Translator object for escaping HTML version 3.0.
267     *
268     * While {@link #escapeHtml3(String)} is the expected method of use, this
269     * object allows the HTML escaping functionality to be used
270     * as the foundation for a custom translator.
271     *
272     * @since 3.0
273     */
274    public static final CharSequenceTranslator ESCAPE_HTML3 =
275        new AggregateTranslator(
276            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
277            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE())
278        );
279
280    /**
281     * Translator object for escaping HTML version 4.0.
282     *
283     * While {@link #escapeHtml4(String)} is the expected method of use, this
284     * object allows the HTML escaping functionality to be used
285     * as the foundation for a custom translator.
286     *
287     * @since 3.0
288     */
289    public static final CharSequenceTranslator ESCAPE_HTML4 =
290        new AggregateTranslator(
291            new LookupTranslator(EntityArrays.BASIC_ESCAPE()),
292            new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE()),
293            new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE())
294        );
295
296    /* UNESCAPE TRANSLATORS */
297
298    /**
299     * Translator object for escaping individual Comma Separated Values.
300     *
301     * While {@link #escapeCsv(String)} is the expected method of use, this
302     * object allows the CSV escaping functionality to be used
303     * as the foundation for a custom translator.
304     *
305     * @since 3.0
306     */
307    public static final CharSequenceTranslator ESCAPE_CSV = new CsvEscaper();
308
309    /**
310     * Translator object for unescaping escaped Java.
311     *
312     * While {@link #unescapeJava(String)} is the expected method of use, this
313     * object allows the Java unescaping functionality to be used
314     * as the foundation for a custom translator.
315     *
316     * @since 3.0
317     */
318    // TODO: throw "illegal character: \92" as an Exception if a \ on the end of the Java (as per the compiler)?
319    public static final CharSequenceTranslator UNESCAPE_JAVA =
320        new AggregateTranslator(
321            new OctalUnescaper(),     // .between('\1', '\377'),
322            new UnicodeUnescaper(),
323            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
324            new LookupTranslator(
325                      new String[][] {
326                            {"\\\\", "\\"},
327                            {"\\\"", "\""},
328                            {"\\'", "'"},
329                            {"\\", ""}
330                      })
331        );
332
333    /**
334     * Translator object for unescaping escaped EcmaScript.
335     *
336     * While {@link #unescapeEcmaScript(String)} is the expected method of use, this
337     * object allows the EcmaScript unescaping functionality to be used
338     * as the foundation for a custom translator.
339     *
340     * @since 3.0
341     */
342    public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
343
344    /**
345     * Translator object for unescaping escaped Json.
346     *
347     * While {@link #unescapeJson(String)} is the expected method of use, this
348     * object allows the Json unescaping functionality to be used
349     * as the foundation for a custom translator.
350     *
351     * @since 3.2
352     */
353    public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;
354
355    /**
356     * Translator object for unescaping escaped HTML 3.0.
357     *
358     * While {@link #unescapeHtml3(String)} is the expected method of use, this
359     * object allows the HTML unescaping functionality to be used
360     * as the foundation for a custom translator.
361     *
362     * @since 3.0
363     */
364    public static final CharSequenceTranslator UNESCAPE_HTML3 =
365        new AggregateTranslator(
366            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
367            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
368            new NumericEntityUnescaper()
369        );
370
371    /**
372     * Translator object for unescaping escaped HTML 4.0.
373     *
374     * While {@link #unescapeHtml4(String)} is the expected method of use, this
375     * object allows the HTML unescaping functionality to be used
376     * as the foundation for a custom translator.
377     *
378     * @since 3.0
379     */
380    public static final CharSequenceTranslator UNESCAPE_HTML4 =
381        new AggregateTranslator(
382            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
383            new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE()),
384            new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE()),
385            new NumericEntityUnescaper()
386        );
387
388    /**
389     * Translator object for unescaping escaped XML.
390     *
391     * While {@link #unescapeXml(String)} is the expected method of use, this
392     * object allows the XML unescaping functionality to be used
393     * as the foundation for a custom translator.
394     *
395     * @since 3.0
396     */
397    public static final CharSequenceTranslator UNESCAPE_XML =
398        new AggregateTranslator(
399            new LookupTranslator(EntityArrays.BASIC_UNESCAPE()),
400            new LookupTranslator(EntityArrays.APOS_UNESCAPE()),
401            new NumericEntityUnescaper()
402        );
403
404    /**
405     * Translator object for unescaping escaped Comma Separated Value entries.
406     *
407     * While {@link #unescapeCsv(String)} is the expected method of use, this
408     * object allows the CSV unescaping functionality to be used
409     * as the foundation for a custom translator.
410     *
411     * @since 3.0
412     */
413    public static final CharSequenceTranslator UNESCAPE_CSV = new CsvUnescaper();
414
415    /* Helper functions */
416
417    /**
418     * Returns a {@link String} value for a CSV column enclosed in double quotes,
419     * if required.
420     *
421     * <p>If the value contains a comma, newline or double quote, then the
422     *    String value is returned enclosed in double quotes.</p>
423     *
424     * <p>Any double quote characters in the value are escaped with another double quote.</p>
425     *
426     * <p>If the value does not contain a comma, newline or double quote, then the
427     *    String value is returned unchanged.</p>
428     *
429     * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
430     * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
431     *
432     * @param input the input CSV column String, may be null
433     * @return the input String, enclosed in double quotes if the value contains a comma,
434     * newline or double quote, {@code null} if null string input
435     * @since 2.4
436     */
437    public static final String escapeCsv(final String input) {
438        return ESCAPE_CSV.translate(input);
439    }
440
441    /**
442     * Escapes the characters in a {@link String} using EcmaScript String rules.
443     * <p>Escapes any values it finds into their EcmaScript String form.
444     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
445     *
446     * <p>So a tab becomes the characters {@code '\\'} and
447     * {@code 't'}.</p>
448     *
449     * <p>The only difference between Java strings and EcmaScript strings
450     * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
451     *
452     * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p>
453     *
454     * <p>Example:</p>
455     * <pre>
456     * input string: He didn't say, "Stop!"
457     * output string: He didn\'t say, \"Stop!\"
458     * </pre>
459     *
460     * @param input  String to escape values in, may be null
461     * @return String with escaped values, {@code null} if null string input
462     * @since 3.0
463     */
464    public static final String escapeEcmaScript(final String input) {
465        return ESCAPE_ECMASCRIPT.translate(input);
466    }
467
468    /**
469     * Escapes the characters in a {@link String} using HTML entities.
470     * <p>Supports only the HTML 3.0 entities.</p>
471     *
472     * @param input  the {@link String} to escape, may be null
473     * @return a new escaped {@link String}, {@code null} if null string input
474     * @since 3.0
475     */
476    public static final String escapeHtml3(final String input) {
477        return ESCAPE_HTML3.translate(input);
478    }
479
480    /**
481     * Escapes the characters in a {@link String} using HTML entities.
482     *
483     * <p>
484     * For example:
485     * </p>
486     * <p>{@code "bread" &amp; "butter"}</p>
487     * becomes:
488     * <p>
489     * {@code &amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;}.
490     * </p>
491     *
492     * <p>Supports all known HTML 4.0 entities, including funky accents.
493     * Note that the commonly used apostrophe escape character (&amp;apos;)
494     * is not a legal entity and so is not supported).</p>
495     *
496     * @param input  the {@link String} to escape, may be null
497     * @return a new escaped {@link String}, {@code null} if null string input
498     * @see <a href="https://web.archive.org/web/20060225074150/https://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
499     * @see <a href="https://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
500     * @see <a href="https://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
501     * @see <a href="https://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
502     * @see <a href="https://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
503     * @since 3.0
504     */
505    public static final String escapeHtml4(final String input) {
506        return ESCAPE_HTML4.translate(input);
507    }
508
509    /**
510     * Escapes the characters in a {@link String} using Java String rules.
511     *
512     * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
513     *
514     * <p>So a tab becomes the characters {@code '\\'} and
515     * {@code 't'}.</p>
516     *
517     * <p>The only difference between Java strings and JavaScript strings
518     * is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
519     *
520     * <p>Example:</p>
521     * <pre>
522     * input string: He didn't say, "Stop!"
523     * output string: He didn't say, \"Stop!\"
524     * </pre>
525     *
526     * @param input  String to escape values in, may be null
527     * @return String with escaped values, {@code null} if null string input
528     */
529    public static final String escapeJava(final String input) {
530        return ESCAPE_JAVA.translate(input);
531    }
532
533    /**
534     * Escapes the characters in a {@link String} using Json String rules.
535     * <p>Escapes any values it finds into their Json String form.
536     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
537     *
538     * <p>So a tab becomes the characters {@code '\\'} and
539     * {@code 't'}.</p>
540     *
541     * <p>The only difference between Java strings and Json strings
542     * is that in Json, forward-slash (/) is escaped.</p>
543     *
544     * <p>See https://www.ietf.org/rfc/rfc4627.txt for further details.</p>
545     *
546     * <p>Example:</p>
547     * <pre>
548     * input string: He didn't say, "Stop!"
549     * output string: He didn't say, \"Stop!\"
550     * </pre>
551     *
552     * @param input  String to escape values in, may be null
553     * @return String with escaped values, {@code null} if null string input
554     * @since 3.2
555     */
556    public static final String escapeJson(final String input) {
557        return ESCAPE_JSON.translate(input);
558    }
559
560    /**
561     * Escapes the characters in a {@link String} using XML entities.
562     *
563     * <p>For example: {@code "bread" & "butter"} =&gt;
564     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
565     * </p>
566     *
567     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
568     * Does not support DTDs or external entities.</p>
569     *
570     * <p>Note that Unicode characters greater than 0x7f are as of 3.0, no longer
571     *    escaped. If you still wish this functionality, you can achieve it
572     *    via the following:
573     * {@code StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE));}</p>
574     *
575     * @param input  the {@link String} to escape, may be null
576     * @return a new escaped {@link String}, {@code null} if null string input
577     * @see #unescapeXml(String)
578     * @deprecated use {@link #escapeXml10(java.lang.String)} or {@link #escapeXml11(java.lang.String)} instead.
579     */
580    @Deprecated
581    public static final String escapeXml(final String input) {
582        return ESCAPE_XML.translate(input);
583    }
584
585    /**
586     * Escapes the characters in a {@link String} using XML entities.
587     * <p>
588     * For example:
589     * </p>
590     *
591     * <pre>{@code
592     * "bread" & "butter"
593     * }</pre>
594     * <p>
595     * converts to:
596     * </p>
597     *
598     * <pre>
599     * {@code
600     * &quot;bread&quot; &amp; &quot;butter&quot;
601     * }
602     * </pre>
603     *
604     * <p>
605     * Note that XML 1.0 is a text-only format: it cannot represent control characters or unpaired Unicode surrogate code points, even after escaping. The
606     * method {@code escapeXml10} will remove characters that do not fit in the following ranges:
607     * </p>
608     *
609     * <p>
610     * {@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}
611     * </p>
612     *
613     * <p>
614     * Though not strictly necessary, {@code escapeXml10} will escape characters in the following ranges:
615     * </p>
616     *
617     * <p>
618     * {@code [#x7F-#x84] | [#x86-#x9F]}
619     * </p>
620     *
621     * <p>
622     * The returned string can be inserted into a valid XML 1.0 or XML 1.1 document. If you want to allow more non-text characters in an XML 1.1 document, use
623     * {@link #escapeXml11(String)}.
624     * </p>
625     *
626     * @param input the {@link String} to escape, may be null
627     * @return a new escaped {@link String}, {@code null} if null string input
628     * @see #unescapeXml(String)
629     * @since 3.3
630     */
631    public static String escapeXml10(final String input) {
632        return ESCAPE_XML10.translate(input);
633    }
634
635    /**
636     * Escapes the characters in a {@link String} using XML entities.
637     *
638     * <p>For example: {@code "bread" & "butter"} =&gt;
639     * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
640     * </p>
641     *
642     * <p>XML 1.1 can represent certain control characters, but it cannot represent
643     * the null byte or unpaired Unicode surrogate code points, even after escaping.
644     * {@code escapeXml11} will remove characters that do not fit in the following
645     * ranges:</p>
646     *
647     * <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
648     *
649     * <p>{@code escapeXml11} will escape characters in the following ranges:</p>
650     *
651     * <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
652     *
653     * <p>The returned string can be inserted into a valid XML 1.1 document. Do not
654     * use it for XML 1.0 documents.</p>
655     *
656     * @param input  the {@link String} to escape, may be null
657     * @return a new escaped {@link String}, {@code null} if null string input
658     * @see #unescapeXml(String)
659     * @since 3.3
660     */
661    public static String escapeXml11(final String input) {
662        return ESCAPE_XML11.translate(input);
663    }
664
665    /**
666     * Returns a {@link String} value for an unescaped CSV column.
667     *
668     * <p>If the value is enclosed in double quotes, and contains a comma, newline
669     *    or double quote, then quotes are removed.
670     * </p>
671     *
672     * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
673     *    to just one double quote.</p>
674     *
675     * <p>If the value is not enclosed in double quotes, or is and does not contain a
676     *    comma, newline or double quote, then the String value is returned unchanged.</p>
677     *
678     * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
679     * <a href="https://datatracker.ietf.org/doc/html/rfc4180">RFC 4180</a>.
680     *
681     * @param input the input CSV column String, may be null
682     * @return the input String, with enclosing double quotes removed and embedded double
683     * quotes unescaped, {@code null} if null string input
684     * @since 2.4
685     */
686    public static final String unescapeCsv(final String input) {
687        return UNESCAPE_CSV.translate(input);
688    }
689
690    /**
691     * Unescapes any EcmaScript literals found in the {@link String}.
692     *
693     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
694     * into a newline character, unless the {@code '\'} is preceded by another
695     * {@code '\'}.</p>
696     *
697     * @see #unescapeJava(String)
698     * @param input  the {@link String} to unescape, may be null
699     * @return A new unescaped {@link String}, {@code null} if null string input
700     * @since 3.0
701     */
702    public static final String unescapeEcmaScript(final String input) {
703        return UNESCAPE_ECMASCRIPT.translate(input);
704    }
705
706    /**
707     * Unescapes a string containing entity escapes to a string
708     * containing the actual Unicode characters corresponding to the
709     * escapes. Supports only HTML 3.0 entities.
710     *
711     * @param input  the {@link String} to unescape, may be null
712     * @return a new unescaped {@link String}, {@code null} if null string input
713     * @since 3.0
714     */
715    public static final String unescapeHtml3(final String input) {
716        return UNESCAPE_HTML3.translate(input);
717    }
718
719    /**
720     * Unescapes a string containing entity escapes to a string
721     * containing the actual Unicode characters corresponding to the
722     * escapes. Supports HTML 4.0 entities.
723     *
724     * <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
725     * will become {@code "<Français>"}</p>
726     *
727     * <p>If an entity is unrecognized, it is left alone, and inserted
728     * verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
729     * become {@code ">&zzzz;x"}.</p>
730     *
731     * @param input  the {@link String} to unescape, may be null
732     * @return a new unescaped {@link String}, {@code null} if null string input
733     * @since 3.0
734     */
735    public static final String unescapeHtml4(final String input) {
736        return UNESCAPE_HTML4.translate(input);
737    }
738
739    /**
740     * Unescapes any Java literals found in the {@link String}.
741     * For example, it will turn a sequence of {@code '\'} and
742     * {@code 'n'} into a newline character, unless the {@code '\'}
743     * is preceded by another {@code '\'}.
744     *
745     * @param input  the {@link String} to unescape, may be null
746     * @return a new unescaped {@link String}, {@code null} if null string input
747     */
748    public static final String unescapeJava(final String input) {
749        return UNESCAPE_JAVA.translate(input);
750    }
751
752    /**
753     * Unescapes any Json literals found in the {@link String}.
754     *
755     * <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
756     * into a newline character, unless the {@code '\'} is preceded by another
757     * {@code '\'}.</p>
758     *
759     * @see #unescapeJava(String)
760     * @param input  the {@link String} to unescape, may be null
761     * @return A new unescaped {@link String}, {@code null} if null string input
762     * @since 3.2
763     */
764    public static final String unescapeJson(final String input) {
765        return UNESCAPE_JSON.translate(input);
766    }
767
768    /**
769     * Unescapes a string containing XML entity escapes to a string
770     * containing the actual Unicode characters corresponding to the
771     * escapes.
772     *
773     * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
774     * Does not support DTDs or external entities.</p>
775     *
776     * <p>Note that numerical \\u Unicode codes are unescaped to their respective
777     *    Unicode characters. This may change in future releases.</p>
778     *
779     * @param input  the {@link String} to unescape, may be null
780     * @return a new unescaped {@link String}, {@code null} if null string input
781     * @see #escapeXml(String)
782     * @see #escapeXml10(String)
783     * @see #escapeXml11(String)
784     */
785    public static final String unescapeXml(final String input) {
786        return UNESCAPE_XML.translate(input);
787    }
788
789    /**
790     * {@link StringEscapeUtils} instances should NOT be constructed in
791     * standard programming.
792     *
793     * <p>Instead, the class should be used as:</p>
794     * <pre>StringEscapeUtils.escapeJava("foo");</pre>
795     *
796     * <p>This constructor is public to permit tools that require a JavaBean
797     * instance to operate.</p>
798     *
799     * @deprecated TODO Make private in 4.0.
800     */
801    @Deprecated
802    public StringEscapeUtils() {
803        // empty
804    }
805
806}