001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.wicket.util.string; 018 019import java.io.UnsupportedEncodingException; 020import java.nio.charset.Charset; 021import java.util.ArrayList; 022import java.util.List; 023import java.util.Locale; 024import java.util.regex.Matcher; 025import java.util.regex.Pattern; 026 027import org.apache.wicket.util.lang.Args; 028 029/** 030 * A variety of static String utility methods. 031 * <p> 032 * The escapeMarkup() and toMultilineMarkup() methods are useful for turning normal Java Strings 033 * into HTML strings. 034 * <p> 035 * The lastPathComponent(), firstPathComponent(), afterFirstPathComponent() and 036 * beforeLastPathComponent() methods can chop up a String into path components using a separator 037 * character. If the separator cannot be found the original String is returned. 038 * <p> 039 * Similarly, the beforeLast(), beforeFirst(), afterFirst() and afterLast() methods return sections 040 * before and after a separator character. But if the separator cannot be found, an empty string is 041 * returned. 042 * <p> 043 * Some other miscellaneous methods will strip a given ending off a String if it can be found 044 * (stripEnding()), replace all occurrences of one String with another (replaceAll), do type 045 * conversions (toBoolean(), toChar(), toString()), check a String for emptiness (isEmpty()), 046 * convert a Throwable to a String (toString(Throwable)) or capitalize a String (capitalize()). 047 * 048 * @author Jonathan Locke 049 */ 050public final class Strings 051{ 052 /** 053 * The line separator for the current platform. 054 * 055 * @deprecated Use {@link System#lineSeparator()} 056 */ 057 @Deprecated 058 public static final String LINE_SEPARATOR = System.lineSeparator(); 059 060 /** A table of hex digits */ 061 private static final char[] HEX_DIGIT = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 062 'A', 'B', 'C', 'D', 'E', 'F' }; 063 064 private static final Pattern HTML_NUMBER_REGEX = Pattern.compile("&#\\d+;"); 065 066 private static final String[] NO_STRINGS = new String[0]; 067 068 /** 069 * The name of the parameter used to keep the session id. 070 * The Servlet specification mandates <em>jsessionid</em> but the web containers 071 * provide ways to set a custom one, e.g. <em>sid</em>. 072 * Since Wicket doesn't have access to the web container internals the name should be set explicitly. 073 */ 074 public static final String SESSION_ID_PARAM_NAME = System.getProperty("wicket.jsessionid.name", "jsessionid"); 075 076 /** 077 * Constructs something like <em>;jsessionid=</em>. This is what {@linkplain Strings#stripJSessionId(String)} 078 * actually uses. 079 */ 080 // the field is not 'final' because we need to modify it in a unit test 081 // see https://github.com/openjdk/jdk/pull/5027#issuecomment-968177213 082 private static String SESSION_ID_PARAM = ';' + SESSION_ID_PARAM_NAME + '='; 083 084 /** 085 * Private constructor prevents construction. 086 */ 087 private Strings() 088 { 089 } 090 091 /** 092 * Returns everything after the first occurrence of the given character in s. 093 * 094 * @param s 095 * The string 096 * @param c 097 * The character 098 * @return Everything after the first occurrence of the given character in s. If the character 099 * cannot be found, an empty string is returned. 100 */ 101 public static String afterFirst(final String s, final char c) 102 { 103 if (s == null) 104 { 105 return null; 106 } 107 final int index = s.indexOf(c); 108 109 if (index == -1) 110 { 111 return ""; 112 } 113 114 return s.substring(index + 1); 115 } 116 117 /** 118 * Gets everything after the first path component of a path using a given separator. If the 119 * separator cannot be found, an empty String is returned. 120 * <p> 121 * For example, afterFirstPathComponent("foo:bar:baz", ':') would return "bar:baz" and 122 * afterFirstPathComponent("foo", ':') would return "". 123 * 124 * @param path 125 * The path to parse 126 * @param separator 127 * The path separator character 128 * @return Everything after the first component in the path 129 */ 130 public static String afterFirstPathComponent(final String path, final char separator) 131 { 132 return afterFirst(path, separator); 133 } 134 135 /** 136 * Returns everything after the last occurrence of the given character in s. 137 * 138 * @param s 139 * The string 140 * @param c 141 * The character 142 * @return Everything after the last occurrence of the given character in s. If the character 143 * cannot be found, an empty string is returned. 144 */ 145 public static String afterLast(final String s, final char c) 146 { 147 if (s == null) 148 { 149 return null; 150 } 151 final int index = s.lastIndexOf(c); 152 153 if (index == -1) 154 { 155 return ""; 156 } 157 158 return s.substring(index + 1); 159 } 160 161 /** 162 * Returns everything before the first occurrence of the given character in s. 163 * 164 * @param s 165 * The string 166 * @param c 167 * The character 168 * @return Everything before the first occurrence of the given character in s. If the character 169 * cannot be found, an empty string is returned. 170 */ 171 public static String beforeFirst(final String s, final char c) 172 { 173 if (s == null) 174 { 175 return null; 176 } 177 final int index = s.indexOf(c); 178 179 if (index == -1) 180 { 181 return ""; 182 } 183 184 return s.substring(0, index); 185 } 186 187 /** 188 * Returns everything before the last occurrence of the given character in s. 189 * 190 * @param s 191 * The string 192 * @param c 193 * The character 194 * @return Everything before the last occurrence of the given character in s. If the character 195 * cannot be found, an empty string is returned. 196 */ 197 public static String beforeLast(final String s, final char c) 198 { 199 if (s == null) 200 { 201 return null; 202 } 203 final int index = s.lastIndexOf(c); 204 205 if (index == -1) 206 { 207 return ""; 208 } 209 210 return s.substring(0, index); 211 } 212 213 /** 214 * Gets everything before the last path component of a path using a given separator. If the 215 * separator cannot be found, the path itself is returned. 216 * <p> 217 * For example, beforeLastPathComponent("foo.bar.baz", '.') would return "foo.bar" and 218 * beforeLastPathComponent("foo", '.') would return "". 219 * 220 * @param path 221 * The path to parse 222 * @param separator 223 * The path separator character 224 * @return Everything before the last component in the path 225 */ 226 public static String beforeLastPathComponent(final String path, final char separator) 227 { 228 return beforeLast(path, separator); 229 } 230 231 /** 232 * Capitalizes a string. 233 * 234 * @param s 235 * The string 236 * @return The capitalized string 237 */ 238 public static String capitalize(final String s) 239 { 240 if (s == null) 241 { 242 return null; 243 } 244 final char[] chars = s.toCharArray(); 245 246 if (chars.length > 0) 247 { 248 chars[0] = Character.toUpperCase(chars[0]); 249 } 250 251 return new String(chars); 252 } 253 254 /** 255 * Converts a Java String to an HTML markup string, but does not convert normal spaces to 256 * non-breaking space entities (<nbsp>). 257 * 258 * @param s 259 * The characters to escape 260 * @see Strings#escapeMarkup(CharSequence, boolean) 261 * @return The escaped string 262 */ 263 public static CharSequence escapeMarkup(final CharSequence s) 264 { 265 return escapeMarkup(s, false); 266 } 267 268 /** 269 * Converts a Java String to an HTML markup String by replacing illegal characters with HTML 270 * entities where appropriate. Spaces are converted to non-breaking spaces (<nbsp>) if 271 * escapeSpaces is true, tabs are converted to four non-breaking spaces, less than signs are 272 * converted to &lt; entities and greater than signs to &gt; entities. 273 * 274 * @param s 275 * The characters to escape 276 * @param escapeSpaces 277 * True to replace ' ' with nonbreaking space 278 * @return The escaped string 279 */ 280 public static CharSequence escapeMarkup(final CharSequence s, final boolean escapeSpaces) 281 { 282 return escapeMarkup(s, escapeSpaces, false); 283 } 284 285 /** 286 * Converts a Java String to an HTML markup String by replacing illegal characters with HTML 287 * entities where appropriate. Spaces are converted to non-breaking spaces (<nbsp>) if 288 * escapeSpaces is true, tabs are converted to four non-breaking spaces, less than signs are 289 * converted to &lt; entities and greater than signs to &gt; entities. 290 * 291 * @param s 292 * The characters to escape 293 * @param escapeSpaces 294 * True to replace ' ' with nonbreaking space 295 * @param convertToHtmlUnicodeEscapes 296 * True to convert non-7 bit characters to unicode HTML (&#...) 297 * @return The escaped string 298 */ 299 public static CharSequence escapeMarkup(final CharSequence s, final boolean escapeSpaces, 300 final boolean convertToHtmlUnicodeEscapes) 301 { 302 if (s == null) 303 { 304 return null; 305 } 306 307 final int len = s.length(); 308 if (len == 0) 309 { 310 return s; 311 } 312 313 final AppendingStringBuffer buffer = new AppendingStringBuffer((int)(len * 1.1)); 314 315 for (int i = 0; i < len; i++) 316 { 317 final char c = s.charAt(i); 318 319 if (Character.getType(c) == Character.UNASSIGNED) 320 { 321 continue; 322 } 323 switch (c) 324 { 325 case '\t' : 326 if (escapeSpaces) 327 { 328 // Assumption is four space tabs (sorry, but that's 329 // just how it is!) 330 buffer.append(" "); 331 } 332 else 333 { 334 buffer.append(c); 335 } 336 break; 337 338 case ' ' : 339 if (escapeSpaces) 340 { 341 buffer.append(" "); 342 } 343 else 344 { 345 buffer.append(c); 346 } 347 break; 348 349 case '<' : 350 buffer.append("<"); 351 break; 352 353 case '>' : 354 buffer.append(">"); 355 break; 356 357 case '&' : 358 359 buffer.append("&"); 360 break; 361 362 case '"' : 363 buffer.append("""); 364 break; 365 366 case '\'' : 367 buffer.append("'"); 368 break; 369 370 default : 371 372 int ci = 0xffff & c; 373 374 if ( 375 // if this is non-printable and not whitespace (TAB, LF, CR) 376 ((ci < 32) && (ci != 9) && (ci != 10) && (ci != 13)) || 377 // or non-ASCII (XXX: why 160+ ?!) and need to UNICODE escape it 378 (convertToHtmlUnicodeEscapes && (ci > 159))) 379 { 380 buffer.append("&#"); 381 buffer.append(Integer.toString(ci)); 382 buffer.append(';'); 383 } 384 else 385 { 386 // ASCII or whitespace 387 buffer.append(c); 388 } 389 break; 390 } 391 } 392 393 return buffer; 394 } 395 396 /** 397 * Unescapes the escaped entities in the <code>markup</code> passed. 398 * 399 * @param markup 400 * The source <code>String</code> to unescape. 401 * @return the unescaped markup or <code>null</null> if the input is <code>null</code> 402 */ 403 public static CharSequence unescapeMarkup(final String markup) 404 { 405 String unescapedMarkup = StringEscapeUtils.unescapeHtml(markup); 406 return unescapedMarkup; 407 } 408 409 /** 410 * Gets the first path component of a path using a given separator. If the separator cannot be 411 * found, the path itself is returned. 412 * <p> 413 * For example, firstPathComponent("foo.bar", '.') would return "foo" and 414 * firstPathComponent("foo", '.') would return "foo". 415 * 416 * @param path 417 * The path to parse 418 * @param separator 419 * The path separator character 420 * @return The first component in the path or path itself if no separator characters exist. 421 */ 422 public static String firstPathComponent(final String path, final char separator) 423 { 424 if (path == null) 425 { 426 return null; 427 } 428 final int index = path.indexOf(separator); 429 430 if (index == -1) 431 { 432 return path; 433 } 434 435 return path.substring(0, index); 436 } 437 438 /** 439 * Converts encoded \uxxxx to unicode chars and changes special saved chars to their 440 * original forms. 441 * 442 * @param escapedUnicodeString 443 * escaped unicode string, like '\u4F60\u597D'. 444 * 445 * @return The actual unicode. Can be used for instance with message bundles 446 */ 447 public static String fromEscapedUnicode(final String escapedUnicodeString) 448 { 449 int off = 0; 450 char[] in = escapedUnicodeString.toCharArray(); 451 int len = in.length; 452 char[] out = new char[len]; 453 char aChar; 454 int outLen = 0; 455 int end = off + len; 456 457 while (off < end) 458 { 459 aChar = in[off++]; 460 if (aChar == '\\') 461 { 462 aChar = in[off++]; 463 if (aChar == 'u') 464 { 465 // Read the xxxx 466 int value = 0; 467 for (int i = 0; i < 4; i++) 468 { 469 aChar = in[off++]; 470 switch (aChar) 471 { 472 case '0' : 473 case '1' : 474 case '2' : 475 case '3' : 476 case '4' : 477 case '5' : 478 case '6' : 479 case '7' : 480 case '8' : 481 case '9' : 482 value = (value << 4) + aChar - '0'; 483 break; 484 case 'a' : 485 case 'b' : 486 case 'c' : 487 case 'd' : 488 case 'e' : 489 case 'f' : 490 value = (value << 4) + 10 + aChar - 'a'; 491 break; 492 case 'A' : 493 case 'B' : 494 case 'C' : 495 case 'D' : 496 case 'E' : 497 case 'F' : 498 value = (value << 4) + 10 + aChar - 'A'; 499 break; 500 default : 501 throw new IllegalArgumentException("Malformed \\uxxxx encoding."); 502 } 503 } 504 out[outLen++] = (char)value; 505 } 506 else 507 { 508 if (aChar == 't') 509 { 510 aChar = '\t'; 511 } 512 else if (aChar == 'r') 513 { 514 aChar = '\r'; 515 } 516 else if (aChar == 'n') 517 { 518 aChar = '\n'; 519 } 520 else if (aChar == 'f') 521 { 522 aChar = '\f'; 523 } 524 out[outLen++] = aChar; 525 } 526 } 527 else 528 { 529 out[outLen++] = aChar; 530 } 531 } 532 return new String(out, 0, outLen); 533 } 534 535 /** 536 * Checks whether the <code>string</code> is considered empty. Empty means that the string may 537 * contain whitespace, but no visible characters. 538 * 539 * "\n\t " is considered empty, while " a" is not. 540 * 541 * @param string 542 * The string 543 * @return True if the string is null or "" 544 */ 545 public static boolean isEmpty(final CharSequence string) 546 { 547 return string == null || string.length() == 0 || 548 (string.charAt(0) <= ' ' && string.toString().trim().isEmpty()); 549 } 550 551 /** 552 * Checks whether the <code>string</code> is considered empty. Empty means that the string may 553 * contain whitespace, but no visible characters. 554 * 555 * "\n\t " is considered empty, while " a" is not. 556 * 557 * Note: This method overloads {@link #isEmpty(CharSequence)} for performance reasons. 558 * 559 * @param string 560 * The string 561 * @return True if the string is null or "" 562 */ 563 public static boolean isEmpty(final String string) 564 { 565 return string == null || string.isEmpty() || 566 (string.charAt(0) <= ' ' && string.trim().isEmpty()); 567 } 568 569 /** 570 * Checks whether two strings are equals taken care of 'null' values and treating 'null' same as 571 * trim(string).equals("") 572 * 573 * @param string1 574 * @param string2 575 * @return true, if both strings are equal 576 */ 577 public static boolean isEqual(final String string1, final String string2) 578 { 579 if ((string1 == null) && (string2 == null)) 580 { 581 return true; 582 } 583 584 if (isEmpty(string1) && isEmpty(string2)) 585 { 586 return true; 587 } 588 if ((string1 == null) || (string2 == null)) 589 { 590 return false; 591 } 592 593 return string1.equals(string2); 594 } 595 596 /** 597 * Converts the text in <code>s</code> to a corresponding boolean. On, yes, y, true and 1 are 598 * converted to <code>true</code>. Off, no, n, false and 0 (zero) are converted to 599 * <code>false</code>. An empty string is converted to <code>false</code>. Conversion is 600 * case-insensitive, and does <em>not</em> take internationalization into account. 601 * 602 * 'Ja', 'Oui', 'Igen', 'Nein', 'Nee', 'Non', 'Nem' are all illegal values. 603 * 604 * @param s 605 * the value to convert into a boolean 606 * @return Boolean the converted value of <code>s</code> 607 * @throws StringValueConversionException 608 * when the value of <code>s</code> is not recognized. 609 */ 610 public static boolean isTrue(final String s) throws StringValueConversionException 611 { 612 if (s != null) 613 { 614 if (s.equalsIgnoreCase("true")) 615 { 616 return true; 617 } 618 619 if (s.equalsIgnoreCase("false")) 620 { 621 return false; 622 } 623 624 if (s.equalsIgnoreCase("on") || s.equalsIgnoreCase("yes") || s.equalsIgnoreCase("y") || 625 s.equalsIgnoreCase("1")) 626 { 627 return true; 628 } 629 630 if (s.equalsIgnoreCase("off") || s.equalsIgnoreCase("no") || s.equalsIgnoreCase("n") || 631 s.equalsIgnoreCase("0")) 632 { 633 return false; 634 } 635 636 if (isEmpty(s)) 637 { 638 return false; 639 } 640 641 throw new StringValueConversionException("Boolean value \"" + s + "\" not recognized"); 642 } 643 644 return false; 645 } 646 647 /** 648 * Joins string fragments using the specified separator 649 * 650 * @param separator 651 * @param fragments 652 * @return combined fragments 653 */ 654 public static String join(final String separator, final List<String> fragments) 655 { 656 if (fragments == null) 657 { 658 return ""; 659 } 660 return join(separator, fragments.toArray(new String[0])); 661 } 662 663 /** 664 * Joins string fragments using the specified separator 665 * 666 * @param separator 667 * @param fragments 668 * @return combined fragments 669 */ 670 public static String join(final String separator, final String... fragments) 671 { 672 if ((fragments == null) || (fragments.length < 1)) 673 { 674 // no elements 675 return ""; 676 } 677 else if (fragments.length < 2) 678 { 679 // single element 680 return fragments[0]; 681 } 682 else 683 { 684 // two or more elements 685 AppendingStringBuffer buff = new AppendingStringBuffer(128); 686 if (fragments[0] != null) 687 { 688 buff.append(fragments[0]); 689 } 690 boolean separatorNotEmpty = !Strings.isEmpty(separator); 691 for (int i = 1; i < fragments.length; i++) 692 { 693 String fragment = fragments[i]; 694 String previousFragment = fragments[i - 1]; 695 if (previousFragment != null || fragment != null) 696 { 697 boolean lhsClosed = previousFragment.endsWith(separator); 698 boolean rhsClosed = fragment.startsWith(separator); 699 if (separatorNotEmpty && lhsClosed && rhsClosed) 700 { 701 buff.append(fragment.substring(1)); 702 } 703 else if (!lhsClosed && !rhsClosed) 704 { 705 if (!Strings.isEmpty(fragment)) 706 { 707 buff.append(separator); 708 } 709 buff.append(fragment); 710 } 711 else 712 { 713 buff.append(fragment); 714 } 715 } 716 } 717 return buff.toString(); 718 } 719 } 720 721 /** 722 * Gets the last path component of a path using a given separator. If the separator cannot be 723 * found, the path itself is returned. 724 * <p> 725 * For example, lastPathComponent("foo.bar", '.') would return "bar" and 726 * lastPathComponent("foo", '.') would return "foo". 727 * 728 * @param path 729 * The path to parse 730 * @param separator 731 * The path separator character 732 * @return The last component in the path or path itself if no separator characters exist. 733 */ 734 public static String lastPathComponent(final String path, final char separator) 735 { 736 if (path == null) 737 { 738 return null; 739 } 740 741 final int index = path.lastIndexOf(separator); 742 743 if (index == -1) 744 { 745 return path; 746 } 747 748 return path.substring(index + 1); 749 } 750 751 /** 752 * Replace all occurrences of one string replaceWith another string. 753 * 754 * @param s 755 * The string to process 756 * @param searchFor 757 * The value to search for 758 * @param replaceWith 759 * The value to searchFor replaceWith 760 * @return The resulting string with searchFor replaced with replaceWith 761 */ 762 public static CharSequence replaceAll(final CharSequence s, final CharSequence searchFor, 763 CharSequence replaceWith) 764 { 765 if (s == null) 766 { 767 return null; 768 } 769 770 // If searchFor is null or the empty string, then there is nothing to 771 // replace, so returning s is the only option here. 772 if ((searchFor == null) || searchFor.length() == 0) 773 { 774 return s; 775 } 776 777 // If replaceWith is null, then the searchFor should be replaced with 778 // nothing, which can be seen as the empty string. 779 if (replaceWith == null) 780 { 781 replaceWith = ""; 782 } 783 784 String searchString = searchFor.toString(); 785 // Look for first occurrence of searchFor 786 int matchIndex = search(s, searchString, 0); 787 if (matchIndex == -1) 788 { 789 // No replace operation needs to happen 790 return s; 791 } 792 else 793 { 794 return s.toString().replace(searchString, replaceWith); 795 } 796 } 797 798 /** 799 * Replace HTML numbers like &#20540; by the appropriate character. 800 * 801 * @param str 802 * The text to be evaluated 803 * @return The text with "numbers" replaced 804 */ 805 public static String replaceHtmlEscapeNumber(String str) 806 { 807 if (str == null) 808 { 809 return null; 810 } 811 Matcher matcher = HTML_NUMBER_REGEX.matcher(str); 812 while (matcher.find()) 813 { 814 int pos = matcher.start(); 815 int end = matcher.end(); 816 int number = Integer.parseInt(str.substring(pos + 2, end - 1)); 817 char ch = (char)number; 818 str = str.substring(0, pos) + ch + str.substring(end); 819 matcher = HTML_NUMBER_REGEX.matcher(str); 820 } 821 822 return str; 823 } 824 825 /** 826 * Simpler, faster version of String.split() for splitting on a simple character. 827 * 828 * @param s 829 * The string to split 830 * @param c 831 * The character to split on 832 * @return The array of strings 833 */ 834 public static String[] split(final String s, final char c) 835 { 836 if (s == null || s.isEmpty()) 837 { 838 return NO_STRINGS; 839 } 840 841 int pos = s.indexOf(c); 842 if (pos == -1) 843 { 844 return new String[] { s }; 845 } 846 847 int next = s.indexOf(c, pos + 1); 848 if (next == -1) 849 { 850 return new String[] { s.substring(0, pos), s.substring(pos + 1) }; 851 } 852 853 final List<String> strings = new ArrayList<>(); 854 strings.add(s.substring(0, pos)); 855 strings.add(s.substring(pos + 1, next)); 856 while (true) 857 { 858 pos = next + 1; 859 next = s.indexOf(c, pos); 860 if (next == -1) 861 { 862 strings.add(s.substring(pos)); 863 break; 864 } 865 else 866 { 867 strings.add(s.substring(pos, next)); 868 } 869 } 870 final String[] result = new String[strings.size()]; 871 strings.toArray(result); 872 return result; 873 } 874 875 /** 876 * Strips the ending from the string <code>s</code>. 877 * 878 * @param s 879 * The string to strip 880 * @param ending 881 * The ending to strip off 882 * @return The stripped string or the original string if the ending did not exist 883 */ 884 public static String stripEnding(final String s, final String ending) 885 { 886 if (s == null) 887 { 888 return null; 889 } 890 891 // Stripping a null or empty string from the end returns the 892 // original string. 893 if (ending == null || ending.isEmpty()) 894 { 895 return s; 896 } 897 final int endingLength = ending.length(); 898 final int sLength = s.length(); 899 900 // When the length of the ending string is larger 901 // than the original string, the original string is returned. 902 if (endingLength > sLength) 903 { 904 return s; 905 } 906 final int index = s.lastIndexOf(ending); 907 final int endpos = sLength - endingLength; 908 909 if (index == endpos) 910 { 911 return s.substring(0, endpos); 912 } 913 914 return s; 915 } 916 917 /** 918 * Strip any jsessionid and possibly other redundant info that might be in our way. 919 * 920 * @param url 921 * The url to strip 922 * @return The stripped url 923 */ 924 public static String stripJSessionId(final String url) 925 { 926 if (Strings.isEmpty(url)) 927 { 928 return url; 929 } 930 931 // http://.../abc;jsessionid=...?param=... 932 int ixSemiColon = url.indexOf(SESSION_ID_PARAM); 933 if (ixSemiColon == -1) 934 { 935 return url; 936 } 937 938 int ixQuestionMark = url.indexOf('?'); 939 if (ixQuestionMark == -1) 940 { 941 // no query paramaters; cut off at ";" 942 // http://.../abc;jsession=... 943 return url.substring(0, ixSemiColon); 944 } 945 946 if (ixQuestionMark <= ixSemiColon) 947 { 948 // ? is before ; - no jsessionid in the url 949 return url; 950 } 951 952 return url.substring(0, ixSemiColon) + url.substring(ixQuestionMark); 953 } 954 955 /** 956 * Converts the string s to a Boolean. See <code>isTrue</code> for valid values of s. 957 * 958 * @param s 959 * The string to convert. 960 * @return Boolean <code>TRUE</code> when <code>isTrue(s)</code>. 961 * @throws StringValueConversionException 962 * when s is not a valid value 963 * @see #isTrue(String) 964 */ 965 public static Boolean toBoolean(final String s) throws StringValueConversionException 966 { 967 return isTrue(s); 968 } 969 970 /** 971 * Converts the 1 character string s to a character. 972 * 973 * @param s 974 * The 1 character string to convert to a char. 975 * @return Character value to convert 976 * @throws StringValueConversionException 977 * when the string is longer or shorter than 1 character, or <code>null</code>. 978 */ 979 public static char toChar(final String s) throws StringValueConversionException 980 { 981 if (s != null) 982 { 983 if (s.length() == 1) 984 { 985 return s.charAt(0); 986 } 987 else 988 { 989 throw new StringValueConversionException("Expected single character, not \"" + s + 990 "\""); 991 } 992 } 993 994 throw new StringValueConversionException("Character value was null"); 995 } 996 997 /** 998 * Converts unicodes to encoded \uxxxx. 999 * 1000 * @param unicodeString 1001 * The unicode string 1002 * @return The escaped unicode string, like '\u4F60\u597D'. 1003 */ 1004 public static String toEscapedUnicode(final String unicodeString) 1005 { 1006 if (unicodeString == null || unicodeString.isEmpty()) 1007 { 1008 return unicodeString; 1009 } 1010 int len = unicodeString.length(); 1011 int bufLen = len * 2; 1012 StringBuilder outBuffer = new StringBuilder(bufLen); 1013 for (int x = 0; x < len; x++) 1014 { 1015 char aChar = unicodeString.charAt(x); 1016 if (Character.getType(aChar) == Character.UNASSIGNED) 1017 { 1018 continue; 1019 } 1020 // Handle common case first, selecting largest block that 1021 // avoids the specials below 1022 if ((aChar > 61) && (aChar < 127)) 1023 { 1024 if (aChar == '\\') 1025 { 1026 outBuffer.append('\\'); 1027 outBuffer.append('\\'); 1028 continue; 1029 } 1030 outBuffer.append(aChar); 1031 continue; 1032 } 1033 switch (aChar) 1034 { 1035 case ' ' : 1036 if (x == 0) 1037 { 1038 outBuffer.append('\\'); 1039 } 1040 outBuffer.append(' '); 1041 break; 1042 case '\t' : 1043 outBuffer.append('\\'); 1044 outBuffer.append('t'); 1045 break; 1046 case '\n' : 1047 outBuffer.append('\\'); 1048 outBuffer.append('n'); 1049 break; 1050 case '\r' : 1051 outBuffer.append('\\'); 1052 outBuffer.append('r'); 1053 break; 1054 case '\f' : 1055 outBuffer.append('\\'); 1056 outBuffer.append('f'); 1057 break; 1058 case '=' : // Fall through 1059 case ':' : // Fall through 1060 case '#' : // Fall through 1061 case '!' : 1062 outBuffer.append('\\'); 1063 outBuffer.append(aChar); 1064 break; 1065 default : 1066 if ((aChar < 0x0020) || (aChar > 0x007e)) 1067 { 1068 outBuffer.append('\\'); 1069 outBuffer.append('u'); 1070 outBuffer.append(toHex((aChar >> 12) & 0xF)); 1071 outBuffer.append(toHex((aChar >> 8) & 0xF)); 1072 outBuffer.append(toHex((aChar >> 4) & 0xF)); 1073 outBuffer.append(toHex(aChar & 0xF)); 1074 } 1075 else 1076 { 1077 outBuffer.append(aChar); 1078 } 1079 } 1080 } 1081 return outBuffer.toString(); 1082 } 1083 1084 /** 1085 * Converts a String to multiline HTML markup by replacing newlines with line break entities 1086 * (<br/>) and multiple occurrences of newline with paragraph break entities (<p>). 1087 * 1088 * @param s 1089 * String to transform 1090 * @return String with all single occurrences of newline replaced with <br/> and all 1091 * multiple occurrences of newline replaced with <p>. 1092 */ 1093 public static CharSequence toMultilineMarkup(final CharSequence s) 1094 { 1095 if (s == null) 1096 { 1097 return null; 1098 } 1099 1100 final int len = s.length(); 1101 1102 // allocate a buffer that is 10% larger than the original string to account for markup 1103 final AppendingStringBuffer buffer = new AppendingStringBuffer((int) (len * 1.1) + 16); 1104 int newlineCount = 0; 1105 1106 buffer.append("<p>"); 1107 for (int i = 0; i < len; i++) 1108 { 1109 final char c = s.charAt(i); 1110 1111 switch (c) 1112 { 1113 case '\n' : 1114 newlineCount++; 1115 break; 1116 1117 case '\r' : 1118 break; 1119 1120 default : 1121 if (newlineCount == 1) 1122 { 1123 buffer.append("<br/>"); 1124 } 1125 else if (newlineCount > 1) 1126 { 1127 buffer.append("</p><p>"); 1128 } 1129 1130 buffer.append(c); 1131 newlineCount = 0; 1132 break; 1133 } 1134 } 1135 if (newlineCount == 1) 1136 { 1137 buffer.append("<br/>"); 1138 } 1139 else if (newlineCount > 1) 1140 { 1141 buffer.append("</p><p>"); 1142 } 1143 buffer.append("</p>"); 1144 return buffer; 1145 } 1146 1147 /** 1148 * Converts the given object to a string. Does special conversion for {@link Throwable 1149 * throwables} and String arrays of length 1 (in which case it just returns to string in that 1150 * array, as this is a common thing to have in the Servlet API). 1151 * 1152 * @param object 1153 * The object 1154 * @return The string 1155 */ 1156 public static String toString(final Object object) 1157 { 1158 if (object == null) 1159 { 1160 return null; 1161 } 1162 1163 if (object instanceof Throwable) 1164 { 1165 return toString((Throwable)object); 1166 } 1167 1168 if (object instanceof String) 1169 { 1170 return (String)object; 1171 } 1172 1173 if ((object instanceof String[]) && (((String[])object).length == 1)) 1174 { 1175 return ((String[])object)[0]; 1176 } 1177 1178 return object.toString(); 1179 } 1180 1181 1182 /** 1183 * Converts a Throwable to a string. 1184 * 1185 * @param throwable 1186 * The throwable 1187 * @return The string 1188 */ 1189 public static String toString(final Throwable throwable) 1190 { 1191 if (throwable != null) 1192 { 1193 List<Throwable> al = new ArrayList<>(); 1194 Throwable cause = throwable; 1195 al.add(cause); 1196 while ((cause.getCause() != null) && (cause != cause.getCause())) 1197 { 1198 cause = cause.getCause(); 1199 al.add(cause); 1200 } 1201 1202 AppendingStringBuffer sb = new AppendingStringBuffer(256); 1203 // first print the last cause 1204 int length = al.size() - 1; 1205 cause = al.get(length); 1206 if (throwable instanceof RuntimeException) 1207 { 1208 sb.append("Message: "); 1209 sb.append(throwable.getMessage()); 1210 sb.append("\n\n"); 1211 } 1212 sb.append("Root cause:\n\n"); 1213 outputThrowable(cause, sb, false); 1214 1215 if (length > 0) 1216 { 1217 sb.append("\n\nComplete stack:\n\n"); 1218 for (int i = 0; i < length; i++) 1219 { 1220 outputThrowable(al.get(i), sb, true); 1221 sb.append('\n'); 1222 } 1223 } 1224 return sb.toString(); 1225 } 1226 else 1227 { 1228 return "<Null Throwable>"; 1229 } 1230 } 1231 1232 private static void append(final AppendingStringBuffer buffer, final CharSequence s, 1233 final int from, final int to) 1234 { 1235 if (s instanceof AppendingStringBuffer) 1236 { 1237 AppendingStringBuffer asb = (AppendingStringBuffer)s; 1238 buffer.append(asb.getValue(), from, to - from); 1239 } 1240 else 1241 { 1242 buffer.append(s.subSequence(from, to)); 1243 } 1244 } 1245 1246 /** 1247 * Outputs the throwable and its stacktrace to the stringbuffer. If stopAtWicketSerlvet is true 1248 * then the output will stop when the org.apache.wicket servlet is reached. sun.reflect. 1249 * packages are filtered out. 1250 * 1251 * @param cause 1252 * @param sb 1253 * @param stopAtWicketServlet 1254 */ 1255 private static void outputThrowable(final Throwable cause, final AppendingStringBuffer sb, 1256 final boolean stopAtWicketServlet) 1257 { 1258 sb.append(cause); 1259 sb.append("\n"); 1260 StackTraceElement[] trace = cause.getStackTrace(); 1261 for (int i = 0; i < trace.length; i++) 1262 { 1263 String traceString = trace[i].toString(); 1264 if (!(traceString.startsWith("sun.reflect.") && (i > 1))) 1265 { 1266 sb.append(" at "); 1267 sb.append(traceString); 1268 sb.append("\n"); 1269 if (stopAtWicketServlet && 1270 (traceString.startsWith("org.apache.wicket.protocol.http.WicketServlet") || traceString.startsWith("org.apache.wicket.protocol.http.WicketFilter"))) 1271 { 1272 return; 1273 } 1274 } 1275 } 1276 } 1277 1278 private static int search(final CharSequence s, final String searchString, final int pos) 1279 { 1280 if (s instanceof String) 1281 { 1282 return ((String)s).indexOf(searchString, pos); 1283 } 1284 else if (s instanceof StringBuffer) 1285 { 1286 return ((StringBuffer)s).indexOf(searchString, pos); 1287 } 1288 else if (s instanceof StringBuilder) 1289 { 1290 return ((StringBuilder)s).indexOf(searchString, pos); 1291 } 1292 else if (s instanceof AppendingStringBuffer) 1293 { 1294 return ((AppendingStringBuffer)s).indexOf(searchString, pos); 1295 } 1296 else 1297 { 1298 return s.toString().indexOf(searchString, pos); 1299 } 1300 } 1301 1302 /** 1303 * Convert a nibble to a hex character 1304 * 1305 * @param nibble 1306 * the nibble to convert. 1307 * @return hex character 1308 */ 1309 private static char toHex(final int nibble) 1310 { 1311 return HEX_DIGIT[(nibble & 0xF)]; 1312 } 1313 1314 /** 1315 * Calculates the length of string in bytes, uses specified <code>charset</code> if provided. 1316 * 1317 * @param string 1318 * @param charset 1319 * (optional) character set to use when converting string to bytes 1320 * @return length of string in bytes 1321 */ 1322 public static int lengthInBytes(final String string, final Charset charset) 1323 { 1324 Args.notNull(string, "string"); 1325 if (charset != null) 1326 { 1327 try 1328 { 1329 return string.getBytes(charset.name()).length; 1330 } 1331 catch (UnsupportedEncodingException e) 1332 { 1333 throw new RuntimeException( 1334 "StringResourceStream created with unsupported charset: " + charset.name()); 1335 } 1336 } 1337 else 1338 { 1339 return string.getBytes().length; 1340 } 1341 } 1342 1343 /** 1344 * Extended {@link String#startsWith(String)} with support for case sensitivity 1345 * 1346 * @param str 1347 * @param prefix 1348 * @param caseSensitive 1349 * @return <code>true</code> if <code>str</code> starts with <code>prefix</code> 1350 */ 1351 public static boolean startsWith(final String str, final String prefix, 1352 final boolean caseSensitive) 1353 { 1354 if (caseSensitive) 1355 { 1356 return str.startsWith(prefix); 1357 } 1358 else 1359 { 1360 return str.toLowerCase(Locale.ROOT).startsWith(prefix.toLowerCase(Locale.ROOT)); 1361 } 1362 } 1363 1364 /** 1365 * returns the zero-based index of a character within a char sequence. this method mainly exists 1366 * as an faster alternative for <code>sequence.toString().indexOf(ch)</code>. 1367 * 1368 * @param sequence 1369 * character sequence 1370 * @param ch 1371 * character to search for 1372 * @return index of character within character sequence or <code>-1</code> if not found 1373 */ 1374 public static int indexOf(final CharSequence sequence, final char ch) 1375 { 1376 if (sequence != null) 1377 { 1378 for (int i = 0; i < sequence.length(); i++) 1379 { 1380 if (sequence.charAt(i) == ch) 1381 { 1382 return i; 1383 } 1384 } 1385 } 1386 1387 return -1; 1388 } 1389 1390 /** 1391 * <p> 1392 * Find the Levenshtein distance between two Strings. 1393 * </p> 1394 * 1395 * <p> 1396 * This is the number of changes needed to change one String into another, where each change is 1397 * a single character modification (deletion, insertion or substitution). 1398 * </p> 1399 * 1400 * <p> 1401 * The previous implementation of the Levenshtein distance algorithm was from <a 1402 * href="http://www.merriampark.com/ld.htm">http://www.merriampark.com/ld.htm</a> 1403 * </p> 1404 * 1405 * <p> 1406 * Chas Emerick has written an implementation in Java, which avoids an OutOfMemoryError which 1407 * can occur when my Java implementation is used with very large strings.<br> 1408 * This implementation of the Levenshtein distance algorithm is from <a 1409 * href="http://www.merriampark.com/ldjava.htm">http://www.merriampark.com/ldjava.htm</a> 1410 * </p> 1411 * 1412 * <pre> 1413 * Strings.getLevenshteinDistance(null, *) = IllegalArgumentException 1414 * Strings.getLevenshteinDistance(*, null) = IllegalArgumentException 1415 * Strings.getLevenshteinDistance("","") = 0 1416 * Strings.getLevenshteinDistance("","a") = 1 1417 * Strings.getLevenshteinDistance("aaapppp", "") = 7 1418 * Strings.getLevenshteinDistance("frog", "fog") = 1 1419 * Strings.getLevenshteinDistance("fly", "ant") = 3 1420 * Strings.getLevenshteinDistance("elephant", "hippo") = 7 1421 * Strings.getLevenshteinDistance("hippo", "elephant") = 7 1422 * Strings.getLevenshteinDistance("hippo", "zzzzzzzz") = 8 1423 * Strings.getLevenshteinDistance("hello", "hallo") = 1 1424 * </pre> 1425 * 1426 * Copied from Apache commons-lang StringUtils 3.0 1427 * 1428 * @param s 1429 * the first String, must not be null 1430 * @param t 1431 * the second String, must not be null 1432 * @return result distance 1433 * @throws IllegalArgumentException 1434 * if either String input {@code null} 1435 */ 1436 public static int getLevenshteinDistance(CharSequence s, CharSequence t) 1437 { 1438 if (s == null || t == null) 1439 { 1440 throw new IllegalArgumentException("Strings must not be null"); 1441 } 1442 1443 /* 1444 * The difference between this impl. and the previous is that, rather than creating and 1445 * retaining a matrix of size s.length()+1 by t.length()+1, we maintain two 1446 * single-dimensional arrays of length s.length()+1. The first, d, is the 'current working' 1447 * distance array that maintains the newest distance cost counts as we iterate through the 1448 * characters of String s. Each time we increment the index of String t we are comparing, d 1449 * is copied to p, the second int[]. Doing so allows us to retain the previous cost counts 1450 * as required by the algorithm (taking the minimum of the cost count to the left, up one, 1451 * and diagonally up and to the left of the current cost count being calculated). (Note that 1452 * the arrays aren't really copied anymore, just switched...this is clearly much better than 1453 * cloning an array or doing a System.arraycopy() each time through the outer loop.) 1454 * 1455 * Effectively, the difference between the two implementations is this one does not cause an 1456 * out of memory condition when calculating the LD over two very large strings. 1457 */ 1458 1459 int n = s.length(); // length of s 1460 int m = t.length(); // length of t 1461 1462 if (n == 0) 1463 { 1464 return m; 1465 } 1466 else if (m == 0) 1467 { 1468 return n; 1469 } 1470 1471 if (n > m) 1472 { 1473 // swap the input strings to consume less memory 1474 CharSequence tmp = s; 1475 s = t; 1476 t = tmp; 1477 n = m; 1478 m = t.length(); 1479 } 1480 1481 int p[] = new int[n + 1]; // 'previous' cost array, horizontally 1482 int d[] = new int[n + 1]; // cost array, horizontally 1483 int _d[]; // placeholder to assist in swapping p and d 1484 1485 // indexes into strings s and t 1486 int i; // iterates through s 1487 int j; // iterates through t 1488 1489 char t_j; // jth character of t 1490 1491 int cost; // cost 1492 1493 for (i = 0; i <= n; i++) 1494 { 1495 p[i] = i; 1496 } 1497 1498 for (j = 1; j <= m; j++) 1499 { 1500 t_j = t.charAt(j - 1); 1501 d[0] = j; 1502 1503 for (i = 1; i <= n; i++) 1504 { 1505 cost = s.charAt(i - 1) == t_j ? 0 : 1; 1506 // minimum of cell to the left+1, to the top+1, diagonally left and up +cost 1507 d[i] = Math.min(Math.min(d[i - 1] + 1, p[i] + 1), p[i - 1] + cost); 1508 } 1509 1510 // copy current distance counts to 'previous row' distance counts 1511 _d = p; 1512 p = d; 1513 d = _d; 1514 } 1515 1516 // our last action in the above loop was to switch d and p, so p now 1517 // actually has the most recent cost counts 1518 return p[n]; 1519 } 1520 1521 /** 1522 * convert byte array to hex string 1523 * 1524 * @param bytes 1525 * bytes to convert to hexadecimal representation 1526 * 1527 * @return hex string 1528 */ 1529 public static String toHexString(byte[] bytes) 1530 { 1531 Args.notNull(bytes, "bytes"); 1532 1533 final StringBuilder hex = new StringBuilder(bytes.length << 1); 1534 1535 for (final byte b : bytes) 1536 { 1537 hex.append(toHex(b >> 4)); 1538 hex.append(toHex(b)); 1539 } 1540 return hex.toString(); 1541 } 1542 1543 1544 /** 1545 * Return this value as en enum value. 1546 * 1547 * @param value 1548 * the value to convert to an enum value 1549 * @param enumClass 1550 * the enum type 1551 * @return an enum value 1552 */ 1553 public static <T extends Enum<T>> T toEnum(final CharSequence value, final Class<T> enumClass) 1554 { 1555 Args.notNull(enumClass, "enumClass"); 1556 Args.notNull(value, "value"); 1557 1558 try 1559 { 1560 return Enum.valueOf(enumClass, value.toString()); 1561 } 1562 catch (Exception e) 1563 { 1564 throw new StringValueConversionException( 1565 String.format("Cannot convert '%s' to enum constant of type '%s'.", value, enumClass), e); 1566 } 1567 } 1568 1569 /** 1570 * Returns the original string if this one is not empty (i.e. {@link #isEmpty(CharSequence)} returns false), 1571 * otherwise the default one is returned. The default string might be itself an empty one. 1572 * 1573 * @param originalString 1574 * the original sting value 1575 * @param defaultValue 1576 * the default string to return if the original is empty 1577 * @return the original string value if not empty, the default one otherwise 1578 */ 1579 public static String defaultIfEmpty(String originalString, String defaultValue) 1580 { 1581 return isEmpty(originalString) ? defaultValue : originalString; 1582 } 1583}