/*
 * MIT License
 *
 * Copyright (c) 2016 Michael Angstadt
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

package com.github.mangstadt.vinnie.io;

import static com.github.mangstadt.vinnie.Utils.ltrim;
import static com.github.mangstadt.vinnie.Utils.rtrim;

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import com.github.mangstadt.vinnie.SyntaxStyle;
import com.github.mangstadt.vinnie.VObjectProperty;
import com.github.mangstadt.vinnie.codec.DecoderException;
import com.github.mangstadt.vinnie.codec.QuotedPrintableCodec;

/**
 * <p>
 * Parses a vobject data stream.
 * </p>
 * <p>
 * <b>Example:</b>
 * </p>
 *
 * <pre class="brush:java">
 * Reader reader = ...
 * SyntaxRules rules = SyntaxRules.vcard();
 * VObjectReader vobjectReader = new VObjectReader(reader, rules);
 * vobjectReader.parse(new VObjectDataListener(){ ... });
 * vobjectReader.close();
 * </pre>
 *
 * <p>
 * <b>Quoted-printable Encoding</b>
 * </p>
 * <p>
 * Property values encoded in quoted-printable encoding are automatically
 * decoded. A property value is considered to be encoded in quoted-printable
 * encoding if it has a "ENCODING=QUOTED-PRINTABLE" parameter. Even though the
 * property value is automatically decoded, the ENCODING and CHARSET parameters
 * are not removed from the parsed property object so that the caller can
 * determine its original encoding.
 * </p>
 *
 * <pre class="brush:java">
 * Reader reader = new StringReader("NOTE;ENCODING=QUOTED-PRINTABLE;CHARSET=UTF-8:=C2=A1Hola, mundo!");
 * VObjectReader vobjectReader = new VObjectReader(reader, ...);
 * vobjectReader.parse(new VObjectDataAdapter() {
 *   public void onProperty(VObjectProperty property, Context context) {
 *     assertEquals("¡Hola, mundo!", property.getValue());
 *     assertEquals("QUOTED-PRINTABLE", property.getParameters().first("ENCODING"));
 *     assertEquals("UTF-8", property.getParameters().first("CHARSET"));
 *   }
 * });
 * vobjectReader.close();
 * </pre>
 *
 * <p>
 * If a CHARSET parameter is not present in the quoted-printable property, then
 * the character set of the input stream will be used to decode the value. If
 * this cannot be determined, then the local JVM's default character set will be
 * used. However, this behavior can be overridden by supplying your own
 * character set to use in the event that a CHARSET parameter is not present.
 * </p>
 *
 * <pre class="brush:java">
 * Reader reader = new StringReader("NOTE;ENCODING=QUOTED-PRINTABLE:=A1Hola, mundo!");
 * VObjectReader vobjectReader = new VObjectReader(reader, ...);
 * vobjectReader.setDefaultQuotedPrintableCharset(Charset.forName("Windows-1252"));
 * vobjectReader.parse(new VObjectDataAdapter() {
 *   public void onProperty(VObjectProperty property, Context context) {
 *     assertEquals("¡Hola, mundo!", property.getValue());
 *     assertEquals("QUOTED-PRINTABLE", property.getParameters().first("ENCODING"));
 *     assertNull(property.getParameters().first("CHARSET"));
 *   }
 * });
 * vobjectReader.close();
 * </pre>
 * <p>
 * Nameless ENCODING parameters are also recognized for backwards compatibility
 * with old-style syntax.
 * </p>
 *
 * <pre>
 * NOTE;QUOTED-PRINTABLE;CHARSET=UTF-8:=C2=A1Hola, mundo!
 * </pre>
 *
 * <p>
 * If there is an error decoding a quoted-printable value, then a warning will
 * be emitted and the value will be treated as plain-text.
 * </p>
 *
 * <pre class="brush:java">
 * Reader reader = new StringReader("NOTE;ENCODING=QUOTED-PRINTABLE;CHARSET=UTF-8:=ZZ invalid");
 * VObjectReader vobjectReader = new VObjectReader(reader, ...);
 * vobjectReader.parse(new VObjectDataAdapter() {
 *   public void onProperty(VObjectProperty property, Context context) {
 *     assertEquals("=ZZ invalid", property.getValue());
 *   }
 *   public void onWarning(Warning warning, VObjectProperty property, Exception thrown, Context context) {
 *     assertEquals(Warning.QUOTED_PRINTABLE_ERROR, warning);
 *   }
 * });
 * vobjectReader.close();
 * </pre>
 *
 * <p>
 * <b>Circumflex Accent Encoding</b>
 * </p>
 * <p>
 * Circumflex accent encoding allows newlines and double quote characters to be
 * included inside of parameter values. Parameter values that are encoded using
 * this encoding scheme are automatically decoded. Note that this encoding
 * mechanism is only supported by new-style syntax.
 * </p>
 *
 * <pre class="brush:java">
 * Reader reader = new StringReader("NOTE;X-AUTHOR=Fox ^'Spooky^' Mulder:The truth is out there.");
 * VObjectReader vobjectReader = new VObjectReader(reader, new SyntaxRules(SyntaxStyle.NEW));
 * vobjectReader.parse(new VObjectDataAdapter() {
 *   public void onProperty(VObjectProperty property, Context context) {
 *     assertEquals("Fox \"Spooky\" Mulder", property.getParameters().first("X-AUTHOR"));
 *   }
 * });
 * vobjectReader.close();
 * </pre>
 *
 * <p>
 * In the rare event that your vobject data has raw "^" characters in its
 * parameter values, and it does not use this encoding scheme, circumflex accent
 * decoding can be turned off.
 * </p>
 *
 * <pre class="brush:java">
 * Reader reader = new StringReader("NOTE;X-EMOTE=^_^:Good morning!");
 * VObjectReader vobjectReader = new VObjectReader(reader, new SyntaxRules(SyntaxStyle.NEW));
 * vobjectReader.setCaretDecodingEnabled(false);
 * vobjectReader.parse(new VObjectDataAdapter() {
 *   public void onProperty(VObjectProperty property, Context context) {
 *     assertEquals("^_^", property.getParameters().first("X-EMOTE"));
 *   }
 * });
 * vobjectReader.close();
 * </pre>
 *
 * <p>
 * <b>Line Folding</b>
 * </p>
 * <p>
 * Folded lines are automatically unfolded when read.
 * </p>
 *
 * <pre class="brush:java">
 * String string =
 * "NOTE:Lorem ipsum dolor sit amet\\, consectetur adipiscing elit. Vestibulum u\r\n" +
 * " ltricies tempor orci ac dignissim.";
 * Reader reader = new StringReader(string);
 * VObjectReader vobjectReader = new VObjectReader(reader, ...);
 * vobjectReader.parse(new VObjectDataAdapter() {
 *   public void onProperty(VObjectProperty property, Context context) {
 *     assertEquals("Lorem ipsum dolor sit amet\\, consectetur adipiscing elit. Vestibulum ultricies tempor orci ac dignissim.", property.getValue());
 *   }
 * });
 * vobjectReader.close();
 * </pre>
 *
 * @author Michael Angstadt
 */
public class VObjectReader implements Closeable {
    /**
     * The local computer's newline character sequence.
     */
    private static final String NEWLINE = System.getProperty("line.separator");

    private final Reader reader;
    private final SyntaxRules syntaxRules;

    private boolean caretDecodingEnabled = true;
    private Charset defaultQuotedPrintableCharset;

    private final ComponentStack stack;

    /**
     * String buffer used when tokenizing a property.
     */
    private final Buffer buffer = new Buffer();

    /**
     * Keeps track of the current status of the parser.
     */
    private final Context context;

    /**
     * The character that was read when it was determined that the current
     * property being parsed has ended.
     */
    private int leftOver = -1;

    /**
     * The current line number the parser is on.
     */
    private int lineNumber = 1;

    /**
     * Has the entire stream been consumed?
     */
    private boolean eos = false;

    /**
     * Creates a new vobject reader.
     * @param reader the input stream
     * @param syntaxRules defines the rules that are used to determine what kind
     * of syntax the data is in
     */
    public VObjectReader(Reader reader, SyntaxRules syntaxRules) {
        this.reader = reader;
        this.syntaxRules = syntaxRules;
        stack = new ComponentStack(syntaxRules.getDefaultSyntaxStyle());
        context = new Context(stack.names);

        Charset readerCharset = null;
        if (reader instanceof InputStreamReader) {
            /*
             * getEncoding() returns null once the stream has been closed, and
             * Charset.forName(null) would throw. Treat that as "could not be
             * determined" and fall back to the JVM default below.
             */
            String encoding = ((InputStreamReader) reader).getEncoding();
            if (encoding != null) {
                readerCharset = Charset.forName(encoding);
            }
        }
        defaultQuotedPrintableCharset = (readerCharset == null) ? Charset.defaultCharset() : readerCharset;
    }

    /**
     * <p>
     * Gets the default character set to use when decoding quoted-printable
     * values of properties that lack CHARSET parameters, or of properties whose
     * CHARSET parameters are not recognized by the local JVM.
     * </p>
     * <p>
     * By default, this is set to the character set of the {@link Reader} object
     * that this class was constructed with. If the character set of the
     * {@link Reader} object could not be determined, then it will be set to the
     * local JVM's default character set.
     * </p>
     * @return the default quoted-printable character set
     */
    public Charset getDefaultQuotedPrintableCharset() {
        return defaultQuotedPrintableCharset;
    }

    /**
     * <p>
     * Sets the character set to use when decoding quoted-printable values of
     * properties that lack CHARSET parameters, or of properties whose CHARSET
     * parameters are not recognized by the local JVM.
     * </p>
     * <p>
     * By default, this is set to the character set of the {@link Reader} object
     * that this class was constructed with. If the character set of the
     * {@link Reader} object could not be determined, then it will be set to the
     * local JVM's default character set.
     * </p>
     * @param charset the default quoted-printable character set (cannot be
     * null)
     */
    public void setDefaultQuotedPrintableCharset(Charset charset) {
        defaultQuotedPrintableCharset = charset;
    }

    /**
     * <p>
     * Gets whether the reader will decode parameter values that use circumflex
     * accent encoding (enabled by default). This escaping mechanism allows
     * newlines and double quotes to be included in parameter values. It is only
     * supported by new style syntax.
     * </p>
     *
     * <table class="simpleTable">
     * <caption>Characters encoded by circumflex accent encoding</caption>
     * <tr>
     * <th>Raw Character</th>
     * <th>Encoded Character</th>
     * </tr>
     * <tr>
     * <td>{@code "}</td>
     * <td>{@code ^'}</td>
     * </tr>
     * <tr>
     * <td><i>newline</i></td>
     * <td>{@code ^n}</td>
     * </tr>
     * <tr>
     * <td>{@code ^}</td>
     * <td>{@code ^^}</td>
     * </tr>
     * </table>
     *
     * <p>
     * Example:
     * </p>
     *
     * <pre>
     * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPittsburgh, PA 15212":geo:40.446816,-80.00566
     * </pre>
     *
     * @return true if circumflex accent decoding is enabled, false if not
     * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
     */
    public boolean isCaretDecodingEnabled() {
        return caretDecodingEnabled;
    }

    /**
     * <p>
     * Sets whether the reader will decode parameter values that use circumflex
     * accent encoding (enabled by default). This escaping mechanism allows
     * newlines and double quotes to be included in parameter values. It is only
     * supported by new style syntax.
     * </p>
     *
     * <table class="simpleTable">
     * <caption>Characters encoded by circumflex accent encoding</caption>
     * <tr>
     * <th>Raw Character</th>
     * <th>Encoded Character</th>
     * </tr>
     * <tr>
     * <td>{@code "}</td>
     * <td>{@code ^'}</td>
     * </tr>
     * <tr>
     * <td><i>newline</i></td>
     * <td>{@code ^n}</td>
     * </tr>
     * <tr>
     * <td>{@code ^}</td>
     * <td>{@code ^^}</td>
     * </tr>
     * </table>
     *
     * <p>
     * Example:
     * </p>
     *
     * <pre>
     * GEO;X-ADDRESS="Pittsburgh Pirates^n115 Federal St^nPittsburgh, PA 15212":geo:40.446816,-80.00566
     * </pre>
     *
     * @param enable true to use circumflex accent decoding, false not to
     * @see <a href="http://tools.ietf.org/html/rfc6868">RFC 6868</a>
     */
    public void setCaretDecodingEnabled(boolean enable) {
        caretDecodingEnabled = enable;
    }

    /**
     * <p>
     * Starts or continues to parse the data off the input stream.
     * </p>
     * <p>
     * This method blocks until one of the following events happen:
     * </p>
     * <ol>
     * <li>The end of the input stream has been reached or</li>
     * <li>One of the methods in the given {@link VObjectDataListener}
     * implementation has invoked {@link Context#stop()}.</li>
     * </ol>
     * <p>
     * Blank lines, including ones that precede the first property, are
     * skipped.
     * </p>
     * @param listener callback interface for handling data as it is read off
     * the input stream
     * @throws IOException if there's a problem reading from the input stream
     */
    public void parse(VObjectDataListener listener) throws IOException {
        context.stop = false;

        while (!eos && !context.stop) {
            context.lineNumber = lineNumber;
            buffer.clear();
            context.unfoldedLine.clear();

            VObjectProperty property = parseProperty(listener);

            if (context.unfoldedLine.size() == 0) {
                /*
                 * Nothing but newlines was consumed. Either the stream is
                 * exhausted (in which case "eos" is set and the loop condition
                 * ends the parse), or blank lines preceded the next property
                 * and its first character is waiting in "leftOver". In the
                 * latter case parsing must carry on; returning here would
                 * silently drop the rest of the stream.
                 */
                continue;
            }

            if (property == null) {
                listener.onWarning(Warning.MALFORMED_LINE, null, null, context);
                continue;
            }

            /*
             * Note: Property names are trimmed when checking for BEGIN and END
             * properties because old style syntax allows there to be whitespace
             * around the colon character for these two properties. Component
             * names are trimmed for the same reason.
             *
             * Component names are upper-cased with a fixed locale so that the
             * result does not depend on the JVM's default locale (e.g. "i" must
             * not become a dotted capital "I" under a Turkish locale).
             */

            if ("BEGIN".equalsIgnoreCase(property.getName().trim())) {
                String componentName = property.getValue().trim().toUpperCase(Locale.ENGLISH);
                if (componentName.length() == 0) {
                    listener.onWarning(Warning.EMPTY_BEGIN, null, null, context);
                    continue;
                }

                listener.onComponentBegin(componentName, context);

                stack.push(componentName);
                continue;
            }

            if ("END".equalsIgnoreCase(property.getName().trim())) {
                String componentName = property.getValue().trim().toUpperCase(Locale.ENGLISH);
                if (componentName.length() == 0) {
                    listener.onWarning(Warning.EMPTY_END, null, null, context);
                    continue;
                }

                //find the component that this END property matches up with
                int popCount = stack.popCount(componentName);
                if (popCount == 0) {
                    //END property does not match up with any BEGIN properties, so ignore
                    listener.onWarning(Warning.UNMATCHED_END, null, null, context);
                    continue;
                }

                while (popCount > 0) {
                    String poppedName = stack.pop();
                    listener.onComponentEnd(poppedName, context);
                    popCount--;
                }
                continue;
            }

            if ("VERSION".equalsIgnoreCase(property.getName())) {
                String parentComponent = stack.peekName();
                if (syntaxRules.hasSyntaxRules(parentComponent)) {
                    SyntaxStyle style = syntaxRules.getSyntaxStyle(parentComponent, property.getValue());
                    if (style == null) {
                        listener.onWarning(Warning.UNKNOWN_VERSION, property, null, context);
                    } else {
                        listener.onVersion(property.getValue(), context);
                        stack.updateSyntax(style);
                        continue;
                    }
                }
            }

            listener.onProperty(property, context);
        }
    }

    /**
     * Parses the next property off the input stream.
     * @param listener the data listener (for reporting warnings)
     * @return the parsed property or null if the property could not be parsed
     * @throws IOException if there was a problem reading from the input stream
     */
    private VObjectProperty parseProperty(VObjectDataListener listener) throws IOException {
        VObjectProperty property = new VObjectProperty();

        /*
         * The syntax style to assume the data is in.
         */
        SyntaxStyle syntax = stack.peekSyntax();

        /*
         * The name of the parameter we're currently inside of.
         */
        String curParamName = null;

        /*
         * The character that was used to escape the current character (for
         * parameter values).
         */
        char paramValueEscapeChar = 0;

        /*
         * Are we currently inside a parameter value that is surrounded with
         * double-quotes?
         */
        boolean inQuotes = false;

        /*
         * Are we currently inside the property value?
         */
        boolean inValue = false;

        /*
         * Does the line use quoted-printable encoding, and does it end all of
         * its folded lines with a "=" character?
         */
        boolean foldedQuotedPrintableLine = false;

        /*
         * Are we currently inside the whitespace that prepends a folded line?
         */
        boolean inFoldedLineWhitespace = false;

        /*
         * The current character.
         */
        char ch = 0;

        /*
         * The previous character.
         */
        char prevChar;

        while (true) {
            prevChar = ch;

            int read = nextChar();
            if (read < 0) {
                //end of stream
                eos = true;
                break;
            }

            ch = (char) read;

            if (prevChar == '\r' && ch == '\n') {
                /*
                 * The newline was already processed when the "\r" character was
                 * encountered, so ignore the accompanying "\n" character.
                 */
                continue;
            }

            if (isNewline(ch)) {
                foldedQuotedPrintableLine = (inValue && prevChar == '=' && property.getParameters().isQuotedPrintable());
                if (foldedQuotedPrintableLine) {
                    /*
                     * Remove the "=" character that sometimes appears at the
                     * end of quoted-printable lines that are followed by a
                     * folded line.
                     */
                    buffer.chop();
                    context.unfoldedLine.chop();
                }

                //keep track of the current line number
                lineNumber++;

                continue;
            }

            if (isNewline(prevChar)) {
                if (isWhitespace(ch)) {
                    /*
                     * This line is a continuation of the previous line (the
                     * line is folded).
                     */
                    inFoldedLineWhitespace = true;
                    continue;
                }

                if (foldedQuotedPrintableLine) {
                    /*
                     * The property's parameters indicate that the property
                     * value is quoted-printable. And the previous line ended
                     * with an equals sign. This means that folding whitespace
                     * may not be prepended to folded lines like it should.
                     */
                } else {
                    /*
                     * We've reached the end of the property. Save the
                     * character so the next call starts with it.
                     */
                    leftOver = ch;
                    break;
                }
            }

            if (inFoldedLineWhitespace) {
                if (isWhitespace(ch) && syntax == SyntaxStyle.OLD) {
                    /*
                     * 2.1 allows multiple whitespace characters to be used for
                     * folding (section 2.1.3).
                     */
                    continue;
                }
                inFoldedLineWhitespace = false;
            }

            context.unfoldedLine.append(ch);

            if (inValue) {
                buffer.append(ch);
                continue;
            }

            //decode escaped parameter value character
            if (paramValueEscapeChar != 0) {
                char escapeChar = paramValueEscapeChar;
                paramValueEscapeChar = 0;

                switch (escapeChar) {
                case '\\':
                    switch (ch) {
                    case '\\':
                        buffer.append(ch);
                        continue;
                    case ';':
                        /*
                         * Semicolons can only be escaped in old style parameter
                         * values. If a new style parameter value has
                         * semicolons, the value should be surrounded in double
                         * quotes.
                         */
                        buffer.append(ch);
                        continue;
                    }
                    break;
                case '^':
                    switch (ch) {
                    case '^':
                        buffer.append(ch);
                        continue;
                    case 'n':
                        buffer.append(NEWLINE);
                        continue;
                    case '\'':
                        buffer.append('"');
                        continue;
                    }
                    break;
                }

                /*
                 * Treat the escape character as a normal character because it's
                 * not a valid escape sequence.
                 */
                buffer.append(escapeChar).append(ch);
                continue;
            }

            //check for a parameter value escape character
            if (curParamName != null) {
                switch (syntax) {
                case OLD:
                    if (ch == '\\') {
                        paramValueEscapeChar = ch;
                        continue;
                    }
                    break;
                case NEW:
                    if (ch == '^' && caretDecodingEnabled) {
                        paramValueEscapeChar = ch;
                        continue;
                    }
                    break;
                }
            }

            //set the group
            if (ch == '.' && property.getGroup() == null && property.getName() == null) {
                property.setGroup(buffer.getAndClear());
                continue;
            }

            if ((ch == ';' || ch == ':') && !inQuotes) {
                if (property.getName() == null) {
                    //set the property name
                    property.setName(buffer.getAndClear());
                } else {
                    //set a parameter value
                    String paramValue = buffer.getAndClear();
                    if (syntax == SyntaxStyle.OLD) {
                        //old style allows whitespace to surround the "=", so remove it
                        paramValue = ltrim(paramValue);
                    }
                    property.getParameters().put(curParamName, paramValue);
                    curParamName = null;
                }

                if (ch == ':') {
                    //the rest of the line is the property value
                    inValue = true;
                }
                continue;
            }

            if (property.getName() != null) {
                //it's a multi-valued parameter
                if (ch == ',' && curParamName != null && !inQuotes && syntax != SyntaxStyle.OLD) {
                    String paramValue = buffer.getAndClear();
                    property.getParameters().put(curParamName, paramValue);
                    continue;
                }

                //set the parameter name
                if (ch == '=' && curParamName == null) {
                    //fixed locale: parameter names must not depend on the JVM's default locale
                    String paramName = buffer.getAndClear().toUpperCase(Locale.ENGLISH);
                    if (syntax == SyntaxStyle.OLD) {
                        //old style allows whitespace to surround the "=", so remove it
                        paramName = rtrim(paramName);
                    }
                    curParamName = paramName;
                    continue;
                }

                //entering/leaving a double-quoted parameter value (new style only)
                if (ch == '"' && curParamName != null && syntax != SyntaxStyle.OLD) {
                    inQuotes = !inQuotes;
                    continue;
                }
            }

            buffer.append(ch);
        }

        /*
         * Line or stream ended before the property value was reached.
         */
        if (!inValue) {
            return null;
        }

        property.setValue(buffer.getAndClear());
        if (property.getParameters().isQuotedPrintable()) {
            decodeQuotedPrintable(property, listener);
        }

        return property;
    }

    /**
     * Decodes the given property's value from quoted-printable encoding. If the
     * value cannot be decoded, a warning is reported to the listener and the
     * value is left untouched.
     * @param property the property
     * @param listener the data listener
     */
    private void decodeQuotedPrintable(VObjectProperty property, VObjectDataListener listener) {
        Charset charset = getCharset(property, listener);
        if (charset == null) {
            charset = defaultQuotedPrintableCharset;
        }

        String value = property.getValue();
        QuotedPrintableCodec codec = new QuotedPrintableCodec(charset.name());
        try {
            value = codec.decode(value);
        } catch (DecoderException e) {
            listener.onWarning(Warning.QUOTED_PRINTABLE_ERROR, property, e, context);
            return;
        }

        property.setValue(value);
    }

    /**
     * Gets the character set the given property is encoded in. A warning is
     * reported to the listener if the property names a character set that is
     * illegal or unsupported.
     * @param property the property
     * @param listener the data listener
     * @return the character set or null if the character set is not set or
     * could not be determined
     */
    private Charset getCharset(VObjectProperty property, VObjectDataListener listener) {
        Exception thrown;
        try {
            return property.getParameters().getCharset();
        } catch (IllegalCharsetNameException e) {
            //name contains illegal characters
            thrown = e;
        } catch (UnsupportedCharsetException e) {
            //not recognized by the JVM
            thrown = e;
        }

        listener.onWarning(Warning.UNKNOWN_CHARSET, property, thrown, context);
        return null;
    }

    /**
     * Gets the next character in the input stream.
     * @return the next character or -1 if the end of the stream has been
     * reached
     * @throws IOException if there's a problem reading from the input stream
     */
    private int nextChar() throws IOException {
        if (leftOver >= 0) {
            /*
             * Use the character that was left over from the previous invocation
             * of "parseProperty()".
             */
            int ch = leftOver;
            leftOver = -1;
            return ch;
        }

        return reader.read();
    }

    /**
     * Determines if the given character is a newline character.
     * @param ch the character
     * @return true if it's a newline character, false if not
     */
    private static boolean isNewline(char ch) {
        return ch == '\n' || ch == '\r';
    }

    /**
     * Determines if the given character is a space or a tab.
     * @param ch the character
     * @return true if it's a space or a tab, false if not
     */
    private static boolean isWhitespace(char ch) {
        return ch == ' ' || ch == '\t';
    }

    /**
     * Keeps track of the hierarchy of nested components and their syntax
     * styles.
     */
    private static class ComponentStack {
        /**
         * The hierarchy of components the parser is currently inside of.
         */
        private final List<String> names = new ArrayList<String>();

        /**
         * <p>
         * The syntax style of each component in the hierarchy.
         * </p>
         *
         * <p>
         * Note: This will always be one element larger than the "names" list
         * because it must remember the style of the "root" (for properties that
         * are not inside of a component, should there happen to be any).
         * </p>
         */
        private final List<SyntaxStyle> syntax = new ArrayList<SyntaxStyle>();

        /**
         * Creates a new stack.
         * @param defaultSyntax the default syntax style
         */
        public ComponentStack(SyntaxStyle defaultSyntax) {
            syntax.add(defaultSyntax);
        }

        /**
         * Pushes a component onto the stack. The new component starts out
         * with the syntax style of its parent.
         * @param component the component name
         */
        public void push(String component) {
            names.add(component);
            syntax.add(peekSyntax());
        }

        /**
         * Removes the top component from the stack.
         * @return the name of the component that was removed
         */
        public String pop() {
            syntax.remove(syntax.size() - 1);
            return names.remove(names.size() - 1);
        }

        /**
         * Gets the number of calls to {@link #pop()} it would take to pop the
         * given component name.
         * @param name the component name
         * @return the number of pops or 0 if the name was not found
         */
        public int popCount(String name) {
            int index = names.lastIndexOf(name);
            return (index < 0) ? 0 : names.size() - index;
        }

        /**
         * Gets the top component name.
         * @return the top component name or null if the name stack is empty
         */
        public String peekName() {
            return names.isEmpty() ? null : names.get(names.size() - 1);
        }

        /**
         * Gets the top syntax style.
         * @return the top syntax style or null if the syntax stack is empty
         */
        public SyntaxStyle peekSyntax() {
            return syntax.isEmpty() ? null : syntax.get(syntax.size() - 1);
        }

        /**
         * Replaces the top syntax style.
         * @param style the syntax style
         */
        public void updateSyntax(SyntaxStyle style) {
            syntax.set(syntax.size() - 1, style);
        }
    }

    /**
     * Closes the underlying input stream.
     * @throws IOException if there's a problem closing the input stream
     */
    public void close() throws IOException {
        reader.close();
    }
}