/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.camel.util;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.Stack;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;

import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * An XML parser that uses SAX to include line and column number for each XML element in the parsed Document.
 * <p>
 * The line number and column number can be obtained from a Node/Element using
 * <pre>
 *   String lineNumber = (String) node.getUserData(XmlLineNumberParser.LINE_NUMBER);
 *   String lineNumberEnd = (String) node.getUserData(XmlLineNumberParser.LINE_NUMBER_END);
 *   String columnNumber = (String) node.getUserData(XmlLineNumberParser.COLUMN_NUMBER);
 *   String columnNumberEnd = (String) node.getUserData(XmlLineNumberParser.COLUMN_NUMBER_END);
 * </pre>
 * The values are stored as strings, taken from the SAX {@link Locator} at the time the start tag
 * (respectively the end tag) of the element is reported.
 */
public final class XmlLineNumberParser {

    public static final String LINE_NUMBER = "lineNumber";
    public static final String COLUMN_NUMBER = "colNumber";
    public static final String LINE_NUMBER_END = "lineNumberEnd";
    public static final String COLUMN_NUMBER_END = "colNumberEnd";

    /**
     * Allows to plug in a custom text transformer in the parser, that can transform all the text content.
     * <p>
     * The transformer is applied to every chunk of character data as well as to attribute names and values.
     */
    public interface XmlTextTransformer {

        String transform(String text);

    }

    private XmlLineNumberParser() {
    }

    /**
     * Parses the XML.
     *
     * @param is the XML content as an input stream
     * @return the DOM model
     * @throws Exception is thrown if error parsing
     */
    public static Document parseXml(final InputStream is) throws Exception {
        return parseXml(is, null);
    }

    /**
     * Parses the XML.
     *
     * @param is the XML content as an input stream
     * @param xmlTransformer the XML transformer, or <tt>null</tt> to keep the text as-is
     * @return the DOM model
     * @throws Exception is thrown if error parsing
     */
    public static Document parseXml(final InputStream is, final XmlTextTransformer xmlTransformer) throws Exception {
        return parseXml(is, xmlTransformer, null, null);
    }

    /**
     * Parses the XML.
     * <p>
     * When <tt>rootNames</tt> is given, everything before the first element whose qualified name matches one of
     * the names is skipped, and that element becomes the root of the returned document. Content that follows
     * the closing tag of that root element is ignored, as a DOM document can only hold a single root element.
     *
     * @param is the XML content as an input stream
     * @param xmlTransformer the XML transformer, or <tt>null</tt> to keep the text as-is
     * @param rootNames one or more root names that is used as baseline for beginning the parsing, for example camelContext to start parsing
     *                  when Camel is discovered. Multiple names can be defined separated by comma
     * @param forceNamespace an optional namespace to force assign to each node. This may be needed for JAXB unmarshalling from XML -> POJO.
     * @return the DOM model
     * @throws Exception is thrown if error parsing
     */
    public static Document parseXml(final InputStream is, XmlTextTransformer xmlTransformer, String rootNames, final String forceNamespace) throws Exception {
        ObjectHelper.notNull(is, "is");

        final XmlTextTransformer transformer = xmlTransformer == null ? new NoopTransformer() : xmlTransformer;
        // split the root names only once instead of on every start tag
        final String[] roots = splitRootNames(rootNames);

        final SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, Boolean.TRUE);
        final SAXParser parser = factory.newSAXParser();

        // the document builder is only used for creating the empty target document that the SAX handler populates
        final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        // turn off validator and loading external dtd
        dbf.setValidating(false);
        dbf.setNamespaceAware(true);
        dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, Boolean.TRUE);
        dbf.setFeature("http://xml.org/sax/features/namespaces", false);
        dbf.setFeature("http://xml.org/sax/features/validation", false);
        dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
        dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        dbf.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        dbf.setFeature("http://xml.org/sax/features/external-general-entities", false);
        final DocumentBuilder docBuilder = dbf.newDocumentBuilder();
        final Document doc = docBuilder.newDocument();

        // elements that have been opened but not yet closed; the top is the element currently being populated
        final Stack<Element> elementStack = new Stack<>();
        // character data collected since the last start/end tag
        final StringBuilder textBuffer = new StringBuilder();
        final DefaultHandler handler = new DefaultHandler() {
            private Locator locator;
            // whether the root element has been discovered (always the case when no root names are given)
            private boolean found = roots == null;
            // whether the root element has been closed and attached to the document
            private boolean rootClosed;

            @Override
            public void setDocumentLocator(final Locator locator) {
                this.locator = locator; // Save the locator, so that it can be used later for line tracking when traversing nodes.
            }

            private boolean isRootName(String qName) {
                for (String root : roots) {
                    if (qName.equals(root)) {
                        return true;
                    }
                }
                return false;
            }

            @Override
            public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) throws SAXException {
                addTextIfNeeded();

                if (rootClosed) {
                    // the document already has its root element, a second one cannot be attached
                    return;
                }
                if (!found && isRootName(qName)) {
                    found = true;
                }
                if (!found) {
                    return;
                }

                final Element el;
                if (forceNamespace != null) {
                    el = doc.createElementNS(forceNamespace, qName);
                } else {
                    el = doc.createElement(qName);
                }

                for (int i = 0; i < attributes.getLength(); i++) {
                    el.setAttribute(transformer.transform(attributes.getQName(i)), transformer.transform(attributes.getValue(i)));
                }

                el.setUserData(LINE_NUMBER, String.valueOf(this.locator.getLineNumber()), null);
                el.setUserData(COLUMN_NUMBER, String.valueOf(this.locator.getColumnNumber()), null);
                elementStack.push(el);
            }

            @Override
            public void endElement(final String uri, final String localName, final String qName) {
                if (!found || rootClosed) {
                    return;
                }

                addTextIfNeeded();

                if (elementStack.isEmpty()) {
                    // end tag of an element that was opened before the root was discovered
                    return;
                }

                final Element closedEl = elementStack.pop();
                if (elementStack.isEmpty()) {
                    // nothing left on the stack so this is the root element
                    doc.appendChild(closedEl);
                    rootClosed = true;
                } else {
                    final Element parentEl = elementStack.peek();
                    parentEl.appendChild(closedEl);
                }

                closedEl.setUserData(LINE_NUMBER_END, String.valueOf(this.locator.getLineNumber()), null);
                closedEl.setUserData(COLUMN_NUMBER_END, String.valueOf(this.locator.getColumnNumber()), null);
            }

            @Override
            public void characters(final char[] ch, final int start, final int length) throws SAXException {
                textBuffer.append(transformer.transform(new String(ch, start, length)));
            }

            @Override
            public InputSource resolveEntity(String publicId, String systemId) throws IOException, SAXException {
                // do not resolve external dtd
                return new InputSource(new StringReader(""));
            }

            // Outputs text accumulated under the current node
            private void addTextIfNeeded() {
                if (textBuffer.length() == 0) {
                    return;
                }
                if (!elementStack.isEmpty()) {
                    final Node textNode = doc.createTextNode(textBuffer.toString());
                    elementStack.peek().appendChild(textNode);
                }
                // always reset the buffer: text seen while no element is open (e.g. before the root was
                // discovered) belongs to no node and must not leak into the next element that is opened
                textBuffer.setLength(0);
            }
        };
        parser.parse(is, handler);

        return doc;
    }

    /**
     * Splits the comma separated root names into the individual (trimmed) names.
     *
     * @param rootNames the comma separated names, may be <tt>null</tt>
     * @return the names, or <tt>null</tt> if no root names were given
     */
    private static String[] splitRootNames(String rootNames) {
        if (rootNames == null) {
            return null;
        }
        final String[] names = rootNames.split(",");
        for (int i = 0; i < names.length; i++) {
            names[i] = names[i].trim();
        }
        return names;
    }

    /**
     * Transformer used when the caller did not supply one; returns the text unchanged.
     */
    private static final class NoopTransformer implements XmlTextTransformer {

        @Override
        public String transform(String text) {
            return text;
        }

    }

}