public class TaggedFilterConfiguration extends Object
Extraction rules can handle the following cases:
NON EXTRACTABLE - Default rule - don't extract it.
INLINE - Elements that are included with text.
EXCLUDED - Elements and children that should be excluded from extraction.
INCLUDED - Elements and children within EXCLUDED ranges that should be extracted.
GROUP - Elements that are grouped together structurally, such as lists, tables, etc.
ATTRIBUTES - Attributes on specific elements which should be extracted. May be translatable or localizable.
ATTRIBUTES ANY ELEMENT - Convenience rule for attributes which can occur on any element. May be translatable or localizable.
TEXT UNIT - Elements whose start and end tags become part of a TextUnit rather than DocumentPart.
TEXT RUN - Elements which group together a common run of inline elements. For example, a style marker in OpenXML.
TEXT MARKER - Elements which immediately surround text.
Any of the above rules may have conditional rules based on attribute names and/or values. Conditional rules may be attached to both elements and attributes. Multiple conditional rules are evaluated as OR expressions. For example, "type=button" OR "type=default".
| Modifier and Type | Class and Description |
|---|---|
| static class | TaggedFilterConfiguration.RULE_TYPE AbstractMarkupFilter rule types. |
| Constructor and Description |
|---|
| TaggedFilterConfiguration() |
| TaggedFilterConfiguration(File configurationFile) |
| TaggedFilterConfiguration(String configurationScript) |
| TaggedFilterConfiguration(URL configurationPathAsResource) |
public static final String RULETYPES
public static final String GLOBAL_PRESERVE_WHITESPACE
public static final String GLOBAL_EXCLUDE_BY_DEFAULT
public static final String INLINE_CDATA
public static final String INLINE
public static final String GROUP
public static final String EXCLUDE
public static final String INCLUDE
public static final String TEXTUNIT
public static final String TEXTRUN
public static final String PRESERVE_WHITESPACE
public static final String SCRIPT
public static final String SERVER
public static final String ATTRIBUTE_TRANS
public static final String ATTRIBUTE_WRITABLE
public static final String ATTRIBUTE_READONLY
public static final String ATTRIBUTES_ONLY
public static final String ATTRIBUTE_ID
public static final String ATTRIBUTE_PRESERVE_WHITESPACE
public static final String ALL_ELEMENTS_EXCEPT
public static final String ONLY_THESE_ELEMENTS
public static final String EQUALS
public static final String NOT_EQUALS
public static final String MATCHES
public static final String ELEMENT_TYPE
public static final String WELLFORMED
public static final String USECODEFINDER
public static final String CODEFINDERRULES
public static final String GLOBAL_ESCAPE_NBSP
public static final String GLOBAL_PCDATA_SUBFILTER
public static final String GLOBAL_CDATA_SUBFILTER
public static final String CONDITIONS
public static final String SUBFILTER
public static final String ELEMENT_TRANSLATABLE_ATTRIBUTES
public static final String ELEMENT_WRITABLE_ATTRIBUTES
public static final String ELEMENT_READ_ONLY_ATTRIBUTES
public static final String ELEMENT_ID_ATTRIBUTES
public static final String PRESERVE_CONDITION
public static final String DEFAULT_CONDITION
public static final String SIMPLIFIER_RULES
public TaggedFilterConfiguration()
public TaggedFilterConfiguration(URL configurationPathAsResource)
public TaggedFilterConfiguration(File configurationFile)
public TaggedFilterConfiguration(String configurationScript)
public YamlConfigurationReader getConfigReader()
public boolean isGlobalPreserveWhitespace()
public boolean isGlobalExcludeByDefault()
public boolean isWellformed()
public boolean isInlineCdata()
public boolean isUseCodeFinder()
public boolean getBooleanParameter(String parameterName)
public int getIntegerParameter(String parameterName)
public String getGlobalPCDATASubfilter()
public String getGlobalCDATASubfilter()
public String getCodeFinderRules()
public boolean isRuleType(String ruleName, TaggedFilterConfiguration.RULE_TYPE ruleType, List<String> ruleTypes)
public boolean isRuleType(String ruleName, TaggedFilterConfiguration.RULE_TYPE ruleType)
public boolean hasDefinedInlineRule(String ruleName)
public String getElementType(net.htmlparser.jericho.Tag element)
public TaggedFilterConfiguration.RULE_TYPE findMatchingAttributeRule(String tag, Map<String,String> attributes, String attribute)
Parameters: tag, attributes, attribute. Returns: TaggedFilterConfiguration.RULE_TYPE
public TaggedFilterConfiguration.RULE_TYPE getConditionalAttributeRuleType(String attribute, Map<String,String> attributes)
public TaggedFilterConfiguration.RULE_TYPE getAttributeRuleType(String attribute)
public TaggedFilterConfiguration.RULE_TYPE getConditionalElementRuleType(String tag, Map<String,String> attributes)
public TaggedFilterConfiguration.RULE_TYPE getElementRuleTypeCandidate(String tag)
public boolean isTranslatableAttribute(String tag, String attribute, Map<String,String> attributes)
public boolean isReadOnlyLocalizableAttribute(String tag, String attribute, Map<String,String> attributes)
public boolean isWritableLocalizableAttribute(String tag, String attribute, Map<String,String> attributes)
public boolean isIdAttribute(String tag, String attribute, Map<String,String> attributes)
public TaggedFilterConfiguration.RULE_TYPE convertRuleAsStringToRuleType(String ruleType)
public boolean isPreserveWhitespaceCondition(String attribute, Map<String,String> attributes)
public boolean isDefaultWhitespaceCondition(String attribute, Map<String,String> attributes)
public String getSimplifierRules()
public void setSimplfierRules(String rules)
public boolean getQuoteModeDefined()
public void setQuoteModeDefined(boolean defined)
public int getQuoteMode()
public void setQuoteMode(String quoteMode)
Copyright © 2021. All rights reserved.