public class TextUnitUtil extends Object
| Constructor and Description |
|---|
TextUnitUtil() |
| Modifier and Type | Method and Description |
|---|---|
static AltTranslationsAnnotation |
addAltTranslation(Segment seg,
AltTranslation alt)
Adds an
AltTranslation object to a given Segment. |
static AltTranslationsAnnotation |
addAltTranslation(TextContainer targetContainer,
AltTranslation alt)
Adds an
AltTranslation object to a given TextContainer. |
static void |
addQualifiers(ITextUnit textUnit,
String qualifier)
Adds to the skeleton of a given text unit resource qualifiers (quotation marks etc.) to appear around text.
|
static void |
addQualifiers(ITextUnit textUnit,
String startQualifier,
String endQualifier)
Adds to the skeleton of a given text unit resource qualifiers (quotation marks etc.) to appear around text.
|
static ITextUnit |
buildTU(ITextUnit textUnit,
String name,
TextContainer source,
TextContainer target,
LocaleId locId,
String comment)
Creates a new text unit resource or updates the one passed as the parameter.
|
static ITextUnit |
buildTU(String source)
Creates a new text unit resource based on a given string becoming the source text of the text unit.
|
static ITextUnit |
buildTU(String srcPart,
String skelPart)
Creates a new text unit resource based on a given string becoming the source text of the text unit, and a
skeleton string, which gets appended to the new text unit's skeleton.
|
static ITextUnit |
buildTU(TextContainer source)
Creates a new text unit resource based on a given text container object becoming the source part of the text
unit.
|
static void |
convertTextPart_whitespaceCodesToText(TextPart textPart) |
static void |
convertTextParts_whitespaceCodesToText(TextContainer tc) |
static void |
convertTextPartsToCodes(TextContainer tc)
Convert all TextParts (not Segments) in a given TextContainer to each contain
a single code with the part's text.
|
static void |
convertTextPartToCode(TextPart textPart)
Create a single code with a given TextPart's text.
|
static GenericSkeleton |
convertToSkeleton(ITextUnit textUnit)
Copies source and target text of a given text unit into a newly created skeleton.
|
static TextFragment |
copySrcCodeDataToMatchingTrgCodes(TextFragment oriSrc,
TextFragment newTrg,
boolean alwaysCopyCodes,
boolean addMissingCodes,
TextFragment newSrc,
ITextUnit parent)
Copies the aligned inline codes of the source to the corresponding target codes.
|
static void |
deleteLastChar(TextFragment textFragment)
Deletes the last non-whitespace and non-code character of a given text fragment.
|
static boolean |
endsWith(TextFragment textFragment,
String substr)
Indicates if a given text fragment ends with a given sub-string.
|
static TextFragment |
expandCodes(TextFragment tf)
Expand codes that have been previously merged.
|
static TextFragment |
extractSegMarkers(TextFragment tf,
TextFragment original,
boolean removeFromOriginal)
Extracts segment and text part markers from a given string, creates codes (place-holder type) for those markers,
and appends them to a given text fragment.
|
static GenericSkeleton |
forceSkeleton(ITextUnit tu)
Makes sure that a given text unit contains a skeleton.
|
static String |
getCodedText(TextFragment textFragment)
Gets text of a given text fragment object possibly containing inline codes.
|
static char |
getLastChar(TextFragment textFragment)
Gets the last character of a given text fragment.
|
static <A extends IAnnotation> |
getSourceAnnotation(ITextUnit textUnit,
Class<A> type)
Gets an annotation attached to the source part of a given text unit resource.
|
static String |
getSourceText(ITextUnit textUnit)
Gets the coded text of the first part of the source of a given text unit resource.
|
static String |
getSourceText(ITextUnit textUnit,
boolean removeCodes)
Gets the coded text of the first part of a source part of a given text unit resource.
|
static <A extends IAnnotation> |
getTargetAnnotation(ITextUnit textUnit,
LocaleId locId,
Class<A> type)
Gets an annotation attached to the target part of a given text unit resource in a given locale.
|
static String |
getTargetText(ITextUnit textUnit,
LocaleId locId)
Gets text of the first part of the target of a given text unit resource in the given locale.
|
static String |
getText(TextFragment textFragment)
Extracts text from the given text fragment.
|
static String |
getText(TextFragment textFragment,
List<Integer> markerPositions)
Extracts text from the given text fragment.
|
static boolean |
hasExternalRefMarker(Code code) |
static boolean |
hasMergedCode(TextFragment tf) |
static boolean |
hasSegEndMarker(Code code) |
static boolean |
hasSegOrTpMarker(Code code) |
static boolean |
hasSegStartMarker(Code code) |
static boolean |
hasSource(ITextUnit textUnit)
Indicates if a given text unit resource is not null and its source part is neither null nor empty.
|
static boolean |
hasTpEndMarker(Code code) |
static boolean |
hasTpStartMarker(Code code) |
static boolean |
isApproved(ITextUnit tu,
LocaleId targetLocale) |
static boolean |
isEmpty(ITextUnit textUnit)
Indicates if a given text unit resource is null, or its source part is null or empty.
|
static boolean |
isEmpty(ITextUnit textUnit,
boolean ignoreWS)
Indicates if a given text unit resource is null, or its source part is null or empty.
|
static boolean |
isEmpty(TextFragment textFragment)
Indicates if a given text fragment object is null, or the text it contains is null or empty.
|
static boolean |
isStandalone(ITextUnit tu) |
static boolean |
isWellformed(TextContainer tc) |
static boolean |
isWellformed(TextFragment tf) |
static int |
lastIndexOf(TextFragment textFragment,
String findWhat)
Returns the index (within a given text fragment object) of the rightmost occurrence of the specified substring.
|
static boolean |
needsPreserveWhitespaces(ITextUnit tu) |
static boolean |
needsPreserveWhitespaces(TextContainer tc)
Detects if a given TextContainer contains whitespace characters to
be preserved in XML.
|
static String |
printMarkerIndexes(TextFragment textFragment) |
static String |
printMarkers(TextFragment textFragment) |
static String |
removeAndReplaceCodes(String codedText,
String isolatedCodeReplacement)
Removes the opening and closing codes and replaces the isolated codes in text with the specified string.
|
static void |
removeCodes(ITextUnit textUnit,
boolean removeTargetCodes)
Removes all inline tags in the source (or optionally the target) text unit resource.
|
static String |
removeCodes(String codedText)
Removes all inline tags from a given coded text.
|
static void |
removeCodes(TextContainer tc)
Removes all inline tags from the given
TextContainer |
static void |
removeCodes(TextFragment tf)
Removes all inline tags from the given
TextFragment |
static boolean |
removeQualifiers(ITextUnit textUnit,
String qualifier)
Removes from the source part of a given text unit resource qualifiers (quotation marks etc.) around text.
|
static boolean |
removeQualifiers(ITextUnit textUnit,
String startQualifier,
String endQualifier)
Removes from the source part of a given un-segmented text unit resource qualifiers (parentheses, quotation marks
etc.) around text.
|
static void |
renumberCodes(TextContainer tc) |
static String |
restoreSegmentation(TextContainer tc,
TextFragment segStorage)
Restores original segmentation of a given text container from a given text fragment created with storeSegmentation().
|
static void |
setSourceAnnotation(ITextUnit textUnit,
IAnnotation annotation)
Attaches an annotation to the source part of a given text unit resource.
|
static void |
setSourceText(ITextUnit textUnit,
String text)
Sets the coded text of the un-segmented source of a given text unit resource.
|
static void |
setTargetAnnotation(ITextUnit textUnit,
LocaleId locId,
IAnnotation annotation)
Attaches an annotation to the target part of a given text unit resource in a given language.
|
static void |
setTargetText(ITextUnit textUnit,
LocaleId locId,
String text)
Sets the coded text of the target part of a given text unit resource in a given language.
|
static void |
simplifyCodes(ITextUnit textUnit,
String rules,
boolean removeLeadingTrailingCodes)
Simplifies all possible tags in the source part of a given text unit resource.
|
static void |
simplifyCodes(ITextUnit textUnit,
String rules,
boolean removeLeadingTrailingCodes,
boolean mergeCodes)
Simplifies all possible tags in the source part of a given text unit resource.
|
static TextFragment[] |
simplifyCodes(TextContainer tc,
String rules,
boolean removeLeadingTrailingCodes)
Simplifies all possible tags in a given text container.
|
static TextFragment[] |
simplifyCodes(TextContainer tc,
String rules,
boolean removeLeadingTrailingCodes,
boolean mergeCodes)
Simplifies all possible tags in a given text container.
|
static TextFragment[] |
simplifyCodes(TextFragment tf,
String rules,
boolean removeLeadingTrailingCodes)
Simplifies all possible tags in a given text fragment.
|
static TextFragment[] |
simplifyCodes(TextFragment tf,
String rules,
boolean removeLeadingTrailingCodes,
boolean mergeCodes)
Simplifies all possible tags in a given text fragment.
|
static void |
simplifyCodesPostSegmentation(ITextUnit textUnit,
String rules,
boolean removeLeadingTrailingCodes,
boolean mergeCodes)
Simplifies all possible tags in the source part of a given text unit resource.
|
static void |
simplifyCodesPostSegmentation(TextContainer tc,
String rules,
boolean removeLeadingTrailingCodes,
boolean mergeCodes)
Simplifies all possible tags in a given text container.
|
static TextFragment |
storeSegmentation(TextContainer tc) |
static String |
testMarkers() |
static String |
toText(String text,
List<Code> codes)
Returns representation of a given coded text with code data enclosed in brackets.
|
static String |
toText(TextFragment tf)
Returns the content of a given text fragment, including the original codes whenever
possible.
|
static void |
trimLeading(TextFragment textFragment)
Removes leading whitespaces from a given text fragment.
|
static void |
trimLeading(TextFragment textFragment,
GenericSkeleton skel)
Removes leading whitespaces from a given text fragment, puts removed whitespaces to the given skeleton.
|
static void |
trimSegments(TextContainer tc) |
static void |
trimSegments(TextContainer tc,
boolean trimLeading,
boolean trimTrailing)
Trims segments of a given text container that contains leading or trailing whitespaces.
|
static void |
trimTrailing(TextFragment textFragment)
Removes trailing whitespaces from a given text fragment.
|
static void |
trimTrailing(TextFragment textFragment,
GenericSkeleton skel)
Removes trailing whitespaces from a given text fragment, puts removed whitespaces to the given skeleton.
|
static void |
trimTU(ITextUnit textUnit,
boolean trimLeading,
boolean trimTrailing)
Removes leading and/or trailing whitespaces from the source part of a given text unit resource.
|
static void |
unsegmentTU(ITextUnit tu) |
public static void trimLeading(TextFragment textFragment)
textFragment - the text fragment whose leading whitespaces are to be removed.
public static TextFragment copySrcCodeDataToMatchingTrgCodes(TextFragment oriSrc, TextFragment newTrg, boolean alwaysCopyCodes, boolean addMissingCodes, TextFragment newSrc, ITextUnit parent)
Copies the aligned inline codes of the source to the corresponding target codes. Assumes that the TextFragment's codes are already id aligned. If they are not, call TextFragment.alignCodeIds(TextFragment) to align the codes based on their native data.
This method compares an original source with a new target, and transfers the codes of the original source to their equivalent places in the new target. The text of the new target is left untouched.
If the option alwaysCopyCodes is false, the codes are copied only if the original source codes have references or if the new target codes are empty.
oriSrc - the original source text fragment.
newTrg - the new target text fragment (this is the fragment that will be adjusted).
alwaysCopyCodes - indicates the adjustment of the codes is always done.
addMissingCodes - indicates if codes that are in the original source but not in the new target should be automatically added at the end of the new target copy (even if they are removable) if there are references in the original source and/or empty codes in the new target.
newSrc - the new source text fragment (can be null). When available, it is used to speed up the inline code processing in some cases.
parent - the parent text unit (can be null; used for error information only).
public static void trimLeading(TextFragment textFragment, GenericSkeleton skel)
textFragment - the text fragment whose leading whitespaces are to be removed.
skel - the skeleton to put the removed whitespaces.
public static void trimTrailing(TextFragment textFragment)
textFragment - the text fragment whose trailing whitespaces are to be removed.
public static void trimTrailing(TextFragment textFragment, GenericSkeleton skel)
textFragment - the text fragment whose trailing whitespaces are to be removed.
skel - the skeleton to put the removed whitespaces.
public static boolean endsWith(TextFragment textFragment, String substr)
textFragment - the text fragment to examine.substr - the text to lookup.public static boolean isEmpty(ITextUnit textUnit)
textUnit - the text unit to check.public static boolean hasSource(ITextUnit textUnit)
textUnit - the text unit to check.public static boolean isEmpty(ITextUnit textUnit, boolean ignoreWS)
textUnit - the text unit to check.ignoreWS - if true and the text unit contains only whitespaces, then the text unit is considered empty.public static String getSourceText(ITextUnit textUnit)
textUnit - the text unit resource whose source text should be returned.
public static String getSourceText(ITextUnit textUnit, boolean removeCodes)
textUnit - the text unit resource whose source text should be returned.
removeCodes - true if possible inline codes should be removed.
public static String getTargetText(ITextUnit textUnit, LocaleId locId)
textUnit - the text unit resource whose target text should be returned.
locId - the locale of the target part being sought.
public static String getCodedText(TextFragment textFragment)
textFragment - the given text fragment object.public static String getText(TextFragment textFragment, List<Integer> markerPositions)
textFragment - TextFragment object with possible codes insidemarkerPositions - List to store initial positions of removed code markers. use null to not store the markers.public static String printMarkerIndexes(TextFragment textFragment)
public static String printMarkers(TextFragment textFragment)
public static String getText(TextFragment textFragment)
textFragment - TextFragment object with possible codes insidepublic static char getLastChar(TextFragment textFragment)
textFragment - the text fragment to examine.
public static void deleteLastChar(TextFragment textFragment)
textFragment - the text fragment to examine.public static int lastIndexOf(TextFragment textFragment, String findWhat)
textFragment - the text fragment to examine.
findWhat - the substring to search for.
Returns: the index of the rightmost occurrence of the substring; if it does not occur, -1 is returned.
public static boolean isEmpty(TextFragment textFragment)
textFragment - the text fragment to examine.public static ITextUnit buildTU(TextContainer source)
source - the given text container becoming the source part of the text unit.public static ITextUnit buildTU(String source)
source - the given string becoming the source text of the text unit.public static ITextUnit buildTU(String srcPart, String skelPart)
srcPart - the given string becoming the source text of the created text unit.skelPart - the skeleton string appended to the new text unit's skeleton.public static ITextUnit buildTU(ITextUnit textUnit, String name, TextContainer source, TextContainer target, LocaleId locId, String comment)
textUnit - the text unit to be modified, or null to create a new text unit.name - name of the new text unit, or a new name for the existing one.source - the text container object becoming the source part of the text unit.target - the text container object becoming the target part of the text unit.locId - the locale of the target part (passed in the target parameter).comment - the optional comment becoming a NOTE property of the text unit.public static GenericSkeleton forceSkeleton(ITextUnit tu)
tu - the given text unit to have a skeleton.public static GenericSkeleton convertToSkeleton(ITextUnit textUnit)
textUnit - the text unit to be copied into a skeleton.public static <A extends IAnnotation> A getSourceAnnotation(ITextUnit textUnit, Class<A> type)
A - a class implementing IAnnotationtextUnit - the given text unit resource.type - reference to the requested annotation type.public static void setSourceAnnotation(ITextUnit textUnit, IAnnotation annotation)
textUnit - the given text unit resource.annotation - the annotation to be attached to the source part of the text unit.public static <A extends IAnnotation> A getTargetAnnotation(ITextUnit textUnit, LocaleId locId, Class<A> type)
A - a class implementing IAnnotationtextUnit - the given text unit resource.locId - the locale of the target part being sought.type - reference to the requested annotation type.public static void setTargetAnnotation(ITextUnit textUnit, LocaleId locId, IAnnotation annotation)
textUnit - the given text unit resource.locId - the locale of the target part being attached to.annotation - the annotation to be attached to the target part of the text unit.public static void setSourceText(ITextUnit textUnit, String text)
textUnit - the given text unit resource.text - the text to be set.public static void setTargetText(ITextUnit textUnit, LocaleId locId, String text)
textUnit - the given text unit resource.locId - the locale of the target part being set.text - the text to be set.public static void trimTU(ITextUnit textUnit, boolean trimLeading, boolean trimTrailing)
textUnit - the given text unit resource.trimLeading - true to remove leading whitespaces if there are any.trimTrailing - true to remove trailing whitespaces if there are any.public static void addQualifiers(ITextUnit textUnit, String startQualifier, String endQualifier)
textUnit - the given text unit resourcestartQualifier - the qualifier to be added before textendQualifier - the qualifier to be added after textpublic static void addQualifiers(ITextUnit textUnit, String qualifier)
textUnit - the given text unit resourcequalifier - the qualifier to be added before and after textpublic static boolean removeQualifiers(ITextUnit textUnit, String startQualifier, String endQualifier)
textUnit - the given text unit resource.startQualifier - the qualifier to be removed before source text.endQualifier - the qualifier to be removed after source text.public static void simplifyCodes(ITextUnit textUnit, String rules, boolean removeLeadingTrailingCodes)
textUnit - the given text unitrules - rules for the data-driven simplificationremoveLeadingTrailingCodes - true to remove leading and/or trailing codes
of the source part and place their text in the skeleton.public static void simplifyCodes(ITextUnit textUnit, String rules, boolean removeLeadingTrailingCodes, boolean mergeCodes)
textUnit - the given text unit
rules - rules for the data-driven simplification
removeLeadingTrailingCodes - true to remove leading and/or trailing codes of the source part and place their text in the skeleton.
mergeCodes - true to merge adjacent codes, false to leave as-is
public static void simplifyCodesPostSegmentation(ITextUnit textUnit, String rules, boolean removeLeadingTrailingCodes, boolean mergeCodes)
textUnit - the given text unitrules - rules for the data-driven simplificationremoveLeadingTrailingCodes - true to remove leading and/or trailing codes
of the source part and place their text in the corresponding inter-segment TextPart.mergeCodes - true to merge adjacent codes, false to leave as-ispublic static void simplifyCodesPostSegmentation(TextContainer tc, String rules, boolean removeLeadingTrailingCodes, boolean mergeCodes)
tc - the given text containerrules - rules for the data-driven simplificationremoveLeadingTrailingCodes - true to remove leading and/or trailing codes
of the source part and place their text in the corresponding inter-segment TextPart.mergeCodes - true to merge adjacent codes, false to leave as-ispublic static TextFragment expandCodes(TextFragment tf)
tf - The original TextFragment with possibly merged codes.
Returns: the TextFragment with expanded codes, or the original if there are no codes or they have not been merged.
public static boolean hasMergedCode(TextFragment tf)
public static void removeCodes(ITextUnit textUnit, boolean removeTargetCodes)
textUnit - the given text unit
removeTargetCodes - whether to remove the target codes
public static void removeCodes(TextContainer tc)
Removes all inline tags from the given TextContainer.
tc - the given text container
public static void removeCodes(TextFragment tf)
Removes all inline tags from the given TextFragment.
tf - the given text fragment
public static String removeCodes(String codedText)
codedText - the given coded text stringpublic static String removeAndReplaceCodes(String codedText, String isolatedCodeReplacement)
codedText - The given coded text stringisolatedCodeReplacement - The isolated code replacementpublic static TextFragment[] simplifyCodes(TextFragment tf, String rules, boolean removeLeadingTrailingCodes)
tf - the given text fragmentrules - rules for the data-driven simplificationremoveLeadingTrailingCodes - true to remove leading and/or trailing codes
of the source part and place their text in the skeleton.public static TextFragment[] simplifyCodes(TextFragment tf, String rules, boolean removeLeadingTrailingCodes, boolean mergeCodes)
tf - the given text fragmentrules - rules for the data-driven simplificationremoveLeadingTrailingCodes - true to remove leading and/or trailing codes
of the source part and place their text in the skeleton.mergeCodes - true to merge adjacent codes, false to leave as-ispublic static TextFragment[] simplifyCodes(TextContainer tc, String rules, boolean removeLeadingTrailingCodes)
tc - the given text containerrules - rules for the data-driven simplificationremoveLeadingTrailingCodes - true to remove leading and/or trailing codes
of the source part and place their text in the skeleton.public static TextFragment[] simplifyCodes(TextContainer tc, String rules, boolean removeLeadingTrailingCodes, boolean mergeCodes)
tc - the given text containerrules - rules for the data-driven simplificationremoveLeadingTrailingCodes - true to remove leading and/or trailing codes
of the source part and place their text in the skeleton.mergeCodes - true to merge adjacent codes, false to leave as-ispublic static boolean removeQualifiers(ITextUnit textUnit, String qualifier)
textUnit - the given text unit resource.qualifier - the qualifier to be removed before and after source text.public static AltTranslationsAnnotation addAltTranslation(TextContainer targetContainer, AltTranslation alt)
Adds an AltTranslation object to a given TextContainer. The AltTranslationsAnnotation annotation is created if it does not exist already.
targetContainer - the container where to add the object.
alt - alternate translation to add.
public static AltTranslationsAnnotation addAltTranslation(Segment seg, AltTranslation alt)
Adds an AltTranslation object to a given Segment. The AltTranslationsAnnotation annotation is created if it does not exist already.
seg - the segment where to add the object.
alt - alternate translation to add.
public static TextFragment storeSegmentation(TextContainer tc)
public static void trimSegments(TextContainer tc, boolean trimLeading, boolean trimTrailing)
tc - the given text containertrimLeading - true to remove leading whitespaces of a segmenttrimTrailing - true to remove trailing whitespaces of a segmentpublic static void trimSegments(TextContainer tc)
public static TextFragment extractSegMarkers(TextFragment tf, TextFragment original, boolean removeFromOriginal)
tf - the given text fragment to append extracted codesoriginal - the given stringremoveFromOriginal - remove found markers from the given stringpublic static boolean hasSegOrTpMarker(Code code)
public static boolean hasSegStartMarker(Code code)
public static boolean hasSegEndMarker(Code code)
public static boolean hasTpStartMarker(Code code)
public static boolean hasTpEndMarker(Code code)
public static boolean hasExternalRefMarker(Code code)
public static String restoreSegmentation(TextContainer tc, TextFragment segStorage)
tc - the given text containersegStorage - the text fragment created with storeSegmentation() and containing the original segmentation infopublic static String testMarkers()
public static String toText(TextFragment tf)
tf - the given text fragmentpublic static String toText(String text, List<Code> codes)
text - the given coded textcodes - the given list of codespublic static void convertTextPartsToCodes(TextContainer tc)
tc - the given TextContainerpublic static void convertTextPartToCode(TextPart textPart)
textPart - the given TextPartpublic static void convertTextParts_whitespaceCodesToText(TextContainer tc)
public static void convertTextPart_whitespaceCodesToText(TextPart textPart)
public static boolean isStandalone(ITextUnit tu)
public static void renumberCodes(TextContainer tc)
public static boolean needsPreserveWhitespaces(TextContainer tc)
tc - the given TextContainer object.public static boolean needsPreserveWhitespaces(ITextUnit tu)
public static boolean isWellformed(TextFragment tf)
public static boolean isWellformed(TextContainer tc)
public static void unsegmentTU(ITextUnit tu)
Copyright © 2021. All rights reserved.