Class TokenizersLibrary
java.lang.Object
ai.djl.huggingface.tokenizers.jni.TokenizersLibrary
A class containing utilities to interact with the Tokenizer JNI layer.
-
Field Summary
Fields
Modifier and Type | Field | Description
static final TokenizersLibrary | LIB |
-
Method Summary
Modifier and Type | Method | Description
String[] | batchDecode(long tokenizer, long[][] batchIds, boolean addSpecialTokens) |
long[] | batchEncode(long tokenizer, String[] inputs, boolean addSpecialTokens) |
long[] | batchEncodePair(long tokenizer, String[] text, String[] textPair, boolean addSpecialTokens) |
long | createBpeTokenizer(String vocabulary, String merges) |
long | createTokenizer(String identifier, String authToken) |
long | createTokenizerFromString(String json) |
String | decode(long tokenizer, long[] ids, boolean addSpecialTokens) |
void | deleteEncoding(long encoding) |
void | deleteTokenizer(long handle) |
void | disablePadding(long tokenizer) |
void | disableTruncation(long tokenizer) |
long | encode(long tokenizer, String text, boolean addSpecialTokens) |
long | encodeDual(long tokenizer, String text, String textPair, boolean addSpecialTokens) |
long | encodeList(long tokenizer, String[] inputs, boolean addSpecialTokens) |
long[] | getAttentionMask(long encoding) |
int | getMaxLength(long tokenizer) |
int | getOverflowCount(long encoding) |
long[] | getOverflowing(long encoding) |
String | getPaddingStrategy(long tokenizer) |
int | getPadToMultipleOf(long tokenizer) |
long[] | getSpecialTokenMask(long encoding) |
int | getStride(long tokenizer) |
CharSpan[] | getTokenCharSpans(long encoding) |
long[] | getTokenIds(long encoding) |
String[] | getTokens(long encoding) |
String | getTruncationStrategy(long tokenizer) |
long[] | getTypeIds(long encoding) |
long[] | getWordIds(long encoding) |
void | setPadding(long tokenizer, int maxLength, String paddingStrategy, int padToMultipleOf) |
void | setTruncation(long tokenizer, int maxLength, String truncationStrategy, int stride) |
-
Field Details
-
LIB
public static final TokenizersLibrary LIB -
-
Method Details
-
createTokenizer
public long createTokenizer(String identifier, String authToken) -
createTokenizerFromString
public long createTokenizerFromString(String json) -
createBpeTokenizer
public long createBpeTokenizer(String vocabulary, String merges) -
deleteTokenizer
public void deleteTokenizer(long handle) -
encode
public long encode(long tokenizer, String text, boolean addSpecialTokens) -
encodeDual
public long encodeDual(long tokenizer, String text, String textPair, boolean addSpecialTokens) -
encodeList
public long encodeList(long tokenizer, String[] inputs, boolean addSpecialTokens) -
batchEncode
public long[] batchEncode(long tokenizer, String[] inputs, boolean addSpecialTokens) -
batchEncodePair
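public long[] batchEncodePair(long tokenizer, String[] text, String[] textPair, boolean addSpecialTokens) -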
batchDecode
public String[] batchDecode(long tokenizer, long[][] batchIds, boolean addSpecialTokens) -
deleteEncoding
public void deleteEncoding(long encoding) -
getTokenIds
public long[] getTokenIds(long encoding) -
getTypeIds
public long[] getTypeIds(long encoding) -
getWordIds
public long[] getWordIds(long encoding) -
getTokens
public String[] getTokens(long encoding) -
getAttentionMask
public long[] getAttentionMask(long encoding) -
getSpecialTokenMask
public long[] getSpecialTokenMask(long encoding) -
getTokenCharSpans
public CharSpan[] getTokenCharSpans(long encoding) -
getOverflowing
public long[] getOverflowing(long encoding) -
getOverflowCount
public int getOverflowCount(long encoding) -
decode
public String decode(long tokenizer, long[] ids, boolean addSpecialTokens) -
getTruncationStrategy
public String getTruncationStrategy(long tokenizer) -
getPaddingStrategy
public String getPaddingStrategy(long tokenizer) -
getMaxLength
public int getMaxLength(long tokenizer) -
getStride
public int getStride(long tokenizer) -
getPadToMultipleOf
public int getPadToMultipleOf(long tokenizer) -
disablePadding
public void disablePadding(long tokenizer) -
setPadding
public void setPadding(long tokenizer, int maxLength, String paddingStrategy, int padToMultipleOf) -
disableTruncation
public void disableTruncation(long tokenizer) -
setTruncation
public void setTruncation(long tokenizer, int maxLength, String truncationStrategy, int stride) -