Class TokenizersLibrary

java.lang.Object
  ai.djl.huggingface.tokenizers.jni.TokenizersLibrary

public final class TokenizersLibrary extends Object
A class containing utilities to interact with the Tokenizer JNI layer.
  • Field Details

  • Method Details

    • createTokenizer

      public long createTokenizer(String identifier, String authToken)
    • createTokenizerFromString

      public long createTokenizerFromString(String json)
    • createBpeTokenizer

      public long createBpeTokenizer(String vocabulary, String merges)
    • deleteTokenizer

      public void deleteTokenizer(long handle)
    • encode

      public long encode(long tokenizer, String text, boolean addSpecialTokens)
    • encodeDual

      public long encodeDual(long tokenizer, String text, String textPair, boolean addSpecialTokens)
    • encodeList

      public long encodeList(long tokenizer, String[] inputs, boolean addSpecialTokens)
    • batchEncode

      public long[] batchEncode(long tokenizer, String[] inputs, boolean addSpecialTokens)
    • batchEncodePair

      public long[] batchEncodePair(long tokenizer, String[] text, String[] textPair, boolean addSpecialTokens)
    • batchDecode

      public String[] batchDecode(long tokenizer, long[][] batchIds, boolean addSpecialTokens)
    • deleteEncoding

      public void deleteEncoding(long encoding)
    • getTokenIds

      public long[] getTokenIds(long encoding)
    • getTypeIds

      public long[] getTypeIds(long encoding)
    • getWordIds

      public long[] getWordIds(long encoding)
    • getTokens

      public String[] getTokens(long encoding)
    • getAttentionMask

      public long[] getAttentionMask(long encoding)
    • getSpecialTokenMask

      public long[] getSpecialTokenMask(long encoding)
    • getTokenCharSpans

      public CharSpan[] getTokenCharSpans(long encoding)
    • getOverflowing

      public long[] getOverflowing(long encoding)
    • getOverflowCount

      public int getOverflowCount(long encoding)
    • decode

      public String decode(long tokenizer, long[] ids, boolean addSpecialTokens)
    • getTruncationStrategy

      public String getTruncationStrategy(long tokenizer)
    • getPaddingStrategy

      public String getPaddingStrategy(long tokenizer)
    • getMaxLength

      public int getMaxLength(long tokenizer)
    • getStride

      public int getStride(long tokenizer)
    • getPadToMultipleOf

      public int getPadToMultipleOf(long tokenizer)
    • disablePadding

      public void disablePadding(long tokenizer)
    • setPadding

      public void setPadding(long tokenizer, int maxLength, String paddingStrategy, int padToMultipleOf)
    • disableTruncation

      public void disableTruncation(long tokenizer)
    • setTruncation

      public void setTruncation(long tokenizer, int maxLength, String truncationStrategy, int stride)