/*
 * Decompiled with CFR 0.152.
 */
package org.springframework.ai.transformer.splitter;

import com.knuddels.jtokkit.Encodings;
import com.knuddels.jtokkit.api.Encoding;
import com.knuddels.jtokkit.api.EncodingRegistry;
import com.knuddels.jtokkit.api.EncodingType;
import com.knuddels.jtokkit.api.IntArrayList;
import java.util.ArrayList;
import java.util.List;
import org.springframework.ai.transformer.splitter.TextSplitter;
import org.springframework.util.Assert;

/**
 * A {@link TextSplitter} that cuts text into chunks measured in tokens of the
 * {@code CL100K_BASE} encoding.
 *
 * <p>Each chunk is taken from a window of at most {@code chunkSize} tokens. When the
 * decoded window contains a sentence boundary ({@code '.'}, {@code '?'}, {@code '!'} or a
 * line feed) whose index is greater than {@code minChunkSizeChars}, the chunk is cut right
 * after the last such boundary. Chunks whose trimmed length is not greater than
 * {@code minChunkLengthToEmbed} are dropped.
 */
public class TokenTextSplitter extends TextSplitter {

    private static final int DEFAULT_CHUNK_SIZE = 800;

    private static final int DEFAULT_MIN_CHUNK_SIZE_CHARS = 350;

    private static final int DEFAULT_MIN_CHUNK_LENGTH_TO_EMBED = 5;

    private static final int DEFAULT_MAX_NUM_CHUNKS = 10000;

    private static final boolean DEFAULT_KEEP_SEPARATOR = true;

    private final EncodingRegistry registry = Encodings.newLazyEncodingRegistry();

    private final Encoding encoding = this.registry.getEncoding(EncodingType.CL100K_BASE);

    /** Window size, in tokens, used by {@link #splitText(String)}. */
    private final int defaultChunkSize;

    /** A chunk is only cut at a sentence boundary located after this character index. */
    private final int minChunkSizeChars;

    /** Chunks whose trimmed length is not greater than this value are discarded. */
    private final int minChunkLengthToEmbed;

    /** Upper bound on the number of chunk iterations performed by the main loop. */
    private final int maxNumChunks;

    /** When {@code false}, line separators inside a chunk are replaced by a single space. */
    private final boolean keepSeparator;

    /**
     * Creates a splitter with the default settings (800 tokens per chunk, 350 minimum
     * characters before a boundary cut, 5 minimum characters to keep a chunk, 10000 chunks
     * at most, separators kept).
     */
    public TokenTextSplitter() {
        this(DEFAULT_KEEP_SEPARATOR);
    }

    /**
     * Creates a splitter with the default sizes and the given separator handling.
     *
     * @param keepSeparator whether line separators are preserved inside the chunks
     */
    public TokenTextSplitter(boolean keepSeparator) {
        this(DEFAULT_CHUNK_SIZE, DEFAULT_MIN_CHUNK_SIZE_CHARS, DEFAULT_MIN_CHUNK_LENGTH_TO_EMBED,
                DEFAULT_MAX_NUM_CHUNKS, keepSeparator);
    }

    /**
     * Creates a fully customised splitter.
     *
     * @param defaultChunkSize window size in tokens used by {@link #splitText(String)}
     * @param minChunkSizeChars character index a sentence boundary must exceed for the
     * chunk to be cut at that boundary
     * @param minChunkLengthToEmbed chunks whose trimmed length is not greater than this
     * value are discarded
     * @param maxNumChunks maximum number of chunk iterations; tokens left over afterwards
     * are emitted as one trailing chunk
     * @param keepSeparator whether line separators are preserved inside the chunks
     */
    public TokenTextSplitter(int defaultChunkSize, int minChunkSizeChars, int minChunkLengthToEmbed,
            int maxNumChunks, boolean keepSeparator) {
        this.defaultChunkSize = defaultChunkSize;
        this.minChunkSizeChars = minChunkSizeChars;
        this.minChunkLengthToEmbed = minChunkLengthToEmbed;
        this.maxNumChunks = maxNumChunks;
        this.keepSeparator = keepSeparator;
    }

    /**
     * Splits the text using the chunk size configured for this splitter.
     *
     * @param text the text to split; {@code null} or blank yields an empty list
     * @return the chunks in document order
     */
    @Override
    protected List<String> splitText(String text) {
        return split(text, this.defaultChunkSize);
    }

    /**
     * Splits the text into chunks built from windows of at most {@code chunkSize} tokens.
     *
     * @param text the text to split; {@code null} or blank yields an empty list
     * @param chunkSize maximum number of tokens per window, must be positive
     * @return a new mutable list with the chunks in document order
     * @throws IllegalArgumentException if {@code text} is not blank and {@code chunkSize}
     * is not positive
     */
    public List<String> split(String text, int chunkSize) {
        if (text == null || text.trim().isEmpty()) {
            return new ArrayList<>();
        }
        // A zero-sized window decodes to "" forever and would never advance.
        Assert.isTrue(chunkSize > 0, "chunkSize must be greater than 0");

        List<Integer> tokens = getEncodedTokens(text);
        List<String> chunks = new ArrayList<>();
        int offset = 0;
        int numChunks = 0;

        while (offset < tokens.size() && numChunks < this.maxNumChunks) {
            int remaining = tokens.size() - offset;
            // Computed from the remainder so that offset + chunkSize cannot overflow.
            int window = Math.min(chunkSize, remaining);
            String chunkText = decodeTokens(tokens.subList(offset, offset + window));

            // Whitespace-only windows are skipped and do not count towards maxNumChunks.
            if (chunkText.trim().isEmpty()) {
                offset += window;
                continue;
            }

            int lastPunctuation = lastSentenceBoundary(chunkText);
            if (lastPunctuation != -1 && lastPunctuation > this.minChunkSizeChars) {
                chunkText = chunkText.substring(0, lastPunctuation + 1);
            }

            String chunkTextToAppend = this.keepSeparator ? chunkText.trim()
                    : chunkText.replace(System.lineSeparator(), " ").trim();
            if (chunkTextToAppend.length() > this.minChunkLengthToEmbed) {
                chunks.add(chunkTextToAppend);
            }

            // The (possibly truncated) chunk is re-encoded to learn how many tokens it
            // covers. The round trip is not guaranteed to reproduce the original count, so
            // the advance is clamped: at least one token to guarantee progress, at most
            // the remaining tokens to stay within bounds.
            int consumed = getEncodedTokens(chunkText).size();
            offset += Math.max(1, Math.min(consumed, remaining));
            numChunks++;
        }

        // Tokens left once maxNumChunks has been reached are emitted as one trailing chunk.
        // NOTE(review): line separators are replaced here even when keepSeparator is true;
        // kept as-is to preserve existing output - confirm whether this is intended.
        if (offset < tokens.size()) {
            String remainingText = decodeTokens(tokens.subList(offset, tokens.size()))
                .replace(System.lineSeparator(), " ")
                .trim();
            if (remainingText.length() > this.minChunkLengthToEmbed) {
                chunks.add(remainingText);
            }
        }
        return chunks;
    }

    /**
     * Returns the index of the last {@code '.'}, {@code '?'}, {@code '!'} or line feed in
     * the text, or {@code -1} when none of them occurs.
     */
    private static int lastSentenceBoundary(String chunkText) {
        int lastTerminator = Math.max(chunkText.lastIndexOf('.'),
                Math.max(chunkText.lastIndexOf('?'), chunkText.lastIndexOf('!')));
        return Math.max(lastTerminator, chunkText.lastIndexOf('\n'));
    }

    /** Encodes the text and returns its token ids as a boxed list. */
    private List<Integer> getEncodedTokens(String text) {
        Assert.notNull(text, "Text must not be null");
        return this.encoding.encode(text).boxed();
    }

    /** Decodes the given token ids back into text. */
    private String decodeTokens(List<Integer> tokens) {
        Assert.notNull(tokens, "Tokens must not be null");
        IntArrayList tokenIds = new IntArrayList(tokens.size());
        for (int token : tokens) {
            tokenIds.add(token);
        }
        return this.encoding.decode(tokenIds);
    }
}

