/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.hanlp.model.perceptron;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.document.sentence.Sentence;
import com.hankcs.hanlp.model.perceptron.PerceptronTagger;
import com.hankcs.hanlp.model.perceptron.common.TaskType;
import com.hankcs.hanlp.model.perceptron.feature.FeatureMap;
import com.hankcs.hanlp.model.perceptron.instance.CWSInstance;
import com.hankcs.hanlp.model.perceptron.instance.Instance;
import com.hankcs.hanlp.model.perceptron.model.LinearModel;
import com.hankcs.hanlp.model.perceptron.tagset.CWSTagSet;
import com.hankcs.hanlp.model.perceptron.utility.Utility;
import com.hankcs.hanlp.tokenizer.lexical.Segmenter;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;

/**
 * Word segmenter that decodes a per-character tag sequence with a perceptron
 * {@link LinearModel} and cuts the input text wherever a new word starts.
 *
 * <p>The wrapped model must carry a tag set whose type is {@link TaskType#CWS};
 * the constructor rejects any other model.
 */
public class PerceptronSegmenter extends PerceptronTagger implements Segmenter {
    /** Tag set of the wrapped model, used to recognise the {@code B} and {@code S} tags. */
    private final CWSTagSet cwsTagSet;

    /**
     * Wraps an already loaded segmentation model.
     *
     * @param cwsModel model whose {@code featureMap.tagSet.type} must be {@link TaskType#CWS}
     * @throws IllegalArgumentException if the model's tag set is of any other task type
     */
    public PerceptronSegmenter(LinearModel cwsModel) {
        super(cwsModel);
        if (cwsModel.featureMap.tagSet.type != TaskType.CWS) {
            // Message (Chinese): "wrong model type: the model passed in is not a
            // segmentation model but a %s model".
            throw new IllegalArgumentException(String.format(
                    "\u9519\u8bef\u7684\u6a21\u578b\u7c7b\u578b: \u4f20\u5165\u7684\u4e0d\u662f\u5206\u8bcd\u6a21\u578b\uff0c\u800c\u662f %s \u6a21\u578b",
                    cwsModel.featureMap.tagSet.type));
        }
        this.cwsTagSet = (CWSTagSet) cwsModel.featureMap.tagSet;
    }

    /**
     * Loads the model from {@code cwsModelFile} and wraps it.
     *
     * @param cwsModelFile location handed to {@link LinearModel#LinearModel(String)}
     * @throws IOException if constructing the model fails with an I/O error
     */
    public PerceptronSegmenter(String cwsModelFile) throws IOException {
        this(new LinearModel(cwsModelFile));
    }

    /**
     * Loads the model located at {@code HanLP.Config.PerceptronCWSModelPath}.
     *
     * @throws IOException if constructing the model fails with an I/O error
     */
    public PerceptronSegmenter() throws IOException {
        this(HanLP.Config.PerceptronCWSModelPath);
    }

    /**
     * Normalizes {@code text} with {@code normalize} and appends the resulting
     * words to {@code output}.
     *
     * @param text   raw text to segment
     * @param output list that receives the words, in order
     */
    public void segment(String text, List<String> output) {
        this.segment(text, this.normalize(text), output);
    }

    /**
     * Segments {@code text} using tags decoded from {@code normalized}.
     *
     * <p>Tags are predicted for {@code normalized}, but the emitted words are
     * substrings of the original {@code text}. Nothing is appended when
     * {@code text} is empty.
     *
     * @param text       raw text the words are cut from
     * @param normalized normalized form of {@code text} fed to the model;
     *                   NOTE(review): the loop indexes {@code text} by tag position,
     *                   so this presumably must have the same length as {@code text} - confirm
     * @param output     list that receives the words, in order
     */
    @Override
    public void segment(String text, String normalized, List<String> output) {
        if (text.isEmpty()) {
            return;
        }
        CWSInstance instance = new CWSInstance(normalized, this.model.featureMap);
        int[] tags = instance.tagArray;
        this.model.viterbiDecode(instance, tags);

        StringBuilder word = new StringBuilder();
        // The first character always opens the first word, whatever its tag is.
        word.append(text.charAt(0));
        for (int i = 1; i < tags.length; ++i) {
            int tag = tags[i];
            // B and S both mark the first character of a word: flush the pending one.
            if (tag == this.cwsTagSet.B || tag == this.cwsTagSet.S) {
                output.add(word.toString());
                word.setLength(0);
            }
            word.append(text.charAt(i));
        }
        if (word.length() != 0) {
            output.add(word.toString());
        }
    }

    /**
     * Segments {@code sentence} and returns the words in a new list.
     *
     * @param sentence raw text to segment
     * @return the words in order; empty when {@code sentence} is empty
     */
    @Override
    public List<String> segment(String sentence) {
        List<String> words = new LinkedList<String>();
        this.segment(sentence, words);
        return words;
    }

    /**
     * Learns from a sentence whose words are separated by whitespace.
     *
     * <p>Leading and trailing whitespace is ignored. {@code String.split} would
     * otherwise report a zero-length first "word" for input that starts with a
     * separator (and for the empty string), so the sentence is trimmed first and
     * a blank sentence is passed on as zero words.
     *
     * @param segmentedSentence gold segmentation, words separated by whitespace
     * @return the result of {@link #learn(String...)} for the extracted words
     */
    public boolean learn(String segmentedSentence) {
        String trimmed = segmentedSentence.trim();
        String[] words = trimmed.isEmpty() ? new String[0] : trimmed.split("\\s+");
        return this.learn(words);
    }

    /**
     * Learns from a sentence given as its sequence of words.
     *
     * @param words gold segmentation, one element per word
     * @return the result of the inherited {@code learn(Instance)} call
     */
    public boolean learn(String ... words) {
        return this.learn(new CWSInstance(words, this.model.featureMap));
    }

    /**
     * Builds the training instance for {@code sentence} via
     * {@link CWSInstance#create(Sentence, FeatureMap)}.
     */
    @Override
    protected Instance createInstance(Sentence sentence, FeatureMap featureMap) {
        return CWSInstance.create(sentence, featureMap);
    }

    /**
     * Evaluates this segmenter on {@code corpora}.
     *
     * @param corpora corpus location handed to {@code Utility.evaluateCWS}
     * @return the array produced by {@code Utility.prf} from the evaluation counts
     * @throws IOException if the evaluation fails with an I/O error
     */
    @Override
    public double[] evaluate(String corpora) throws IOException {
        return Utility.prf(Utility.evaluateCWS(corpora, this));
    }
}

