/*
 * Decompiled with CFR 0.152.
 */
package org.apache.solr.update.processor;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import opennlp.tools.util.Span;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.opennlp.OpenNLPTokenizer;
import org.apache.lucene.analysis.opennlp.tools.NLPNERTaggerOp;
import org.apache.lucene.analysis.opennlp.tools.OpenNLPOpsFactory;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.Pair;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.update.AddUpdateCommand;
import org.apache.solr.update.processor.FieldMutatingUpdateProcessor;
import org.apache.solr.update.processor.FieldMutatingUpdateProcessorFactory;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.solr.update.processor.UpdateRequestProcessorFactory;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class OpenNLPExtractNamedEntitiesUpdateProcessorFactory
extends UpdateRequestProcessorFactory
implements SolrCoreAware {
    /** Logger bound to this class. */
    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    /** Init param: source field name(s), or a {@code <lst>} of selector rules (may contain {@code exclude} lists). */
    public static final String SOURCE_PARAM = "source";

    /** Init param: destination field name, or a {@code <lst>} holding {@code pattern} and {@code replacement}. */
    public static final String DEST_PARAM = "dest";

    /** Init param: regex that is matched against the source field name. */
    public static final String PATTERN_PARAM = "pattern";

    /** Init param: replacement string used with the pattern to derive the destination field name. */
    public static final String REPLACEMENT_PARAM = "replacement";

    /** Init param: the NER model file handed to {@code OpenNLPOpsFactory}. */
    public static final String MODEL_PARAM = "modelFile";

    /** Init param: name of the schema field type whose index analyzer tokenizes the source values. */
    public static final String ANALYZER_FIELD_TYPE_PARAM = "analyzerFieldType";

    /** Placeholder in the destination name that is substituted with the type of each extracted entity. */
    public static final String ENTITY_TYPE = "{EntityType}";

    /** Rules selecting the source fields; populated during {@code init}. */
    private FieldMutatingUpdateProcessorFactory.SelectorParams srcInclusions = new FieldMutatingUpdateProcessorFactory.SelectorParams();

    /** Rules excluding fields from the selection; one entry per configured {@code exclude} list. */
    private Collection<FieldMutatingUpdateProcessorFactory.SelectorParams> srcExclusions = new ArrayList<>();

    /** Effective source-field selector; stays {@code null} until {@code inform(SolrCore)} runs. */
    private FieldMutatingUpdateProcessor.FieldNameSelector srcSelector;

    /** Value of the {@code modelFile} init param. */
    private String modelFile;

    /** Value of the {@code analyzerFieldType} init param. */
    private String analyzerFieldType;

    /** Destination field name, or the replacement template when {@link #pattern} is set. */
    private String dest;

    /** Optional regex applied to the source field name; {@code null} when the destination is a fixed name. */
    private Pattern pattern;

    /**
     * Returns the source field selector created by {@link #inform(SolrCore)}.
     *
     * @return the selector; never {@code null}
     * @throws SolrException ({@code SERVER_ERROR}) if the selector has not been created yet
     */
    protected final FieldMutatingUpdateProcessor.FieldNameSelector getSourceSelector() {
        final FieldMutatingUpdateProcessor.FieldNameSelector selector = this.srcSelector;
        if (selector == null) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "selector was never initialized, inform(SolrCore) never called???");
        }
        return selector;
    }

    /**
     * Parses the factory configuration.
     *
     * <p>Two syntaxes are accepted: the full form ({@code source} + {@code dest}) and the
     * short-hand form ({@code pattern} + {@code replacement}). Both additionally require the
     * {@code modelFile} and {@code analyzerFieldType} string params. Any param left over after
     * parsing is rejected.
     *
     * @param args the init params; consumed (entries are removed) while parsing
     * @throws SolrException ({@code SERVER_ERROR}) on missing, malformed or unexpected params
     */
    @Override
    public void init(NamedList args) {
        // Detach the two shared params BEFORE dispatching: the short-hand parser rejects any
        // arg that remains once 'pattern' and 'replacement' are removed, so leaving these in
        // place would make the short-hand syntax impossible to use. They are validated after
        // the dispatch so that error precedence for the full syntax stays as it was.
        final Object modelParam = args.remove(MODEL_PARAM);
        final Object analyzerFieldTypeParam = args.remove(ANALYZER_FIELD_TYPE_PARAM);

        // Loose check for which syntax is in use; the specific parsers validate strictly.
        if (0 <= args.indexOf(SOURCE_PARAM, 0) && 0 <= args.indexOf(DEST_PARAM, 0)) {
            this.initSourceSelectorSyntax(args);
        } else if (0 <= args.indexOf(PATTERN_PARAM, 0) && 0 <= args.indexOf(REPLACEMENT_PARAM, 0)) {
            this.initSimpleRegexReplacement(args);
        } else {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "A combination of either 'source' + 'dest', or 'replacement' + 'pattern' init params are mandatory");
        }

        this.modelFile = requireStringParam(MODEL_PARAM, modelParam);
        this.analyzerFieldType = requireStringParam(ANALYZER_FIELD_TYPE_PARAM, analyzerFieldTypeParam);

        if (0 < args.size()) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unexpected init param(s): '" + args.getName(0) + "'");
        }
        super.init(args);
    }

    /**
     * Validates that a mandatory init param is present and is a character sequence.
     *
     * @param name  the param name, used in error messages
     * @param value the raw value taken from the init args; may be {@code null}
     * @return the value as a string
     * @throws SolrException ({@code SERVER_ERROR}) if the value is missing or not a {@code CharSequence}
     */
    private static String requireStringParam(String name, Object value) {
        if (null == value) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Missing required init param '" + name + "'");
        }
        if (!(value instanceof CharSequence)) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Init param '" + name + "' must be a <str>");
        }
        return value.toString();
    }

    /**
     * Parses the short-hand syntax: a top-level {@code pattern} (regex on the field name) and
     * {@code replacement} (destination name template). The compiled pattern also becomes the
     * sole inclusion rule for source fields.
     *
     * <p>After {@code pattern} and {@code replacement} are removed, {@code args} must be empty;
     * any remaining entry is rejected.
     *
     * @param args the init params; {@code pattern} and {@code replacement} are removed from it
     * @throws SolrException ({@code SERVER_ERROR}) if the params are mixed with the full syntax,
     *         missing, not strings, accompanied by other params, or the regex is invalid
     */
    private void initSimpleRegexReplacement(NamedList args) {
        if (args.indexOf(SOURCE_PARAM, 0) >= 0 || args.indexOf(DEST_PARAM, 0) >= 0) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Short hand syntax must not be mixed with full syntax. Found pattern and replacement but also found source or dest");
        }
        assert (args.indexOf(SOURCE_PARAM, 0) < 0);

        final Object rawPattern = args.remove(PATTERN_PARAM);
        final Object rawReplacement = args.remove(REPLACEMENT_PARAM);

        if (rawPattern == null || rawReplacement == null) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Init params 'pattern' and 'replacement' are both mandatory if 'source' and 'dest' are not both specified");
        }
        // Nothing else may accompany the short-hand params.
        if (args.size() != 0) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Init params 'replacement' and 'pattern' must be children of 'dest' to be combined with other options.");
        }
        if (!(rawReplacement instanceof String)) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Init param 'replacement' must be a string (i.e. <str>)");
        }
        if (!(rawPattern instanceof String)) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Init param 'pattern' must be a string (i.e. <str>)");
        }

        this.dest = rawReplacement.toString();
        try {
            this.pattern = Pattern.compile(rawPattern.toString());
        } catch (PatternSyntaxException badRegex) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Init param pattern is not a valid regex pattern: " + rawPattern, badRegex);
        }

        // The same regex doubles as the source-field selector.
        final FieldMutatingUpdateProcessorFactory.SelectorParams inclusions = new FieldMutatingUpdateProcessorFactory.SelectorParams();
        inclusions.fieldRegex = Collections.singletonList(this.pattern);
        this.srcInclusions = inclusions;
    }

    /**
     * Parses the full syntax: a {@code source} param (plain field names, or a single
     * {@code <lst>} of selector rules with optional {@code exclude} lists) and a {@code dest}
     * param (a fixed field name, or a {@code <lst>} with {@code pattern} and {@code replacement}).
     *
     * @param args the init params; {@code source} and {@code dest} entries are removed from it
     * @throws SolrException ({@code SERVER_ERROR}) if the params are mixed with the short-hand
     *         syntax, contain unexpected children, have the wrong type, or the regex is invalid
     */
    private void initSourceSelectorSyntax(NamedList args) {
        if (0 <= args.indexOf(PATTERN_PARAM, 0) || 0 <= args.indexOf(REPLACEMENT_PARAM, 0)) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Short hand syntax must not be mixed with full syntax. Found source and dest but also found pattern or replacement");
        }
        Object d = args.remove(DEST_PARAM);
        assert (null != d);
        List sources = args.getAll(SOURCE_PARAM);
        assert (null != sources);

        // A single <lst name="source"> carries selector rules rather than a plain field name.
        if (1 == sources.size() && sources.get(0) instanceof NamedList) {
            NamedList selectorConfig = (NamedList) args.remove(SOURCE_PARAM);
            this.srcInclusions = OpenNLPExtractNamedEntitiesUpdateProcessorFactory.parseSelectorParams(selectorConfig);
            List excList = selectorConfig.getAll("exclude");
            for (Object excObj : excList) {
                if (null == excObj) {
                    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Init param 'source' child 'exclude' can not be null");
                }
                if (!(excObj instanceof NamedList)) {
                    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Init param 'source' child 'exclude' must be <lst/>");
                }
                NamedList exc = (NamedList) excObj;
                this.srcExclusions.add(OpenNLPExtractNamedEntitiesUpdateProcessorFactory.parseSelectorParams(exc));
                if (0 < exc.size()) {
                    // Report the leftover entry of the exclude list itself, not of its parent.
                    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Init param 'source' has unexpected 'exclude' sub-param(s): '" + exc.getName(0) + "'");
                }
                // Drop the exclude entry just handled so the leftover check below ignores it.
                selectorConfig.remove("exclude");
            }
            if (0 < selectorConfig.size()) {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Init param 'source' contains unexpected child param(s): '" + selectorConfig.getName(0) + "'");
            }
            // 'sources' is a detached copy; removing here only records that the list form was consumed.
            sources.remove(0);
        }

        // Guard before the dereference below so a missing selector yields a configuration
        // error instead of a NullPointerException.
        if (this.srcInclusions == null) {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Init params do not specify any field from which to extract entities, please supply either source and dest or pattern and replacement. See javadocsfor OpenNLPExtractNamedEntitiesUpdateProcessor for further details.");
        }
        if (1 <= sources.size()) {
            // Plain source values are treated as explicit field names.
            this.srcInclusions.fieldName = new HashSet(args.removeConfigArgs(SOURCE_PARAM));
        }

        if (d instanceof NamedList) {
            NamedList destList = (NamedList) d;
            Object patt = destList.remove(PATTERN_PARAM);
            Object replacement = destList.remove(REPLACEMENT_PARAM);
            if (null == patt || null == replacement) {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Init param 'dest' children 'pattern' and 'replacement' are both mandatory and can not be null");
            }
            if (!(patt instanceof String) || !(replacement instanceof String)) {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Init param 'dest' children 'pattern' and 'replacement' must both be strings (i.e. <str>)");
            }
            if (0 != destList.size()) {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Init param 'dest' has unexpected children: '" + destList.getName(0) + "'");
            }
            try {
                this.pattern = Pattern.compile(patt.toString());
            } catch (PatternSyntaxException pe) {
                throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Init param 'dest' child 'pattern is not a valid regex pattern: " + patt, pe);
            }
            this.dest = replacement.toString();
        } else if (d instanceof String) {
            this.dest = d.toString();
        } else {
            throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Init param 'dest' must either be a string (i.e. <str>) or a list (i.e. <lst>) containing 'pattern' and 'replacement");
        }
    }

    /**
     * Builds the source field selector from the parsed inclusion and exclusion rules, then
     * loads the configured NER model through the core's resource loader.
     *
     * @param core the core supplying the resource loader used for selector and model lookup
     * @throws IllegalArgumentException wrapping the {@link IOException} if the model cannot be loaded
     */
    @Override
    public void inform(SolrCore core) {
        // Start from the inclusion rules; with no match the default is to select nothing.
        this.srcSelector = FieldMutatingUpdateProcessor.createFieldNameSelector(
                core.getResourceLoader(), core, this.srcInclusions, FieldMutatingUpdateProcessor.SELECT_NO_FIELDS);

        // Layer each exclusion rule set on top of the current selector.
        for (FieldMutatingUpdateProcessorFactory.SelectorParams exclusion : this.srcExclusions) {
            final FieldMutatingUpdateProcessor.FieldNameSelector excluded = FieldMutatingUpdateProcessor.createFieldNameSelector(
                    core.getResourceLoader(), core, exclusion, FieldMutatingUpdateProcessor.SELECT_NO_FIELDS);
            this.srcSelector = FieldMutatingUpdateProcessor.wrap(this.srcSelector, excluded);
        }

        // The returned model is discarded; NOTE(review): presumably this pre-loads it so a bad
        // model path fails at startup rather than on the first update - confirm.
        try {
            OpenNLPOpsFactory.getNERTaggerModel(this.modelFile, core.getResourceLoader());
        } catch (IOException loadFailure) {
            throw new IllegalArgumentException(loadFailure);
        }
    }

    /**
     * Creates a processor that, for every selected source field of an added document, tokenizes
     * each value with the configured analyzer, runs the NER tagger over the tokens, and appends
     * the text of each extracted entity to the resolved destination field.
     *
     * @param req  the request; its schema is used to look up the analyzer field type
     * @param rsp  the response (unused here)
     * @param next the next processor in the chain
     * @return the entity-extracting processor
     * @throws SolrException ({@code SERVER_ERROR}) if the selector was never initialized or the
     *         analyzer field type is not in the schema
     * @throws IllegalArgumentException wrapping an {@link IOException} raised while obtaining the tagger
     */
    @Override
    public final UpdateRequestProcessor getInstance(final SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
        final FieldMutatingUpdateProcessor.FieldNameSelector srcSelector = this.getSourceSelector();
        return new UpdateRequestProcessor(next) {
            private final NLPNERTaggerOp nerTaggerOp;
            private final Analyzer analyzer;

            {
                try {
                    this.nerTaggerOp = OpenNLPOpsFactory.getNERTagger(modelFile);
                    FieldType fieldType = req.getSchema().getFieldTypeByName(analyzerFieldType);
                    if (fieldType == null) {
                        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "analyzerFieldType '" + analyzerFieldType + "' not found in the schema.");
                    }
                    this.analyzer = fieldType.getIndexAnalyzer();
                } catch (IOException e) {
                    throw new IllegalArgumentException(e);
                }
            }

            /**
             * Extracts entities from every selected source field and adds them to the
             * destination field(s) before passing the command down the chain.
             */
            @Override
            public void processAdd(AddUpdateCommand cmd) throws IOException {
                final SolrInputDocument doc = cmd.getSolrInputDocument();

                // Destination fields are collected here and only written back after the loop,
                // so the document is not modified while its field names are being iterated.
                final Map<String, SolrInputField> destMap = new HashMap<>();

                for (String fname : doc.getFieldNames()) {
                    if (!srcSelector.shouldMutate(fname)) {
                        continue;
                    }
                    final Collection<?> srcFieldValues = doc.getFieldValues(fname);
                    if (srcFieldValues == null || srcFieldValues.isEmpty()) {
                        continue;
                    }

                    // Destination name for this source field; may still contain the
                    // {EntityType} placeholder.
                    String resolvedDest = dest;
                    if (pattern != null) {
                        Matcher matcher = pattern.matcher(fname);
                        if (!matcher.find()) {
                            log.debug("srcSelector.shouldMutate(\"{}\") returned true, but replacement pattern did not match, field skipped.", fname);
                            continue;
                        }
                        resolvedDest = matcher.replaceAll(dest);
                    }

                    for (Object val : srcFieldValues) {
                        for (Pair<String, String> entity : extractTypedNamedEntities(val)) {
                            final String entityName = entity.first();
                            final String entityType = entity.second();

                            // Substitute into a fresh variable: overwriting resolvedDest would
                            // consume the placeholder on the first entity and route every later
                            // entity, whatever its type, to the first entity's field.
                            final String resolved = resolvedDest.replace(ENTITY_TYPE, entityType);

                            SolrInputField destField;
                            if (doc.containsKey(resolved)) {
                                destField = doc.getField(resolved);
                            } else {
                                destField = destMap.get(resolved);
                                if (destField == null) {
                                    destField = new SolrInputField(resolved);
                                }
                            }
                            destField.addValue(entityName);
                            destMap.put(resolved, destField);
                        }
                    }
                }

                for (Map.Entry<String, SolrInputField> entry : destMap.entrySet()) {
                    doc.put(entry.getKey(), entry.getValue());
                }
                super.processAdd(cmd);
            }

            /**
             * Tokenizes one field value and returns the entities found in it.
             *
             * @param srcFieldValue the field value; its {@code toString()} is analyzed
             * @return (entity text, entity type) pairs in the order they were found
             * @throws IOException if the token stream fails
             */
            private List<Pair<String, String>> extractTypedNamedEntities(Object srcFieldValue) throws IOException {
                final List<Pair<String, String>> entitiesWithType = new ArrayList<>();
                final List<String> terms = new ArrayList<>();
                final List<Integer> startOffsets = new ArrayList<>();
                final List<Integer> endOffsets = new ArrayList<>();
                final String fullText = srcFieldValue.toString();

                // try-with-resources guarantees the stream is closed even when tokenization or
                // tagging throws.
                try (TokenStream tokenStream = this.analyzer.tokenStream("", fullText)) {
                    final CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
                    final OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
                    final FlagsAttribute flagsAtt = tokenStream.addAttribute(FlagsAttribute.class);
                    tokenStream.reset();

                    // The tagger is locked for the whole value, from the first sentence to reset().
                    synchronized (this.nerTaggerOp) {
                        while (tokenStream.incrementToken()) {
                            terms.add(termAtt.toString());
                            startOffsets.add(offsetAtt.startOffset());
                            endOffsets.add(offsetAtt.endOffset());
                            final boolean endOfSentence = 0 != (flagsAtt.getFlags() & OpenNLPTokenizer.EOS_FLAG_BIT);
                            if (endOfSentence) {
                                // Tag one sentence at a time; the helper clears the buffers.
                                this.extractEntitiesFromSentence(fullText, terms, startOffsets, endOffsets, entitiesWithType);
                            }
                        }
                        tokenStream.end();
                        // Tokens after the last end-of-sentence flag form a final sentence.
                        if (!terms.isEmpty()) {
                            this.extractEntitiesFromSentence(fullText, terms, startOffsets, endOffsets, entitiesWithType);
                        }
                        this.nerTaggerOp.reset();
                    }
                }
                return entitiesWithType;
            }

            /**
             * Tags the buffered sentence, appends each entity found (as original text plus
             * type) to {@code entitiesWithType}, then clears the three token buffers.
             */
            private void extractEntitiesFromSentence(String fullText, List<String> terms, List<Integer> startOffsets, List<Integer> endOffsets, List<Pair<String, String>> entitiesWithType) {
                for (Span span : this.nerTaggerOp.getNames(terms.toArray(new String[terms.size()]))) {
                    // Span end is exclusive in token indices, so the last token is getEnd() - 1;
                    // the entity text is cut from the original string by character offsets.
                    final int from = startOffsets.get(span.getStart());
                    final int to = endOffsets.get(span.getEnd() - 1);
                    entitiesWithType.add(new Pair<>(fullText.substring(from, to), span.getType()));
                }
                terms.clear();
                startOffsets.clear();
                endOffsets.clear();
            }
        };
    }

    /**
     * Delegates to {@code FieldMutatingUpdateProcessorFactory.parseSelectorParams}.
     *
     * @param args the selector configuration list
     * @return the parsed selector params
     */
    private static FieldMutatingUpdateProcessorFactory.SelectorParams parseSelectorParams(NamedList args) {
        final FieldMutatingUpdateProcessorFactory.SelectorParams parsed = FieldMutatingUpdateProcessorFactory.parseSelectorParams(args);
        return parsed;
    }
}

