/*===========================================================================
  Copyright (C) 2017-2018 by the Okapi Framework contributors
-----------------------------------------------------------------------------
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
===========================================================================*/

package net.sf.okapi.filters.markdown;

import static net.sf.okapi.filters.markdown.parser.MarkdownTokenType.*;

import java.io.InputStream;
import java.util.Deque;
import java.util.LinkedList;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import net.sf.okapi.common.EventType;
import net.sf.okapi.common.filterwriter.IFilterWriter;
import net.sf.okapi.common.skeleton.ISkeletonWriter;
import net.sf.okapi.filters.yaml.YamlFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import net.sf.okapi.common.BOMNewlineEncodingDetector;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.MimeTypeMapper;
import net.sf.okapi.common.UsingParameters;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.exceptions.OkapiBadFilterInputException;
import net.sf.okapi.common.filters.AbstractFilter;
import net.sf.okapi.common.filters.FilterConfiguration;
import net.sf.okapi.common.filters.FilterUtil;
import net.sf.okapi.common.filters.SubFilter;
import net.sf.okapi.common.resource.Code;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.RawDocument;
import net.sf.okapi.common.resource.TextFragment;
import net.sf.okapi.common.resource.TextFragment.TagType;
import net.sf.okapi.common.resource.TextPart;
import net.sf.okapi.filters.html.HtmlFilter;
import net.sf.okapi.filters.markdown.parser.MarkdownParser;
import net.sf.okapi.filters.markdown.parser.MarkdownToken;
import net.sf.okapi.filters.markdown.parser.MarkdownTokenType;

@UsingParameters(Parameters.class)
public class MarkdownFilter extends AbstractFilter {
    // Resource name of the HTML sub-filter parameters that the constructor loads via getResourceAsStream().
    private static final String DEFAULT_HTML_SUBFILTER_CONFIG_SPEC = "okf_html@for_markdown.fprm";
    private static final Pattern HTML_CDATA_PAT = Pattern.compile("\\<!\\[CDATA\\[(.*)\\]\\]\\>"); // Pattern matching a CData section.
    // Matches the (possibly empty) run of spaces at the end of the input; used to strip trailing spaces from a line prefix.
    private static final Pattern TAIL_SPACES_PAT = Pattern.compile(" *$");
    // Template codes wrapped around the text of an inline CDATA section. They are cloned before each use.
    private static final Code CDATA_START_CODE = new Code(TagType.OPENING, "cdata", "<![CDATA[");
    private static final Code CDATA_END_CODE = new Code(TagType.CLOSING, "cdata", "]]>");
    // A LINK/LINK_REF token with exactly this content is treated as the opening code; any other content as the closing code.
    private static final String LINK_START_MARKER = "[";

    private final Logger LOGGER = LoggerFactory.getLogger(getClass());
    // Re-created from the current parameters each time a document is opened.
    private MarkdownParser parser;
    private Parameters params = new Parameters();
    // The document passed to the most recent open() call.
    private RawDocument currentRawDocument;
    // Created in open(); set to null by close().
    private BOMNewlineEncodingDetector detector;
    // Sub-filter used for HTML blocks and inline HTML tags.
    private HtmlFilter htmlFilter;
    // Incremented for each HTML sub-filter invocation; reset to 0 in open().
    private int htmlSectionIndex;
    // Created (or reset) in open(); set to null by close().
    private MarkdownEventBuilder eventBuilder;
    // Sub-filter used for the YAML metadata header.
    private YamlFilter yamlFilter;
    // Resource name of the YAML sub-filter parameters that the constructor loads via getResourceAsStream().
    private static final String DEFAULT_YAML_SUBFILTER_CONFIG_SPEC = "okf_yaml@for_markdown.fprm";
    // Incremented for each YAML sub-filter invocation; reset to 0 in open().
    private int yamlSectionIndex;
    // Annotation built from the content of the most recent LINE_PREFIX token; attached to text units and document parts.
    private MarkdownLinePrefixAnnotation lpa = new MarkdownLinePrefixAnnotation("");
    private MarkdownLinePrefixAnnotation lpabl = new MarkdownLinePrefixAnnotation(""); // Special case for blank line: the line prefix with its trailing spaces removed.
    // Stack of the marker strings of EMPHASIS/STRONG_EMPHASIS/CODE tokens that have been opened but not yet closed.
    private Deque<String> openedCodes;

    // Used to return a list of events generated by a subfilter and reference to it.
    private static class EventsAndRefCode {
        // Events returned by the sub-filter's getEvents() call.
        List<Event> events;
        // Code returned by the sub-filter's createRefCode() call.
        Code refCode;
    }

    /**
     * Creates the filter: registers its identity and default configuration, and prepares the
     * HTML and YAML sub-filters with the parameter files bundled next to this class.
     */
    public MarkdownFilter() {
        super();
        this.parser = new MarkdownParser(params);
        this.openedCodes = new LinkedList<>();

        final String mimeType = MimeTypeMapper.MARKDOWN_MIME_TYPE;
        setMimeType(mimeType);
        setMultilingual(false);
        setName("okf_markdown");
        setDisplayName("Markdown Filter");
        // must be called *after* parameters is initialized
        setFilterWriter(createFilterWriter());

        final FilterConfiguration defaultConfiguration = new FilterConfiguration(
                getName(), mimeType, getClass().getName(), "Markdown", "Markdown files", null, ".md;.markdown");
        addConfiguration(defaultConfiguration);

        // HTML sub-filter, configured from the bundled parameter resource.
        htmlFilter = new HtmlFilter();
        final InputStream htmlConfig = getClass().getResourceAsStream(DEFAULT_HTML_SUBFILTER_CONFIG_SPEC);
        htmlFilter.getParameters().load(htmlConfig, false);

        // YAML sub-filter, configured from the bundled parameter resource.
        yamlFilter = new YamlFilter();
        final InputStream yamlConfig = getClass().getResourceAsStream(DEFAULT_YAML_SUBFILTER_CONFIG_SPEC);
        yamlFilter.getParameters().load(yamlConfig, false);
    }

    /**
     * Closes the current document, if there is one, and drops the encoding detector and the
     * event builder that were created for it.
     */
    @Override
    public void close() {
        if (currentRawDocument == null) {
            return;
        }
        currentRawDocument.close();
        detector = null;
        eventBuilder = null;
    }

    /**
     * Creates a new skeleton writer for Markdown output.
     *
     * @return a fresh {@code MarkdownSkeletonWriter}
     */
    @Override
    public ISkeletonWriter createSkeletonWriter() {
        final ISkeletonWriter markdownSkeletonWriter = new MarkdownSkeletonWriter();
        return markdownSkeletonWriter;
    }

    /**
     * Returns the filter writer already set on this filter, or creates a new
     * {@code MarkdownFilterWriter} when none has been set.
     *
     * @return the existing filter writer, or a newly created one
     */
    @Override
    public IFilterWriter createFilterWriter() {
        final IFilterWriter existingWriter = getFilterWriter();
        if (existingWriter == null) {
            return new MarkdownFilterWriter(createSkeletonWriter(), getEncoderManager());
        }
        return existingWriter;
    }

    /**
     * Returns the parameters currently used by this filter.
     *
     * @return the current {@code Parameters} instance
     */
    @Override
    public Parameters getParameters() {
        return this.params;
    }

    /**
     * Reports whether at least one more event is queued.
     *
     * @return {@code true} if events are queued; {@code false} if none are queued or if there is
     *         no event builder (it is only created by {@code open()} and is discarded by
     *         {@link #close()})
     */
    @Override
    public boolean hasNext() {
        // Guard against use before open() or after close(): report "no events" rather than
        // failing with a NullPointerException.
        return eventBuilder != null && eventBuilder.hasQueuedEvents();
    }

    /**
     * Reports whether the encoding detector found a UTF-8 byte-order mark.
     *
     * @return {@code false} when there is no detector, otherwise the detector's answer
     */
    @Override
    protected boolean isUtf8Bom() {
        if (detector == null) {
            return false;
        }
        return detector.hasUtf8Bom();
    }

    /**
     * Reports whether the encoding detector identified the input as UTF-8.
     *
     * @return {@code false} when there is no detector, otherwise the detector's answer
     */
    @Override
    protected boolean isUtf8Encoding() {
        if (detector == null) {
            return false;
        }
        return detector.hasUtf8Encoding();
    }

    /**
     * Returns the next queued event, logging it when debug logging is enabled.
     *
     * @return the next event
     * @throws IllegalStateException if {@link #hasNext()} reports that no event is available
     */
    @Override
    public Event next() {
        if (!hasNext()) {
            throw new IllegalStateException("No events available");
        }
        final Event nextEvent = eventBuilder.next();
        if (LOGGER.isDebugEnabled()) {
            FilterUtil.logDebugEvent(nextEvent, "", LOGGER);
        }
        return nextEvent;
    }

    /**
     * Opens the document with skeleton generation enabled.
     *
     * @param input the document to open
     */
    @Override
    public void open(RawDocument input) {
        final boolean generateSkeleton = true;
        open(input, generateSkeleton);
    }

    /**
     * Opens the document, parses it into Markdown tokens and queues all resulting filter events.
     * After this method returns, the events are available through {@link #hasNext()} and
     * {@link #next()}.
     *
     * @param input the document to process
     * @param generateSkeleton passed on to the superclass and to {@code setOptions}
     * @throws OkapiBadFilterInputException if a configured HTML or YAML sub-filter cannot be created
     */
    @Override
    public void open(RawDocument input, boolean generateSkeleton) {
        super.open(input, generateSkeleton);

        // Reset all per-document state so that nothing carries over when this filter instance
        // is reused for another document (e.g. an unclosed emphasis marker or the last line prefix).
        htmlSectionIndex = 0;
        yamlSectionIndex = 0;
        openedCodes.clear();
        lpa = new MarkdownLinePrefixAnnotation("");
        lpabl = new MarkdownLinePrefixAnnotation("");

        currentRawDocument = input;
        if (input.getInputURI() != null) {
            setDocumentName(input.getInputURI().getPath());
        }

        detector = new BOMNewlineEncodingDetector(input.getStream(), input.getEncoding());
        detector.detectAndRemoveBom();
        setNewlineType(detector.getNewlineType().toString());

        String detectedEncoding = getDetectedEncoding();
        input.setEncoding(detectedEncoding);
        setEncoding(detectedEncoding);
        setOptions(input.getSourceLocale(), input.getTargetLocale(), detectedEncoding, generateSkeleton);

        // Make sure the parser is using the latest params
        this.parser = new MarkdownParser(params);

        generateTokens();

        if (LOGGER.isDebugEnabled()) { // Log Flexmark node tree
            LOGGER.debug(parser.toString());
        }

        // Replace the default sub-filters only when a custom configuration is given. The field is
        // assigned after the null check so a failed lookup does not discard the working sub-filter.
        if (!Util.isEmpty(params.getHtmlSubfilter())) {
            HtmlFilter configuredHtmlFilter = (HtmlFilter) getFilterConfigurationMapper()
                    .createFilter(params.getHtmlSubfilter(), htmlFilter);
            if (configuredHtmlFilter == null) {
                throw new OkapiBadFilterInputException("Unknown subfilter: "
                        + params.getHtmlSubfilter());
            }
            htmlFilter = configuredHtmlFilter;
        }

        if (!Util.isEmpty(params.getYamlSubfilter())) {
            YamlFilter configuredYamlFilter = (YamlFilter) getFilterConfigurationMapper()
                    .createFilter(params.getYamlSubfilter(), yamlFilter);
            if (configuredYamlFilter == null) {
                throw new OkapiBadFilterInputException("Unknown subfilter: "
                        + params.getYamlSubfilter());
            }
            yamlFilter = configuredYamlFilter;
        }
        // Pass the FilterConfigurationMapper to the yaml filter so we can use a sub-filter on it.
        yamlFilter.setFilterConfigurationMapper(getFilterConfigurationMapper());

        // Create EventBuilder with document name as rootId
        if (eventBuilder == null) {
            eventBuilder = new MarkdownEventBuilder(getParentId(), this);
        } else {
            eventBuilder.reset(getParentId(), this);
        }
        eventBuilder.setPreserveWhitespace(true);

        // Compile code finder rules
        if (params.getUseCodeFinder()) {
            params.getCodeFinder().compile();
            eventBuilder.setCodeFinder(params.getCodeFinder());
        }
        generateEvents();
    }

    /**
     * Replaces the parameters used by this filter.
     *
     * @param params the new parameters; must be an instance of this filter's {@code Parameters} class
     */
    @Override
    public void setParameters(IParameters params) {
        final Parameters markdownParameters = (Parameters) params;
        this.params = markdownParameters;
        // may be new parameter options for skeleton writer and encoder
        createSkeletonWriter();
        getEncoderManager();
    }

    /**
     * Chooses the encoding to use for the current document.
     * <ul>
     * <li>If the detector's result is definitive, it overrides whatever encoding is currently set.</li>
     * <li>Otherwise, if no encoding is set (null or {@code RawDocument.UNKOWN_ENCODING}), the
     * detector's best guess is used.</li>
     * <li>Otherwise the currently set encoding is kept.</li>
     * </ul>
     *
     * @return the encoding name to use
     */
    private String getDetectedEncoding() {
        if (detector.isDefinitive()) {
            final String definitiveEncoding = detector.getEncoding();
            LOGGER.debug("Overridding user set encoding (if any). Setting auto-detected encoding {}.",
                    definitiveEncoding);
            return definitiveEncoding;
        }

        final String currentEncoding = getEncoding();
        // Constant-first comparison plus explicit null check: a missing encoding is treated the
        // same as an unknown one instead of causing a NullPointerException.
        if (currentEncoding == null || RawDocument.UNKOWN_ENCODING.equals(currentEncoding)) {
            final String guessedEncoding = detector.getEncoding();
            LOGGER.debug("Default encoding and detected encoding not found. Using best guess encoding {}",
                    guessedEncoding);
            return guessedEncoding;
        }
        return currentEncoding;
    }

    /**
     * Reads the whole current document and hands it to the parser. Nothing is parsed when the
     * reader yields no content.
     */
    private void generateTokens() {
        parser.setNewline(getNewlineType());
        try (Scanner wholeInput = new Scanner(currentRawDocument.getReader())) {
            // "\\A" only matches at the beginning of the input, so the first (and only) scanner
            // token is the entire content.
            wholeInput.useDelimiter("\\A");
            if (!wholeInput.hasNext()) {
                return;
            }
            final String markdownText = wholeInput.next();
            parser.parse(markdownText);
        }
    }

    /**
     * Converts every token produced by the parser into filter events, bracketed by the
     * start-filter and end-filter events.
     */
    private void generateEvents() {
        eventBuilder.addFilterEvent(createStartFilterEvent());
        if (LOGGER.isDebugEnabled()) { // Log MarkdownTokens that MarkdownParser.parse() generated.
            LOGGER.debug("Generating Events from the following tokens...\n{}", parser.dumpTokens());
        }
        while (parser.hasNextToken()) {
            processToken(parser.getNextToken());
        }

        endTUIfCurrent();
        eventBuilder.flushRemainingTempEvents();
        eventBuilder.addFilterEvent(createEndFilterEvent());
    }

    /**
     * Dispatches a single token. The order of the checks matters: the more specific categories
     * are tested before the generic {@code isCode}/translatable fall-backs.
     */
    private void processToken(MarkdownToken token) {
        if (token.getType().equals(END_TEXT_UNIT)) { // Pseudo token
            endTUIfCurrent();
        } else if (token.getType().equals(LINE_PREFIX)) { // Pseudo token
            lpa = new MarkdownLinePrefixAnnotation(token.getContent());
            lpabl = new MarkdownLinePrefixAnnotation(TAIL_SPACES_PAT.matcher(token.getContent()).replaceFirst(""));
            endTUIfCurrent();
        } else if (token.getType().equals(YAML_METADATA_HEADER)) {
            endTUIfCurrent();
            eventBuilder.addFilterEvents(processByYamlFilter(token.getContent()));
        } else if (isMaybeTranslatableHtmlBlock(token)) { // Process by HTML subfilter.
            endTUIfCurrent();
            EventsAndRefCode er = processByHtmlFilter(token.getContent());
            eventBuilder.addFilterEvents(er.events);
            eventBuilder.addToDocumentPart(er.refCode.toString());
        } else if (token.getType().equals(HTML_INLINE)) { // HTML Inline tag, including CData.
            processInlineHtml(token.getContent());
        } // HTML_COMMENT, HTML_INLINE_COMMENT, etc. will be handled by the if(isCode(token)) block.
        else if (isDocumentPart(token)) {
            endTUIfCurrent();
            if (token.getType().equals(BLANK_LINE)) { // We don't want extra space on the right.
                eventBuilder.addDocumentPart(token.getContent()).setAnnotation(lpabl);
            } else {
                eventBuilder.addDocumentPart(token.getContent()).setAnnotation(lpa);
            }
        } else if (isInlineMarkup(token)) { // Note: We might want to use insertCodeOrDocPart() to reduce unnecessary codes.
            startTUIfNotCurrent();
            addCode(token);
        } else if (isCode(token)) { // Any non-TEXT, non-empty token that hasn't been processed.
            insertCodeOrDocPart(token.getType().name(), token.getContent());
        } else if (token.isTranslatable()) {
            startTUIfNotCurrent();
            eventBuilder.addToTextUnit(Util.normalizeNewlines(token.getContent()));
        } else {
            eventBuilder.addDocumentPart(token.getContent()).setAnnotation(lpa);
        }
    }

    /**
     * Handles the content of an HTML_INLINE token: a CDATA section is added to the current text
     * unit as text wrapped in a pair of codes; anything else is run through the HTML sub-filter
     * and its events are merged into the main event stream.
     *
     * Note Flexmark's idea of Inline tag is different from the HTML filter's idea, which is configurable.
     * The HTML filter, under the default configuration, doesn't consider "&lt;li&gt;" as inline, and starts
     * a new TextUnit. Flexmark considers any HTML tags that don't form a block, Inline.
     */
    private void processInlineHtml(String tag) {
        Matcher cdataMatcher = HTML_CDATA_PAT.matcher(tag);
        if (cdataMatcher.matches()) {
            String cdataText = cdataMatcher.group(1);
            startTUIfNotCurrent();
            eventBuilder.addToTextUnit(CDATA_START_CODE.clone());
            eventBuilder.addToTextUnit(cdataText);
            eventBuilder.addToTextUnit(CDATA_END_CODE.clone());
            return;
        }
        // Need to be processed by HTML subfilter, as there may be extractable content.
        EventsAndRefCode er = processByHtmlFilter(tag);
        // Synthesize the subEvents into the main event stream.
        for (Event e : er.events) {
            mergeInlineHtmlEvent(e, tag);
        }
    }

    /**
     * Merges one event produced by the HTML sub-filter for an inline tag into the main event stream.
     */
    private void mergeInlineHtmlEvent(Event e, String tag) {
        if (e.isTextUnit()) {
            mergeInlineHtmlTextUnit(e, tag);
        } else if (e.isDocumentPart()) { // Assumption here is HTML filter always generates a TU, not a DP, for an inline tag.
            endTUIfCurrent();
            e.getDocumentPart().setAnnotation(lpa);
            eventBuilder.addFilterEvent(e);
        } else if (e.isStartSubfilter() || e.isEndSubfilter()) {
            // Start/end sub-filter events are deliberately not forwarded for inline tags.
            LOGGER.debug("{} event from HTML subfilter for \"{}\" being ignored.", e.getEventType().name(), tag);
        } else {
            LOGGER.warn("Unexpected {} event from HTML subfilter for \"{}\". Ignored.", e.getEventType().name(), tag);
        }
    }

    /**
     * Merges a text-unit event produced by the HTML sub-filter for an inline tag: referents and
     * text units that carry a skeleton are forwarded as events of their own; otherwise only the
     * codes of the text unit are copied, as placeholders, into the current text unit.
     */
    private void mergeInlineHtmlTextUnit(Event e, String tag) {
        ITextUnit tu = e.getTextUnit();
        if (tu.isReferent()) {
            eventBuilder.addFilterEvent(e);
            return;
        }
        verifyOurTUAssumptions(tu, tag); //TODO: Remove me later, like end of 2018.
        if (tu.getSkeleton() != null) {
            endTUIfCurrent();
            eventBuilder.addFilterEvent(e); // Note: Perhaps we should add this to DP? Use EventBuilder.convertTempTextUnitToDocumentPart() ?
            return;
        }
        startTUIfNotCurrent();
        for (TextPart tp : tu.getSource().getParts()) {
            TextFragment tf = tp.text;
            if (!TextFragment.MARKERS_REGEX.matcher(tf.getCodedText()).replaceAll("").isEmpty()) {
                LOGGER.warn(
                        "TextFragment of a TextUnit generated for the HTML Inline tag \"{}\" has non-code text \"{}\". This is unexpected and non-code part will be discarded.",
                        tag, tf.getText());
            }
            for (Code c : tf.getCodes()) {
                Code placeholder = c.clone();
                placeholder.setTagType(TagType.PLACEHOLDER); // Make it an isolated place holder so that renumberCodes() done in endTextUnit won't do anything funny.
                eventBuilder.addToTextUnit(placeholder);
            }
        }
    }

    /**
     * Starts a new text unit, annotated with the current line prefix, unless one is already open.
     */
    private void startTUIfNotCurrent() {
        if (eventBuilder.isCurrentTextUnit()) {
            return;
        }
        eventBuilder.startTextUnit();
        final ITextUnit startedTextUnit = eventBuilder.peekMostRecentTextUnit();
        startedTextUnit.setAnnotation(lpa);
    }

    /**
     * Ends the text unit currently being built, if there is one.
     */
    private void endTUIfCurrent() {
        final boolean textUnitOpen = eventBuilder.isCurrentTextUnit();
        if (!textUnitOpen) {
            return;
        }
        eventBuilder.endTextUnit();
    }

    /**
     * Adds the inline-markup token to the current text unit as a code.
     * <ul>
     * <li>LINK / LINK_REF: an opening code when the content is the link start marker, otherwise a
     * closing code.</li>
     * <li>EMPHASIS / STRONG_EMPHASIS / CODE: a closing code when the content equals the most
     * recently opened marker, otherwise an opening code; the stack of opened markers is updated
     * accordingly.</li>
     * <li>Any other type: an isolated placeholder code.</li>
     * </ul>
     */
    private void addCode(final MarkdownToken token) {
        final MarkdownTokenType kind = token.getType();
        final String marker = token.getContent();

        if (kind == MarkdownTokenType.LINK || kind == MarkdownTokenType.LINK_REF) {
            final TagType side = LINK_START_MARKER.equals(marker) ? TagType.OPENING : TagType.CLOSING;
            addPairedCode(side, token);
        } else if (kind == MarkdownTokenType.EMPHASIS
                || kind == MarkdownTokenType.STRONG_EMPHASIS
                || kind == MarkdownTokenType.CODE) {
            final boolean closesInnermost = !openedCodes.isEmpty() && openedCodes.peek().equals(marker);
            if (closesInnermost) {
                addPairedCode(TagType.CLOSING, token);
                openedCodes.pop();
            } else {
                addPairedCode(TagType.OPENING, token);
                openedCodes.push(marker);
            }
        } else {
            addIsolatedCode(getCodeTypeForToken(kind), marker);
        }
    }

    /**
     * Maps a token type to the code type string used for the resulting code: links, italic and
     * bold use the predefined {@code Code} type constants; every other type uses its enum name.
     */
    private String getCodeTypeForToken(MarkdownTokenType type) {
        if (type == MarkdownTokenType.LINK || type == MarkdownTokenType.LINK_REF) {
            return Code.TYPE_LINK;
        }
        if (type == MarkdownTokenType.EMPHASIS) {
            return Code.TYPE_ITALIC;
        }
        if (type == MarkdownTokenType.STRONG_EMPHASIS) {
            return Code.TYPE_BOLD;
        }
        return type.name();
    }

    /**
     * Adds an opening or closing code for the token to the current text unit. The code data is
     * the token content with newlines normalized.
     */
    private void addPairedCode(final TagType tagType, final MarkdownToken token) {
        final String codeType = getCodeTypeForToken(token.getType());
        final String codeData = Util.normalizeNewlines(token.getContent());
        final Code pairedCode = new Code(tagType, codeType, codeData);
        eventBuilder.addToTextUnit(pairedCode);
    }

    /**
     * Adds a placeholder code with the given type name to the current text unit. The code data
     * is the content with newlines normalized.
     */
    private void addIsolatedCode(final String name, final String content) {
        final String codeData = Util.normalizeNewlines(content);
        final Code placeholder = new Code(TagType.PLACEHOLDER, name, codeData);
        eventBuilder.addToTextUnit(placeholder);
    }

    /*
     * Matches a single reference marker inside a skeleton string. The quantifier is reluctant so
     * that each marker is matched on its own, from its start delimiter to the nearest end delimiter.
     */
    private static final Pattern SKELETON_REF_MARKER_PAT = Pattern.compile(
            Pattern.quote(TextFragment.REFMARKER_START) + ".*?" + Pattern.quote(TextFragment.REFMARKER_END));

    /*
     * Verify that the TextUnit generated from the HTML tag by the HTML filter
     * matches with the original HTML tag.
     * If failed, we log the violated assumption.
     * This method should be removed in the near future if no violation is reported.
     *
     * The checks are:
     *  - the TextUnit carries no annotations;
     *  - a TextUnit with a skeleton has an empty source;
     *  - the skeleton, with its reference markers removed, equals the original tag.
     */
    private void verifyOurTUAssumptions(ITextUnit tu, String tag) {
        if (tu.getAnnotations() != null && tu.getAnnotations().iterator().hasNext()) {
            LOGGER.error("TU has annotation(s): {}", tu.getAnnotations().toString());
        }
        if (tu.getSkeleton() == null) {
            return; // The remaining checks only apply to text units that have a skeleton.
        }
        String skeleton = tu.getSkeleton().toString();
        if (!tu.getSource().isEmpty()) {
            LOGGER.error("TU has a skeleton \"{}\" and a non-empty source \"{}\" at the same time.",
                    skeleton, tu.getSource().getFirstContent().toText());
        }
        String skeletonWithoutMarkers = SKELETON_REF_MARKER_PAT.matcher(skeleton).replaceAll("");
        if (!skeletonWithoutMarkers.equals(tag)) {
            LOGGER.error("TU skeleton \"{}\" doesn't match with the original tag \"{}\"", skeleton, tag);
        }
    }

    /*
     * Runs the given text (expected to be an HTML tag, CData or block) through
     * the HTML sub-filter. Returns the generated events together with the
     * reference code created by the sub-filter.
     * The parent id and name fall back to the last document id when the event
     * builder has no recent parent.
     */
    private EventsAndRefCode processByHtmlFilter(String content) {
        final String recentParentId = eventBuilder.findMostRecentParentId();
        final String parentId = (recentParentId != null) ? recentParentId : getDocumentId().getLastId();

        final String recentParentName = eventBuilder.findMostRecentParentName();
        final String parentName = (recentParentName != null) ? recentParentName : getDocumentId().getLastId();

        try (SubFilter htmlSubFilter = new SubFilter(htmlFilter, getEncoderManager(),
                                                     ++htmlSectionIndex, parentId, parentName)) {
            final RawDocument htmlDocument = new RawDocument(content, getSrcLoc());
            final List<Event> htmlEvents = htmlSubFilter.getEvents(htmlDocument);
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("---- Events from HTML subfilter ----\nInput: \"{}\"", content);
                FilterUtil.logDebugEvents(htmlEvents, LOGGER);
                LOGGER.debug("---- End of HTML subfilter events ----");
            }
            final EventsAndRefCode result = new EventsAndRefCode();
            result.events = htmlEvents;
            result.refCode = htmlSubFilter.createRefCode();
            return result;
        }
    }


    /*
     * Runs the metadata header through the YAML sub-filter and returns all
     * generated events (including the start/end sub-filter events).
     * Every text unit produced gets the event builder's preserve-whitespace setting.
     * The parent id and name fall back to the last document id when the event
     * builder has no recent parent.
     */
    private List<Event> processByYamlFilter(String content) {
        final String recentParentId = eventBuilder.findMostRecentParentId();
        final String parentId = (recentParentId != null) ? recentParentId : getDocumentId().getLastId();

        final String recentParentName = eventBuilder.findMostRecentParentName();
        final String parentName = (recentParentName != null) ? recentParentName : getDocumentId().getLastId();

        try (SubFilter yamlSubFilter = new SubFilter(yamlFilter, getEncoderManager(),
                ++yamlSectionIndex, parentId, parentName)) {
            final RawDocument yamlDocument = new RawDocument(content, getSrcLoc());
            final List<Event> yamlEvents = yamlSubFilter.getEvents(yamlDocument);
            for (Event yamlEvent : yamlEvents) {
                if (yamlEvent.getEventType() == EventType.TEXT_UNIT) {
                    yamlEvent.getTextUnit().setPreserveWhitespaces(eventBuilder.isPreserveWhitespace());
                }
            }
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("---- Events from YAML subfilter for \"{}\". ----", content);
                FilterUtil.logDebugEvents(yamlEvents, LOGGER);
                LOGGER.debug("---- End of YAML subfilter events ----");
            }
            return yamlEvents;
        }
    }

    /**
     * Adds the data as a placeholder code when a text unit is currently open; otherwise emits it
     * as a document part annotated with the current line prefix, since there is no need to start
     * a text unit with a code.
     */
    private void insertCodeOrDocPart(String type, String data) {
        if (!eventBuilder.isCurrentTextUnit()) {
            eventBuilder.addDocumentPart(data).setAnnotation(lpa);
            return;
        }
        addIsolatedCode(type, data);
    }
    
    /**
     * Returns true for a non-null token that is not a newline, not translatable, not an HTML
     * block handled by the sub-filter, and not of type TEXT.
     */
    private boolean isCode(MarkdownToken token) {
        if (token == null || isNewline(token) || token.isTranslatable()) {
            return false;
        }
        return !(isMaybeTranslatableHtmlBlock(token) || token.getType().equals(TEXT));
    }

    /**
     * Returns true when the token is an HTML_BLOCK or HTML_INNER_BLOCK; false for null.
     */
    private boolean isMaybeTranslatableHtmlBlock(MarkdownToken token) {
        if (token == null) {
            return false;
        }
        switch (token.getType()) {
        case HTML_BLOCK:
        case HTML_INNER_BLOCK:
            return true;
        default:
            return false;
        }
    }

    /**
     * Returns true when the token is a SOFT_LINE_BREAK or BLANK_LINE; false for null.
     */
    private boolean isNewline(MarkdownToken token) {
        // NOTE: A HARD_LINE_BREAK token is tricky. It only contains the leading spaces before the newline.
        // A SOFT_LINE_BREAK following the HARD_LINE_BREAK represents the actual newline.
        // For that reason, isNewline returns false for HARD_LINE_BREAK.
        if (token == null) {
            return false;
        }
        final MarkdownTokenType kind = token.getType();
        return kind.equals(SOFT_LINE_BREAK) || kind.equals(BLANK_LINE);
    }

    /**
     * Returns true when the token is non-null, not translatable, and of one of the structural
     * types (newlines, list item markers, code fences, heading markers, table markup, etc.)
     * that are emitted as document parts.
     */
    private boolean isDocumentPart(MarkdownToken token) {
        if (token == null || token.isTranslatable()) {
            return false;
        }
        switch (token.getType()) {
        // The two newline types (see isNewline).
        case SOFT_LINE_BREAK:
        case BLANK_LINE:
        // Structural markup.
        case BULLET_LIST_ITEM:
        case ORDERED_LIST_ITEM:
        case FENCED_CODE_BLOCK:
        case FENCED_CODE_BLOCK_INFO:
        case HEADING_PREFIX:
        case HEADING_UNDERLINE:
        case THEMATIC_BREAK:
        case REFERENCE:
        case WHITE_SPACE:
        case TABLE_PIPE:
        case TABLE_SEPARATOR:
        case HTML_INNER_BLOCK_COMMENT:
        case HTML_COMMENT_BLOCK:
        case YAML_METADATA_HEADER:
            return true;
        default:
            return false;
        }
    }

    /*
     * Returns true if the token represents "*" of "*emphasized text*", "__" of "__strongly emphasized text__" etc.,
     * i.e. when the token's type reports itself as inline. Returns false for a null token.
     */
    private boolean isInlineMarkup(MarkdownToken token) {
        return token != null && token.getType().isInline();
    }
}
