/*
 * Decompiled with CFR 0.152.
 */
package org.apache.lucene.benchmark.byTask.feeds;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Locale;
import java.util.zip.GZIPInputStream;
import org.apache.lucene.benchmark.byTask.feeds.BasicDocMaker;
import org.apache.lucene.benchmark.byTask.feeds.DocData;
import org.apache.lucene.benchmark.byTask.feeds.HTMLParser;
import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
import org.apache.lucene.benchmark.byTask.utils.Config;

/**
 * A doc maker that reads documents from gzip-compressed, TREC-style text files.
 *
 * <p>Each input file is expected to contain a sequence of records of the form
 * <pre>
 * &lt;DOC&gt;
 * &lt;DOCNO&gt;name&lt;/DOCNO&gt;
 * &lt;DOCHDR&gt;
 * ...
 * Date: some date
 * ...
 * &lt;/DOCHDR&gt;
 * body lines
 * &lt;/DOC&gt;
 * </pre>
 * The body is handed to the HTML parser returned by {@code getHtmlParser()}.
 *
 * <p>The input directory is read from the {@code docs.dir} config property
 * (looked up with the fallback value {@code "trec"}); a relative value is
 * resolved against the {@code work.dir} property (fallback {@code "work"}).
 *
 * <p>File-cursor state ({@code nextFile}, {@code iteration}) is guarded by the
 * monitor of this instance. Date formats are kept per thread because
 * {@link SimpleDateFormat} instances must not be shared between threads.
 */
public class TrecDocMaker extends BasicDocMaker {
    private static final String newline = System.getProperty("line.separator");

    /**
     * Number of consecutive open failures after which {@link #openNextFile()}
     * gives up: the failure that brings the count to this value is not skipped.
     */
    private static final int MAX_OPEN_RETRIES = 20;

    /** Date patterns tried, in order, when parsing the "Date: " header line. */
    private static final String[] DATE_FORMATS = new String[]{"EEE, dd MMM yyyy kk:mm:ss z", "EEE MMM dd kk:mm:ss yyyy z", "EEE, dd-MMM-':'y kk:mm:ss z", "EEE, dd-MMM-yyy kk:mm:ss z"};

    /** Per-thread {@code DateFormat[]} holding one formatter per entry of {@link #DATE_FORMATS}. */
    private final ThreadLocal dateFormat = new ThreadLocal();
    private File dataDir = null;
    /** The input files ({@link File} elements) in the order they are read. */
    private final ArrayList inputFiles = new ArrayList();
    /** Index into {@link #inputFiles} of the next file to open. */
    private int nextFile = 0;
    /** Number of times the file list has wrapped around; appended to document names. */
    private int iteration = 0;
    private BufferedReader reader;
    private GZIPInputStream zis;

    /**
     * Applies the configuration and collects the input files below the
     * configured data directory.
     *
     * @param config the benchmark configuration
     * @throws RuntimeException if no input file was collected
     */
    public void setConfig(Config config) {
        super.setConfig(config);
        File workDir = new File(config.get("work.dir", "work"));
        String d = config.get("docs.dir", "trec");
        this.dataDir = new File(d);
        if (!this.dataDir.isAbsolute()) {
            this.dataDir = new File(workDir, d);
        }
        // Start from an empty list so that calling setConfig again does not
        // list the same files twice (collectFiles is defined outside this
        // file; it is assumed to only add entries to the list it is given).
        this.inputFiles.clear();
        this.collectFiles(this.dataDir, this.inputFiles);
        if (this.inputFiles.size() == 0) {
            throw new RuntimeException("No txt files in dataDir: " + this.dataDir.getAbsolutePath());
        }
    }

    /**
     * Closes the current input (if any) and opens the next input file as a
     * gzip stream, wrapping around to the first file when {@code forever} is
     * set. Files that cannot be opened are skipped.
     *
     * @throws NoMoreDataException if all files were consumed and
     *         {@code forever} is not set, or if {@link #MAX_OPEN_RETRIES}
     *         open attempts in a row failed (the last failure is attached as
     *         the cause)
     */
    private void openNextFile() throws NoMoreDataException, Exception {
        this.closeInputs();
        int retries = 0;
        while (true) {
            File f;
            synchronized (this) {
                if (this.nextFile >= this.inputFiles.size()) {
                    if (!this.forever) {
                        throw new NoMoreDataException();
                    }
                    // Exhausted the list: start over and bump the iteration
                    // counter, which is part of every generated doc name.
                    this.nextFile = 0;
                    ++this.iteration;
                }
                f = (File)this.inputFiles.get(this.nextFile++);
            }
            System.out.println("opening: " + f + " length: " + f.length());
            FileInputStream fis = null;
            try {
                fis = new FileInputStream(f);
                this.zis = new GZIPInputStream(new BufferedInputStream(fis));
                this.reader = new BufferedReader(new InputStreamReader(this.zis));
                return;
            }
            catch (Exception e) {
                // The GZIPInputStream constructor reads the gzip header and
                // throws for a non-gzip file; at that point the file stream
                // is already open and nobody else holds a reference to it.
                if (fis != null) {
                    try {
                        fis.close();
                    }
                    catch (IOException ignored) {
                        // Best effort: the file is being skipped anyway.
                    }
                }
                this.zis = null;
                this.reader = null;
                if (++retries < MAX_OPEN_RETRIES) {
                    System.out.println("Skipping 'bad' file " + f.getAbsolutePath() + "  #retries=" + retries);
                    continue;
                }
                // Keep the underlying failure visible to whoever handles this.
                NoMoreDataException giveUp = new NoMoreDataException();
                giveUp.initCause(e);
                throw giveUp;
            }
        }
    }

    /**
     * Closes the current gzip stream and reader, if open, and clears both
     * fields. Close failures are reported on standard output and otherwise
     * ignored.
     */
    private void closeInputs() {
        if (this.zis != null) {
            try {
                this.zis.close();
            }
            catch (IOException e) {
                System.out.println("closeInputs(): Ingnoring error: " + e);
                e.printStackTrace();
            }
            this.zis = null;
        }
        if (this.reader != null) {
            try {
                this.reader.close();
            }
            catch (IOException e) {
                System.out.println("closeInputs(): Ingnoring error: " + e);
                e.printStackTrace();
            }
            this.reader = null;
        }
    }

    /**
     * Reads lines from the current input until one starts with
     * {@code prefix}, moving on to the next input file whenever the current
     * one is exhausted.
     *
     * @param prefix           the line prefix that ends the scan
     * @param sb               buffer to append to, or {@code null} to allocate a new one
     * @param collectMatchLine whether the matching line itself is appended
     * @param collectAll       whether the lines before the match are appended
     * @return the buffer holding the collected lines, separated by the
     *         platform line separator
     * @throws Exception if reading fails or no further input can be opened
     */
    private StringBuffer read(String prefix, StringBuffer sb, boolean collectMatchLine, boolean collectAll) throws Exception {
        sb = sb == null ? new StringBuffer() : sb;
        String sep = "";
        while (true) {
            String line = this.reader.readLine();
            if (line == null) {
                // End of this file: continue the scan in the next one.
                this.openNextFile();
                continue;
            }
            if (line.startsWith(prefix)) {
                if (collectMatchLine) {
                    sb.append(sep).append(line);
                }
                break;
            }
            if (collectAll) {
                sb.append(sep).append(line);
                sep = newline;
            }
        }
        return sb;
    }

    /**
     * Reads the next document record from the input and parses it.
     *
     * <p>The document name is the text between {@code <DOCNO>} and
     * {@code </DOCNO>} followed by {@code "_"} and the current iteration
     * number. The date is taken from the first {@code "Date: "} line after
     * {@code <DOCHDR>}; the body is everything between {@code </DOCHDR>} and
     * {@code </DOC>}.
     *
     * @return the parsed document data
     * @throws NoMoreDataException if the input is exhausted
     * @throws Exception if reading or parsing fails
     */
    protected synchronized DocData getNextDocData() throws NoMoreDataException, Exception {
        if (this.reader == null) {
            this.openNextFile();
        }
        // Skip ahead to the start of the next record.
        this.read("<DOC>", null, false, false);
        StringBuffer sb = this.read("<DOCNO>", null, true, false);
        String name = sb.substring("<DOCNO>".length());
        name = name.substring(0, name.indexOf("</DOCNO>")) + "_" + this.iteration;
        this.read("<DOCHDR>", null, false, false);
        sb = this.read("Date: ", null, true, false);
        String dateStr = sb.substring("Date: ".length());
        this.read("</DOCHDR>", null, false, false);
        // Collect the body: every line up to, but excluding, the closing tag.
        sb = this.read("</DOC>", null, false, true);
        Date date = this.parseDate(dateStr);
        HTMLParser p = this.getHtmlParser();
        DocData docData = p.parse(name, date, sb, this.getDateFormat(0));
        this.addBytes(sb.length());
        return docData;
    }

    /**
     * Returns the calling thread's formatter for {@code DATE_FORMATS[n]},
     * creating this thread's formatter array on first use.
     *
     * @param n index into {@link #DATE_FORMATS}
     * @return a lenient, US-locale formatter owned by the calling thread
     */
    private DateFormat getDateFormat(int n) {
        DateFormat[] df = (DateFormat[])this.dateFormat.get();
        if (df == null) {
            df = new SimpleDateFormat[DATE_FORMATS.length];
            for (int i = 0; i < df.length; ++i) {
                df[i] = new SimpleDateFormat(DATE_FORMATS[i], Locale.US);
                df[i].setLenient(true);
            }
            this.dateFormat.set(df);
        }
        return df[n];
    }

    /**
     * Parses a date string by trying each of {@link #DATE_FORMATS} in order.
     *
     * @param dateStr the raw date text; surrounding whitespace is ignored
     * @return the parsed date, or the current time if no format matches
     */
    private Date parseDate(String dateStr) {
        String trimmed = dateStr.trim();
        for (int i = 0; i < DATE_FORMATS.length; ++i) {
            try {
                return this.getDateFormat(i).parse(trimmed);
            }
            catch (ParseException e) {
                // Not this format; try the next one.
            }
        }
        System.out.println("ignoring date parse exception (assigning 'now') for: " + dateStr);
        return new Date();
    }

    /**
     * Resets this doc maker: closes any open input and rewinds to the first
     * file and iteration zero.
     */
    public synchronized void resetInputs() {
        super.resetInputs();
        this.closeInputs();
        this.nextFile = 0;
        this.iteration = 0;
    }

    /**
     * Returns the number of collected input files.
     *
     * @return the size of the input file list
     */
    public int numUniqueTexts() {
        return this.inputFiles.size();
    }
}

