/*
 * Decompiled with CFR 0.152.
 */
package org.apache.lucene.benchmark.utils;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * Splits a Wikipedia XML dump into one plain-text file per page.
 *
 * <p>The dump is read with a SAX parser; for every {@code <page>} element that has a
 * non-redirect {@code <text>} body, a file named {@code <id>.txt} is written below the
 * output directory (see {@link Parser#directory(int, File)} for the sub-directory layout).
 */
public class ExtractWikipedia {
    private final File wikipedia;
    private final File outputDir;
    /** Number of page files written so far; also drives the output sub-directory layout. */
    public static int count = 0;
    /** Upper-case month abbreviations, indexed by month number minus one. */
    static String[] months = new String[]{"JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"};

    /**
     * Creates an extractor and clears the direct children of the output directory.
     *
     * <p>Deletion is best-effort: {@link File#delete()} results are ignored, so non-empty
     * sub-directories are left in place.
     *
     * @param wikipedia the Wikipedia XML dump to read
     * @param outputDir the directory the per-page files are written under
     */
    public ExtractWikipedia(File wikipedia, File outputDir) {
        this.wikipedia = wikipedia;
        this.outputDir = outputDir;
        System.out.println("Deleting all files in " + outputDir);
        // listFiles() returns null when outputDir is not a directory or cannot be listed.
        File[] files = outputDir.listFiles();
        if (files != null) {
            for (int i = 0; i < files.length; ++i) {
                files[i].delete();
            }
        }
    }

    /**
     * Parses the dump and writes one file per extracted page.
     *
     * @throws RuntimeException wrapping any exception raised while creating the reader,
     *         opening the dump, parsing it, or writing a page file
     */
    public void extract() {
        try {
            Parser parser = new Parser();
            XMLReader reader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
            reader.setContentHandler(parser);
            reader.setErrorHandler(parser);
            FileInputStream input = new FileInputStream(this.wikipedia);
            try {
                reader.parse(new InputSource(input));
            } finally {
                // Release the dump's file handle whether or not parsing succeeded.
                input.close();
            }
        }
        catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} is the path of the Wikipedia XML file, {@code args[1]}
     *        the output directory (created if missing)
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            ExtractWikipedia.printUsage();
            // Stop here: args[0] / args[1] may not exist.
            return;
        }
        File wikipedia = new File(args[0]);
        if (!wikipedia.exists()) {
            ExtractWikipedia.printUsage();
            return;
        }
        File outputDir = new File(args[1]);
        outputDir.mkdirs();
        ExtractWikipedia extractor = new ExtractWikipedia(wikipedia, outputDir);
        extractor.extract();
    }

    /** Prints the command-line usage to standard error. */
    private static void printUsage() {
        System.err.println("Usage: java -cp <...> org.apache.lucene.benchmark.utils.ExtractWikipedia <Path to Wikipedia XML file> <Output Path>");
    }

    /**
     * SAX handler that collects the title, first id, timestamp and text of each
     * {@code <page>} element and writes the page out when the element closes.
     */
    public class Parser
    extends DefaultHandler {
        /** Character data accumulated since the last tracked start tag. */
        StringBuffer contents = new StringBuffer();
        String title;
        String id;
        String body;
        String time;
        /** Radix used to spread files over nested sub-directories. */
        static final int BASE = 10;

        /** Appends the reported character data to the current element buffer. */
        @Override
        public void characters(char[] ch, int start, int length) {
            this.contents.append(ch, start, length);
        }

        /**
         * Resets per-page state at {@code <page>} and clears the text buffer at the start
         * of every element whose content is captured.
         */
        @Override
        public void startElement(String namespace, String simple, String qualified, Attributes attributes) {
            if (qualified.equals("page")) {
                this.title = null;
                this.id = null;
                this.body = null;
                this.time = null;
            } else if (qualified.equals("text")
                    || qualified.equals("timestamp")
                    || qualified.equals("title")
                    || qualified.equals("id")) {
                this.contents.setLength(0);
            }
        }

        /**
         * Maps a running file number to the directory the file belongs in.
         *
         * <p>Numbers below {@link #BASE} stay in the starting directory. Otherwise, for each
         * leading digit, two levels are appended: the power of {@code BASE} of that digit
         * and the digit itself; e.g. 123 maps to {@code <start>/100/1/10/2}.
         *
         * @param count the running file number
         * @param directory the starting directory, or {@code null} for the output directory
         * @return the directory for that file number (not created by this method)
         */
        public File directory(int count, File directory) {
            File target = directory == null ? ExtractWikipedia.this.outputDir : directory;
            int remaining = count;
            while (remaining >= BASE) {
                // Largest power of BASE not exceeding 'remaining'. Comparing against
                // remaining / BASE keeps the multiplication from overflowing int.
                int magnitude = 1;
                while (magnitude <= remaining / BASE) {
                    magnitude *= BASE;
                }
                target = new File(target, Integer.toString(magnitude));
                target = new File(target, Integer.toString(remaining / magnitude));
                remaining %= magnitude;
            }
            return target;
        }

        /**
         * Writes one page to {@code <id>.txt} as time, title and body separated by blank
         * lines, and advances the global file counter.
         *
         * <p>NOTE(review): the file is written with the platform default charset, as before.
         *
         * @throws RuntimeException wrapping the {@link IOException} if the file cannot be written
         */
        public void create(String id, String title, String time, String body) {
            File d = this.directory(count++, null);
            d.mkdirs();
            File f = new File(d, id + ".txt");
            StringBuilder page = new StringBuilder();
            page.append(time);
            page.append("\n\n");
            page.append(title);
            page.append("\n\n");
            page.append(body);
            page.append("\n");
            try {
                FileWriter writer = new FileWriter(f);
                try {
                    writer.write(page.toString());
                } finally {
                    // Close even when write() fails so the handle is not leaked.
                    writer.close();
                }
            }
            catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }
        }

        /**
         * Reformats a timestamp laid out like {@code 2006-03-15T12:34:56Z} (year at 0-3,
         * month at 5-6, day at 8-9, time of day at 11-18) into
         * {@code 15-MAR-2006 12:34:56.000}.
         *
         * @param original the timestamp text, at least 19 characters long
         * @return the reformatted timestamp
         */
        String time(String original) {
            int month = Integer.parseInt(original.substring(5, 7));
            StringBuilder buffer = new StringBuilder();
            buffer.append(original.substring(8, 10));
            buffer.append('-');
            buffer.append(months[month - 1]);
            buffer.append('-');
            buffer.append(original.substring(0, 4));
            buffer.append(' ');
            buffer.append(original.substring(11, 19));
            buffer.append(".000");
            return buffer.toString();
        }

        /**
         * Captures the buffered text of tracked elements and, at {@code </page>}, writes
         * the page unless it had no text or was a redirect.
         */
        @Override
        public void endElement(String namespace, String simple, String qualified) {
            if (qualified.equals("title")) {
                this.title = this.contents.toString();
            } else if (qualified.equals("text")) {
                this.body = this.contents.toString();
                // Redirect stubs carry no article text; a null body suppresses the write.
                if (this.body.startsWith("#REDIRECT") || this.body.startsWith("#redirect")) {
                    this.body = null;
                }
            } else if (qualified.equals("timestamp")) {
                this.time = this.time(this.contents.toString());
            } else if (qualified.equals("id") && this.id == null) {
                // Only the first <id> seen inside a page is kept.
                this.id = this.contents.toString();
            } else if (qualified.equals("page") && this.body != null) {
                this.create(this.id, this.title, this.time, this.body);
            }
        }
    }
}

