/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.indexer.filter;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.UnrecognizedOptionException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.StringUtils;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.parse.ParseImpl;
import org.apache.nutch.parse.ParseStatus;
import org.apache.nutch.util.MimeUtil;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.PrefixStringMatcher;
import org.apache.nutch.util.TrieStringMatcher;
import org.apache.tika.Tika;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Indexing filter that keeps or discards documents according to their MIME type.
 *
 * <p>Rules are loaded from the configuration resource named by the
 * {@value #MIMEFILTER_REGEX_FILE} property. The rules file is read line by line:
 * <ul>
 *   <li>empty lines and lines starting with a space or {@code #} are ignored;</li>
 *   <li>a line starting with {@code +} selects accept mode (the default): a document is
 *       kept unless its MIME type matches one of the rules;</li>
 *   <li>a line starting with {@code -} selects deny mode: a document is kept only if
 *       its MIME type matches one of the rules;</li>
 *   <li>every other line is added verbatim as a rule to a {@link PrefixStringMatcher}.</li>
 * </ul>
 * If several mode lines are present, the last one wins. When the property is not set,
 * no rules are loaded and every document is kept.
 */
public class MimeTypeIndexingFilter
implements IndexingFilter {
    /** Name of the configuration property holding the rules file name. */
    public static final String MIMEFILTER_REGEX_FILE = "mimetype.filter.file";
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
    /** Resolves a declared content type to a MIME type name; created in {@link #setConf}. */
    private MimeUtil MIME;
    /** Used to detect the MIME type from the URL when no content type is declared. */
    private final Tika tika = new Tika();
    /** Matcher built from the rules file; {@code null} while no rules are loaded. */
    private TrieStringMatcher trie;
    private Configuration conf;
    /** {@code true}: matching documents are dropped; {@code false}: only matching documents are kept. */
    private boolean acceptMode = true;

    /**
     * Decides whether {@code doc} is kept, based on its MIME type.
     *
     * <p>The type is taken from the {@code Content-Type} entry of the datum metadata,
     * falling back to the parse metadata; if neither is present it is detected from the
     * URL. A type that could not be resolved ({@code null}) is treated as matching no rule.
     *
     * @param doc     the document being indexed
     * @param parse   the parse result, consulted for {@code Content-Type} metadata
     * @param url     the document URL, used for MIME detection and logging
     * @param datum   the crawl datum, consulted first for {@code Content-Type} metadata
     * @param inlinks not used by this filter
     * @return {@code doc} if the document is kept, {@code null} if it is rejected
     * @throws IndexingException declared by the interface; not thrown directly here
     */
    public NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks) throws IndexingException {
        Writable datumType = datum.getMetaData().get(new Text("Content-Type"));
        String declaredType = datumType != null ? datumType.toString() : parse.getData().getMeta("Content-Type");
        String mimeType = declaredType == null
            ? this.tika.detect(url.toString())
            : this.MIME.forName(MimeUtil.cleanMimeType(declaredType));
        LOG.info("[{}] {}", mimeType, url);
        if (this.trie == null) {
            // No rules loaded: everything passes.
            return doc;
        }
        // Guard against an unresolved (null) type so the matcher is never handed null.
        boolean matched = mimeType != null && this.trie.shortestMatch(mimeType) != null;
        // Accept mode drops matches; deny mode drops non-matches.
        return matched == this.acceptMode ? null : doc;
    }

    /**
     * Stores the configuration and (re)loads the rules file named by
     * {@link #MIMEFILTER_REGEX_FILE}.
     *
     * @param conf the configuration to read the property and the rules resource from
     * @throws RuntimeException if the configured rules file cannot be found or read
     */
    public void setConf(Configuration conf) {
        this.conf = conf;
        this.MIME = new MimeUtil(conf);
        // Start from a clean slate so a repeated call never inherits rules or mode
        // from an earlier configuration.
        this.trie = null;
        this.acceptMode = true;
        String file = conf.get(MIMEFILTER_REGEX_FILE, "");
        if (file == null || file.isEmpty()) {
            LOG.warn(String.format("Missing %s property, ALL mimetypes will be allowed", MIMEFILTER_REGEX_FILE));
            return;
        }
        // try-with-resources tolerates a null resource, so the null check can live inside.
        try (Reader reader = conf.getConfResourceAsReader(file)) {
            if (reader == null) {
                // The resource could not be located/opened; fail with a message that
                // names the file instead of an anonymous NullPointerException.
                String message = String.format("Unable to load rules file '%s' configured by %s", file, MIMEFILTER_REGEX_FILE);
                LOG.error(message);
                throw new RuntimeException(message);
            }
            this.readConfiguration(reader);
        }
        catch (IOException e) {
            LOG.error(String.format("Unable to read rules file '%s'", file), e);
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    /**
     * Parses the rules from {@code reader} and installs the resulting matcher.
     * The caller remains responsible for closing {@code reader}.
     *
     * @param reader source of the rules, one per line
     * @throws IOException if reading fails
     */
    private void readConfiguration(Reader reader) throws IOException {
        BufferedReader in = new BufferedReader(reader);
        ArrayList<String> rules = new ArrayList<String>();
        String line;
        while ((line = in.readLine()) != null) {
            if (line.isEmpty()) {
                continue;
            }
            switch (line.charAt(0)) {
                case ' ':
                case '#':
                    // Blank-ish and comment lines carry no rule.
                    break;
                case '+':
                    this.acceptMode = true;
                    break;
                case '-':
                    this.acceptMode = false;
                    break;
                default:
                    rules.add(line);
                    break;
            }
        }
        this.trie = new PrefixStringMatcher(rules);
    }

    /**
     * @return the configuration last passed to {@link #setConf}, or {@code null} if none
     */
    public Configuration getConf() {
        return this.conf;
    }

    /**
     * Command-line tester: loads the rules file given with {@code -rules} and, for every
     * MIME type read from standard input (one per line, stopping at end of input or at
     * the first empty line), prints {@code "+ "} followed by the type if a document of
     * that type is kept, or {@code "- "} followed by the type if it is rejected.
     *
     * @param args command-line arguments ({@code -rules <file>}, {@code -h}/{@code --help})
     * @throws IOException       if reading standard input fails
     * @throws IndexingException if the filter fails
     */
    public static void main(String[] args) throws IOException, IndexingException {
        String rulesFile;
        Option helpOpt = new Option("h", "help", false, "show this help message");
        OptionBuilder.withArgName("file");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("Rules file to be used in the tests relative to the conf directory");
        OptionBuilder.isRequired();
        Option rulesOpt = OptionBuilder.create("rules");
        Options options = new Options();
        options.addOption(helpOpt).addOption(rulesOpt);
        GnuParser parser = new GnuParser();
        HelpFormatter formatter = new HelpFormatter();
        try {
            CommandLine cmd = parser.parse(options, args);
            if (cmd.hasOption("help") || !cmd.hasOption("rules")) {
                formatter.printHelp("org.apache.nutch.indexer.filter.MimeTypeIndexingFilter", options, true);
                return;
            }
            rulesFile = cmd.getOptionValue("rules");
        }
        catch (UnrecognizedOptionException e) {
            formatter.printHelp("org.apache.nutch.indexer.filter.MimeTypeIndexingFilter", options, true);
            return;
        }
        catch (Exception e) {
            LOG.error(StringUtils.stringifyException(e));
            // Also report on the console: this is an interactive tool and the log
            // may not be routed to the terminal.
            e.printStackTrace();
            return;
        }
        MimeTypeIndexingFilter filter = new MimeTypeIndexingFilter();
        Configuration conf = NutchConfiguration.create();
        conf.set(MIMEFILTER_REGEX_FILE, rulesFile);
        filter.setConf(conf);
        try (BufferedReader in = new BufferedReader(new InputStreamReader(System.in))) {
            String mimeType;
            while ((mimeType = in.readLine()) != null && !mimeType.isEmpty()) {
                Metadata metadata = new Metadata();
                metadata.set("Content-Type", mimeType);
                ParseImpl parse = new ParseImpl("text", new ParseData(new ParseStatus(), "title", new Outlink[0], metadata));
                NutchDocument doc = filter.filter(new NutchDocument(), parse, new Text("http://www.example.com/"), new CrawlDatum(), new Inlinks());
                System.out.print(doc != null ? "+ " : "- ");
                System.out.println(mimeType);
            }
        }
    }
}

