/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.scoring.webgraph;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.ObjectWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.scoring.webgraph.LinkDatum;
import org.apache.nutch.scoring.webgraph.Node;
import org.apache.nutch.util.FSUtils;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.URLUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LinkRank
extends Configured
implements Tool {
    /** Logger used by the driver methods and by the nested mapper/reducer classes of this file. */
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
    /** Name of the directory below the web graph db that receives the output of the counter job. */
    private static final String NUM_NODES = "_num_nodes_";

    /**
     * Runs the counter job over {@code <webGraphDb>/nodes} and returns the
     * number it produced.
     *
     * <p>The job writes a single text file into the temporary directory
     * {@code <webGraphDb>/_num_nodes_}. That file is read back, parsed and
     * the temporary directory is removed again, also when reading or parsing
     * fails, so that a later run does not trip over a stale output directory.
     *
     * @param fs the file system holding the web graph db
     * @param webGraphDb root path of the web graph db
     * @return the count emitted by the counter job
     * @throws IOException if the job output is missing, empty, ambiguous or
     *         cannot be parsed, or if file system access fails
     * @throws ClassNotFoundException propagated from the job execution
     * @throws InterruptedException propagated from the job execution
     * @throws RuntimeException if the job finishes unsuccessfully
     */
    private int runCounter(FileSystem fs, Path webGraphDb) throws IOException, ClassNotFoundException, InterruptedException {
        Path numLinksPath = new Path(webGraphDb, NUM_NODES);
        Path nodeDb = new Path(webGraphDb, "nodes");
        Job counter = Job.getInstance(this.getConf(), "Nutch LinkRank: counter " + webGraphDb);
        Configuration conf = counter.getConfiguration();
        FileInputFormat.addInputPath(counter, nodeDb);
        FileOutputFormat.setOutputPath(counter, numLinksPath);
        counter.setInputFormatClass(SequenceFileInputFormat.class);
        counter.setJarByClass(Counter.class);
        counter.setMapperClass(Counter.CountMapper.class);
        counter.setCombinerClass(Counter.CountReducer.class);
        counter.setReducerClass(Counter.CountReducer.class);
        counter.setMapOutputKeyClass(Text.class);
        counter.setMapOutputValueClass(LongWritable.class);
        counter.setOutputKeyClass(Text.class);
        counter.setOutputValueClass(LongWritable.class);
        // A single reducer so that exactly one output file is written.
        counter.setNumReduceTasks(1);
        counter.setOutputFormatClass(TextOutputFormat.class);
        // No _SUCCESS marker: the output directory must contain the result file only.
        conf.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
        LOG.info("Starting link counter job");
        try {
            boolean success = counter.waitForCompletion(true);
            if (!success) {
                String message = NutchJob.getJobFailureLogMessage("Link counter", counter);
                LOG.error(message);
                throw new RuntimeException(message);
            }
        }
        catch (IOException | ClassNotFoundException | InterruptedException e) {
            LOG.error("Link counter job failed:", e);
            throw e;
        }
        LOG.info("Finished link counter job");

        int numLinks;
        try {
            numLinks = readNumLinks(fs, conf, numLinksPath);
        }
        catch (IOException | RuntimeException e) {
            // Best-effort cleanup; never let a cleanup failure hide the real cause.
            try {
                fs.delete(numLinksPath, true);
            }
            catch (IOException cleanupFailure) {
                e.addSuppressed(cleanupFailure);
            }
            throw e;
        }
        LOG.info("Deleting numlinks temp file");
        fs.delete(numLinksPath, true);
        return numLinks;
    }

    /**
     * Reads the count from the single file inside the counter job's output
     * directory. The first line is expected to hold a key and the count,
     * separated by whitespace. The file is decompressed when its name maps to
     * a known compression codec.
     *
     * @param fs the file system holding the output directory
     * @param conf configuration used to look up compression codecs
     * @param numLinksPath output directory of the counter job
     * @return the parsed count
     * @throws IOException if there is not exactly one file, the first line is
     *         missing or empty, or the line cannot be parsed as an int count
     */
    private static int readNumLinks(FileSystem fs, Configuration conf, Path numLinksPath) throws IOException {
        FileStatus[] numLinksFiles = fs.listStatus(numLinksPath);
        if (numLinksFiles.length == 0) {
            throw new IOException("Failed to read numlinks temp file:  no file found in " + numLinksPath);
        }
        if (numLinksFiles.length > 1) {
            throw new IOException("Failed to read numlinks temp file:  expected only one file but found " + numLinksFiles.length + " files in folder " + numLinksPath);
        }
        Path numLinksFile = numLinksFiles[0].getPath();
        LOG.info("Reading numlinks temp file {}", numLinksFile);

        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(numLinksFile);
        if (codec == null) {
            LOG.debug("No compression codec found for {}, trying uncompressed", numLinksFile);
        } else {
            LOG.info("Compression codec of numlinks temp file: {}", codec.getDefaultExtension());
        }

        String numLinksLine;
        // Every stream is a managed resource so nothing leaks when reading fails.
        try (FSDataInputStream rawLinks = fs.open(numLinksFile);
                InputStream streamLinks = codec == null ? rawLinks : codec.createInputStream(rawLinks);
                BufferedReader reader = new BufferedReader(new InputStreamReader(streamLinks, StandardCharsets.UTF_8))) {
            numLinksLine = reader.readLine();
        }

        if (numLinksLine == null || numLinksLine.isEmpty()) {
            LOG.error("Failed to determine number of links because of empty line in input {}", numLinksFile);
            throw new IOException("No links to process, is the webgraph empty?");
        }
        String[] fields = numLinksLine.split("\\s+");
        if (fields.length < 2) {
            throw new IOException("Failed to parse numlinks temp file " + numLinksFile + ": unexpected line '" + numLinksLine + "'");
        }
        try {
            return Integer.parseInt(fields[1]);
        }
        catch (NumberFormatException e) {
            throw new IOException("Failed to parse number of links '" + fields[1] + "' in " + numLinksFile, e);
        }
    }

    /**
     * Configures and runs the initialization job, which reads the given node
     * db, passes every record through {@link Initializer} and stores the
     * result as a map file under {@code output}.
     *
     * @param nodeDb node db to read
     * @param output where the initialized node db is written
     * @throws IOException propagated from the job execution
     * @throws InterruptedException propagated from the job execution
     * @throws ClassNotFoundException propagated from the job execution
     * @throws RuntimeException if the job finishes unsuccessfully
     */
    private void runInitializer(Path nodeDb, Path output) throws IOException, InterruptedException, ClassNotFoundException {
        final String jobName = "Nutch LinkRank: initializer " + nodeDb;
        final Job job = Job.getInstance(getConf(), jobName);
        job.setJarByClass(Initializer.class);

        // Input side: the node db as sequence files.
        job.setInputFormatClass(SequenceFileInputFormat.class);
        FileInputFormat.addInputPath(job, nodeDb);

        // Processing: only a mapper is configured for this job.
        job.setMapperClass(Initializer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Node.class);

        // Output side: url -> node, stored as a map file.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Node.class);
        job.setOutputFormatClass(MapFileOutputFormat.class);
        FileOutputFormat.setOutputPath(job, output);
        job.getConfiguration().setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

        LOG.info("Starting initialization job");
        try {
            if (!job.waitForCompletion(true)) {
                final String failure = NutchJob.getJobFailureLogMessage("Initialization", job);
                LOG.error(failure);
                throw new RuntimeException(failure);
            }
        }
        catch (IOException | ClassNotFoundException | InterruptedException e) {
            LOG.error("Initialization job failed:", e);
            throw e;
        }
        LOG.info("Finished initialization job.");
    }

    /**
     * Configures and runs the inverter job. It reads both the node db and the
     * outlink db, processes them with {@link Inverter.InvertMapper} and
     * {@link Inverter.InvertReducer}, and writes {@link LinkDatum} records as
     * sequence files to {@code output}.
     *
     * @param nodeDb node db to read
     * @param outlinkDb outlink db to read
     * @param output where the inverted links are written
     * @throws IOException propagated from the job execution
     * @throws InterruptedException propagated from the job execution
     * @throws ClassNotFoundException propagated from the job execution
     * @throws RuntimeException if the job finishes unsuccessfully
     */
    private void runInverter(Path nodeDb, Path outlinkDb, Path output) throws IOException, InterruptedException, ClassNotFoundException {
        final String jobName = "Nutch Linkrank: inverter nodedb: " + nodeDb + " outlinkdb: " + outlinkDb;
        final Job job = Job.getInstance(getConf(), jobName);
        job.setJarByClass(Inverter.class);

        // Input side: nodes and outlinks, both as sequence files.
        job.setInputFormatClass(SequenceFileInputFormat.class);
        FileInputFormat.addInputPath(job, nodeDb);
        FileInputFormat.addInputPath(job, outlinkDb);

        // Processing: the mapper wraps values, the reducer inverts the links.
        job.setMapperClass(Inverter.InvertMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(ObjectWritable.class);
        job.setReducerClass(Inverter.InvertReducer.class);

        // Output side: url -> link datum, stored as sequence files.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LinkDatum.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        FileOutputFormat.setOutputPath(job, output);
        job.getConfiguration().setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

        LOG.info("Starting inverter job");
        try {
            if (!job.waitForCompletion(true)) {
                final String failure = NutchJob.getJobFailureLogMessage("Inverter", job);
                LOG.error(failure);
                throw new RuntimeException(failure);
            }
        }
        catch (IOException | ClassNotFoundException | InterruptedException e) {
            LOG.error("Inverter job failed:", e);
            throw e;
        }
        LOG.info("Finished inverter job.");
    }

    /**
     * Configures and runs one iteration of the analysis job. It reads the
     * node db and the inverted links, processes them with
     * {@link Analyzer.AnalyzerMapper} and {@link Analyzer.AnalyzerReducer},
     * and writes the resulting nodes as a map file to {@code output}.
     *
     * <p>The one-based iteration number and the rank-one score are handed to
     * the tasks through the job configuration keys
     * {@code link.analyze.iteration} and {@code link.analyze.rank.one}.
     *
     * @param nodeDb node db to read
     * @param inverted inverted links to read
     * @param output where the new node db is written
     * @param iteration zero-based index of this iteration
     * @param numIterations total number of iterations (used in the job name)
     * @param rankOne rank-one score passed on to the reducer
     * @throws IOException propagated from the job execution
     * @throws InterruptedException propagated from the job execution
     * @throws ClassNotFoundException propagated from the job execution
     * @throws RuntimeException if the job finishes unsuccessfully
     */
    private void runAnalysis(Path nodeDb, Path inverted, Path output, int iteration, int numIterations, float rankOne) throws IOException, InterruptedException, ClassNotFoundException {
        final int oneBasedIteration = iteration + 1;
        final String jobName = "Nutch LinkRank: analysis iteration" + oneBasedIteration + " of " + numIterations;
        final Job job = Job.getInstance(getConf(), jobName);
        job.setJarByClass(Analyzer.class);

        // Parameters read by the reducer in its setup().
        final Configuration jobConf = job.getConfiguration();
        jobConf.set("link.analyze.iteration", Integer.toString(oneBasedIteration));
        jobConf.set("link.analyze.rank.one", Float.toString(rankOne));
        jobConf.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

        // Input side: nodes and inverted links, both as sequence files.
        job.setInputFormatClass(SequenceFileInputFormat.class);
        FileInputFormat.addInputPath(job, nodeDb);
        FileInputFormat.addInputPath(job, inverted);

        // Processing.
        job.setMapperClass(Analyzer.AnalyzerMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(ObjectWritable.class);
        job.setReducerClass(Analyzer.AnalyzerReducer.class);

        // Output side: url -> node, stored as a map file.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Node.class);
        job.setOutputFormatClass(MapFileOutputFormat.class);
        FileOutputFormat.setOutputPath(job, output);

        LOG.info("Starting analysis job");
        try {
            if (!job.waitForCompletion(true)) {
                final String failure = NutchJob.getJobFailureLogMessage("Analysis", job);
                LOG.error(failure);
                throw new RuntimeException(failure);
            }
        }
        catch (IOException | ClassNotFoundException | InterruptedException e) {
            LOG.error("Analysis job failed:", e);
            throw e;
        }
        LOG.info("Finished analysis job.");
    }

    /**
     * Creates a LinkRank tool without a configuration. This is the form used
     * by {@code main}, which passes the instance to {@code ToolRunner}
     * together with a configuration.
     */
    public LinkRank() {
    }

    /**
     * Creates a LinkRank tool with the given configuration.
     *
     * @param conf configuration handed to the {@code Configured} superclass
     */
    public LinkRank(Configuration conf) {
        super(conf);
    }

    /**
     * Runs the complete link analysis on a web graph db.
     *
     * <p>Steps, in order: run the counter job, initialize a working node db
     * under {@code <webGraphDb>/linkrank/nodes}, then repeat the inverter and
     * analysis jobs for {@code link.analyze.num.iterations} iterations
     * (default 10), installing each iteration's output through
     * {@code FSUtils.replace}. Finally the working node db replaces
     * {@code <webGraphDb>/nodes} and the {@code linkrank} directory is
     * deleted.
     *
     * @param webGraphDb root path of the web graph db to analyze
     * @throws IOException if a job or a file system operation fails
     * @throws ClassNotFoundException propagated from the job execution
     * @throws InterruptedException propagated from the job execution
     */
    public void analyze(Path webGraphDb) throws IOException, ClassNotFoundException, InterruptedException {
        final StopWatch timer = new StopWatch();
        timer.start();
        LOG.info("LinkRank Analysis: starting");

        // Working directory for the analysis, created on demand.
        final Path linkRank = new Path(webGraphDb, "linkrank");
        final Configuration conf = getConf();
        final FileSystem fs = linkRank.getFileSystem(conf);
        if (!fs.exists(linkRank)) {
            fs.mkdirs(linkRank);
        }

        final Path wgOutlinkDb = new Path(webGraphDb, "outlinks/current");
        final Path wgNodeDb = new Path(webGraphDb, "nodes");
        final Path nodeDb = new Path(linkRank, "nodes");

        final int numLinks = runCounter(fs, webGraphDb);
        runInitializer(wgNodeDb, nodeDb);
        final float rankOneScore = 1.0f / (float) numLinks;
        LOG.info("Analysis: Number of links: {}", numLinks);
        LOG.info("Analysis: Rank One: {}", rankOneScore);

        final int numIterations = conf.getInt("link.analyze.num.iterations", 10);
        for (int round = 1; round <= numIterations; round++) {
            LOG.info("Analysis: Starting iteration {} of {}", round, numIterations);

            // Each iteration works in its own randomly named sibling directory.
            final int suffix = new Random().nextInt(Integer.MAX_VALUE);
            final Path tempRank = new Path(linkRank + "-" + suffix);
            fs.mkdirs(tempRank);
            final Path tempInverted = new Path(tempRank, "inverted");
            final Path tempNodeDb = new Path(tempRank, "nodes");

            runInverter(nodeDb, wgOutlinkDb, tempInverted);
            runAnalysis(nodeDb, tempInverted, tempNodeDb, round - 1, numIterations, rankOneScore);

            LOG.info("Analysis: Installing new link scores");
            FSUtils.replace(fs, linkRank, tempRank, true);
            LOG.info("Analysis: finished iteration {} of {}", round, numIterations);
        }

        LOG.info("Analysis: Installing web graph nodes");
        FSUtils.replace(fs, wgNodeDb, nodeDb, true);
        fs.delete(linkRank, true);

        timer.stop();
        LOG.info("LinkRank Analysis: finished, elapsed: {} ms", timer.getTime(TimeUnit.MILLISECONDS));
    }

    /**
     * Command line entry point: runs the tool through {@code ToolRunner} with
     * a Nutch configuration and exits the JVM with the tool's return code.
     *
     * @param args command line arguments, see {@link #run(String[])}
     * @throws Exception propagated from {@code ToolRunner.run}
     */
    public static void main(String[] args) throws Exception {
        final Configuration conf = NutchConfiguration.create();
        final int exitCode = ToolRunner.run(conf, new LinkRank(), args);
        System.exit(exitCode);
    }

    /**
     * Parses the command line and runs the analysis.
     *
     * <p>Recognized options are {@code -help} and {@code -webgraphdb <path>}.
     *
     * @param args command line arguments
     * @return {@code 0} on success, {@code -1} when the usage text was printed
     *         (help requested or {@code -webgraphdb} missing), {@code -2}
     *         when parsing or the analysis threw an exception
     * @throws Exception declared by the {@code Tool} interface; exceptions
     *         raised while parsing or analyzing are caught and logged here
     */
    @Override
    public int run(String[] args) throws Exception {
        // OptionBuilder keeps its state statically, so the chained form issues
        // exactly the same sequence of static calls as separate statements.
        final Option helpOpts = OptionBuilder.withArgName("help")
                .withDescription("show this help message")
                .create("help");
        final Option webgraphOpts = OptionBuilder.withArgName("webgraphdb")
                .hasArg()
                .withDescription("the web graph db to use")
                .create("webgraphdb");

        final Options options = new Options();
        options.addOption(helpOpts);
        options.addOption(webgraphOpts);

        final GnuParser parser = new GnuParser();
        try {
            final CommandLine line = parser.parse(options, args);
            final boolean showUsage = line.hasOption("help") || !line.hasOption("webgraphdb");
            if (showUsage) {
                new HelpFormatter().printHelp("LinkRank", options);
                return -1;
            }
            analyze(new Path(line.getOptionValue("webgraphdb")));
            return 0;
        }
        catch (Exception e) {
            LOG.error("LinkAnalysis:", e);
            return -2;
        }
    }

    private static class Analyzer {
        private Analyzer() {
        }

        public static class AnalyzerReducer
        extends Reducer<Text, ObjectWritable, Text, Node> {
            private Configuration conf;
            private float dampingFactor = 0.85f;
            private float rankOne = 0.0f;
            private int itNum = 0;
            private boolean limitPages = true;
            private boolean limitDomains = true;

            public void setup(Reducer.Context context) {
                this.conf = context.getConfiguration();
                this.dampingFactor = this.conf.getFloat("link.analyze.damping.factor", 0.85f);
                this.rankOne = this.conf.getFloat("link.analyze.rank.one", 0.0f);
                this.itNum = this.conf.getInt("link.analyze.iteration", 0);
                this.limitPages = this.conf.getBoolean("link.ignore.limit.page", true);
                this.limitDomains = this.conf.getBoolean("link.ignore.limit.domain", true);
            }

            public void reduce(Text key, Iterable<ObjectWritable> values, Reducer.Context context) throws IOException, InterruptedException {
                String url = key.toString();
                HashSet<String> domains = new HashSet<String>();
                HashSet<String> pages = new HashSet<String>();
                Node node = null;
                int numInlinks = 0;
                float totalInlinkScore = this.rankOne;
                for (ObjectWritable next : values) {
                    Object value = next.get();
                    if (value instanceof Node) {
                        node = (Node)value;
                        continue;
                    }
                    if (!(value instanceof LinkDatum)) continue;
                    LinkDatum linkDatum = (LinkDatum)value;
                    float scoreFromInlink = linkDatum.getScore();
                    String inlinkUrl = linkDatum.getUrl();
                    String inLinkDomain = URLUtil.getDomainName(inlinkUrl);
                    String inLinkPage = URLUtil.getPage(inlinkUrl);
                    if (this.limitPages && pages.contains(inLinkPage) || this.limitDomains && domains.contains(inLinkDomain)) {
                        LOG.debug("{}: ignoring {} from {}, duplicate page or domain", new Object[]{url, Float.valueOf(scoreFromInlink), inlinkUrl});
                        continue;
                    }
                    ++numInlinks;
                    domains.add(inLinkDomain);
                    pages.add(inLinkPage);
                    LOG.debug("{}: adding {} from {}, total: {}", new Object[]{url, Float.valueOf(scoreFromInlink), inlinkUrl, Float.valueOf(totalInlinkScore += scoreFromInlink)});
                }
                float linkRankScore = 1.0f - this.dampingFactor + this.dampingFactor * totalInlinkScore;
                LOG.debug("{}: score: {} num inlinks: {} iteration: {}", new Object[]{url, Float.valueOf(linkRankScore), numInlinks, this.itNum});
                Node outNode = (Node)WritableUtils.clone((Writable)node, (Configuration)this.conf);
                outNode.setInlinkScore(linkRankScore);
                context.write((Object)key, (Object)outNode);
            }
        }

        public static class AnalyzerMapper
        extends Mapper<Text, Writable, Text, ObjectWritable> {
            private Configuration conf;

            public void setup(Mapper.Context context) {
                this.conf = context.getConfiguration();
            }

            public void map(Text key, Writable value, Mapper.Context context) throws IOException, InterruptedException {
                ObjectWritable objWrite = new ObjectWritable();
                objWrite.set((Object)WritableUtils.clone((Writable)value, (Configuration)this.conf));
                context.write((Object)key, (Object)objWrite);
            }
        }
    }

    private static class Inverter {
        private Inverter() {
        }

        public static class InvertReducer
        extends Reducer<Text, ObjectWritable, Text, LinkDatum> {
            private Configuration conf;

            public void setup(Reducer.Context context) {
                this.conf = context.getConfiguration();
            }

            public void reduce(Text key, Iterable<ObjectWritable> values, Reducer.Context context) throws IOException, InterruptedException {
                String fromUrl = key.toString();
                ArrayList<LinkDatum> outlinks = new ArrayList<LinkDatum>();
                Node node = null;
                for (ObjectWritable write : values) {
                    Object obj = write.get();
                    if (obj instanceof Node) {
                        node = (Node)obj;
                        continue;
                    }
                    if (!(obj instanceof LinkDatum)) continue;
                    outlinks.add((LinkDatum)WritableUtils.clone((Writable)((LinkDatum)obj), (Configuration)this.conf));
                }
                int numOutlinks = node.getNumOutlinks();
                float inlinkScore = node.getInlinkScore();
                float outlinkScore = node.getOutlinkScore();
                LOG.debug("{}: num outlinks {}", (Object)fromUrl, (Object)numOutlinks);
                if (numOutlinks > 0) {
                    for (int i = 0; i < outlinks.size(); ++i) {
                        LinkDatum outlink = (LinkDatum)outlinks.get(i);
                        String toUrl = outlink.getUrl();
                        outlink.setUrl(fromUrl);
                        outlink.setScore(outlinkScore);
                        context.write((Object)new Text(toUrl), (Object)outlink);
                        LOG.debug("{}: inverting inlink from {} origscore: {} numOutlinks: {} inlinkscore: {}", new Object[]{toUrl, fromUrl, Float.valueOf(inlinkScore), numOutlinks, Float.valueOf(outlinkScore)});
                    }
                }
            }
        }

        public static class InvertMapper
        extends Mapper<Text, Writable, Text, ObjectWritable> {
            public void setup(Mapper.Context context) {
            }

            public void map(Text key, Writable value, Mapper.Context context) throws IOException, InterruptedException {
                ObjectWritable objWrite = new ObjectWritable();
                objWrite.set((Object)value);
                context.write((Object)key, (Object)objWrite);
            }
        }
    }

    private static class Initializer
    extends Mapper<Text, Node, Text, Node> {
        private Configuration conf;
        private float initialScore = 1.0f;

        private Initializer() {
        }

        public void setup(Mapper.Context context) {
            this.conf = context.getConfiguration();
            this.initialScore = this.conf.getFloat("link.analyze.initial.score", 1.0f);
        }

        public void map(Text key, Node node, Mapper.Context context) throws IOException, InterruptedException {
            String url = key.toString();
            Node outNode = (Node)WritableUtils.clone((Writable)node, (Configuration)this.conf);
            outNode.setInlinkScore(this.initialScore);
            context.write((Object)new Text(url), (Object)outNode);
        }
    }

    private static class Counter {
        private static Text numNodes = new Text("_num_nodes_");
        private static LongWritable one = new LongWritable(1L);

        private Counter() {
        }

        public static class CountReducer
        extends Reducer<Text, LongWritable, Text, LongWritable> {
            public void reduce(Text key, Iterable<LongWritable> values, Reducer.Context context) throws IOException, InterruptedException {
                long total = 0L;
                for (LongWritable val : values) {
                    total += val.get();
                }
                context.write((Object)numNodes, (Object)new LongWritable(total));
            }
        }

        public static class CountMapper
        extends Mapper<Text, Node, Text, LongWritable> {
            public void map(Text key, Node value, Mapper.Context context) throws IOException, InterruptedException {
                context.write((Object)numNodes, (Object)one);
            }
        }
    }
}

