package gr.ilsp.fmc.parser;

import cascading.flow.FlowProcess;
import cascading.flow.hadoop.HadoopFlowProcess;
import cascading.operation.BaseOperation;
import cascading.operation.Function;
import cascading.operation.FunctionCall;
import cascading.operation.OperationCall;
import cascading.tuple.TupleEntryCollector;
import com.bixolabs.cascading.LoggingFlowProcess;
import com.bixolabs.cascading.LoggingFlowReporter;
import com.bixolabs.cascading.NullContext;
import gr.ilsp.fmc.classifier.Classifier;
import gr.ilsp.fmc.datums.ClassifierDatum;
import gr.ilsp.fmc.datums.CrawlDbDatum;
import gr.ilsp.fmc.datums.ExtendedParsedDatum;
import gr.ilsp.fmc.datums.ExtendedUrlDatum;
import org.apache.lucene.analysis.shingle.ShingleFilter;

/* loaded from: input_file:gr/ilsp/fmc/parser/ScoreLinks.class */
/**
 * Cascading function that turns the outlinks of a parsed page into scored URL tuples.
 *
 * <p>For every outlink with a non-null target URL, one {@link ExtendedUrlDatum} tuple is
 * emitted. Its score is the page's {@code TOTABSCORE} divided by the number of outlinks,
 * plus the rank the {@link Classifier} assigns to the link. Pages whose crawl depth has
 * reached the classifier's maximum depth emit nothing and are only counted as rejected.
 */
public class ScoreLinks extends BaseOperation<NullContext> implements Function<NullContext> {
    private static final long serialVersionUID = 864053504476981356L;

    /** Classifier used to rank links; supplied once at construction time. */
    private final Classifier _classifier;

    /** Counter-reporting wrapper around the flow process; created in {@link #prepare}. */
    private transient LoggingFlowProcess _flowProcess;

    /** Counters reported through the logging flow process. */
    private enum ScoreLinksCounters {
        /** Incremented by one for every URL tuple emitted. */
        SCORING_LINKS_NUMBER,
        /** Incremented by the milliseconds spent in each {@code operate} call. */
        SCORING_LINKS_TIME,
        /** Incremented by the number of outlinks of each page rejected by the depth limit. */
        SCORING_LINKS_TUNNEL_REJECTED;

        /**
         * Returns a fresh array of all counters.
         *
         * <p>{@code values()} already returns a new array on every call, so no further
         * copy is needed.
         */
        public static ScoreLinksCounters[] valuesCustom() {
            return values();
        }
    }

    /**
     * Creates the operation, declaring {@link ExtendedUrlDatum#FIELDS} as its output fields.
     *
     * @param classifier classifier used to rank each outlink
     */
    public ScoreLinks(Classifier classifier) {
        super(ExtendedUrlDatum.FIELDS);
        this._classifier = classifier;
    }

    /**
     * Wraps the flow process in a {@link LoggingFlowProcess} and attaches a logging reporter.
     *
     * <p>The incoming process is cast to {@link HadoopFlowProcess}, so any other
     * {@code FlowProcess} implementation fails here with a {@code ClassCastException}.
     */
    @Override
    public void prepare(FlowProcess flowProcess, OperationCall<NullContext> operationCall) {
        super.prepare(flowProcess, operationCall);
        this._flowProcess = new LoggingFlowProcess((HadoopFlowProcess) flowProcess);
        this._flowProcess.addReporter(new LoggingFlowReporter());
    }

    /** Dumps the collected counters, then delegates to the superclass cleanup. */
    @Override
    public void cleanup(FlowProcess flowProcess, OperationCall<NullContext> operationCall) {
        this._flowProcess.dumpCounters();
        super.cleanup(flowProcess, operationCall);
    }

    /**
     * Scores the outlinks of one parsed page and records the elapsed time.
     *
     * <p>A page without outlinks produces no output. It is skipped before any scoring so
     * that the per-link share is never computed as a division by zero and the page text
     * is not language-checked for nothing.
     *
     * @param flowProcess  unused; counters go through the process wrapped in {@link #prepare}
     * @param functionCall supplies the argument tuple and the output collector
     */
    @Override
    public void operate(FlowProcess flowProcess, FunctionCall<NullContext> functionCall) {
        long startMillis = System.currentTimeMillis();
        ExtendedParsedDatum parsedDatum = new ExtendedParsedDatum(functionCall.getArguments());
        ExtendedOutlink[] outlinks = parsedDatum.getOutlinks();
        if (outlinks.length > 0) {
            scoreOutlinks(parsedDatum, outlinks, functionCall);
        }
        this._flowProcess.increment(ScoreLinksCounters.SCORING_LINKS_TIME, (int) (System.currentTimeMillis() - startMillis));
    }

    /** Applies the depth limit and emits one scored URL tuple per outlink with a target URL. */
    private void scoreOutlinks(ExtendedParsedDatum parsedDatum, ExtendedOutlink[] outlinks, FunctionCall<NullContext> functionCall) {
        double pageScore = parsedDatum.getTupleEntry().getDouble(ClassifierDatum.TOTABSCORE);
        // The caller guarantees outlinks.length > 0, so this division is well defined.
        double perLinkShare = pageScore / outlinks.length;
        int depth = ((Integer) parsedDatum.getPayloadValue(CrawlDbDatum.CRAWL_DEPTH)).intValue();
        boolean hasTopic = this._classifier.getTopic() != null;

        // Without a topic, or when the page has a positive score, the depth restarts:
        // -1 here becomes 0 in the payload written below.
        if (!hasTopic || perLinkShare > 0.0d) {
            depth = -1;
        }

        if (depth >= this._classifier.getMaxDepth()) {
            this._flowProcess.increment(ScoreLinksCounters.SCORING_LINKS_TUNNEL_REJECTED, outlinks.length);
            return;
        }

        parsedDatum.setPayloadValue(CrawlDbDatum.CRAWL_DEPTH, Integer.valueOf(depth + 1));
        TupleEntryCollector collector = functionCall.getOutputCollector();
        // NOTE(review): toLowerCase() uses the JVM default locale; confirm whether
        // checkLang needs locale-independent lowercasing across cluster nodes.
        String pageLanguage = Classifier.checkLang(Classifier.cleanContent(parsedDatum.getParsedText()).toLowerCase());

        for (ExtendedOutlink outlink : outlinks) {
            String anchor = outlink.getAnchor();
            // String.valueOf keeps the original behaviour of rendering a null anchor as "null".
            String linkText = String.valueOf(anchor) + ShingleFilter.TOKEN_SEPARATOR + outlink.getSurroundText();
            String targetUrl = outlink.getToUrl();
            // NOTE(review): the rank is computed even for links without a target URL, as in
            // the original; move it below the null check if rankLink has no side effects.
            double linkRank = hasTopic
                    ? this._classifier.rankLink(linkText, anchor, pageLanguage, pageScore)
                    : this._classifier.rankLinkNotopic(linkText, anchor, pageLanguage, pageScore);
            if (targetUrl == null) {
                continue;
            }
            // Strip embedded line breaks from the URL before building the datum.
            ExtendedUrlDatum urlDatum = new ExtendedUrlDatum(targetUrl.replaceAll("[\n\r]", ""));
            urlDatum.setPayload(parsedDatum.getPayload());
            urlDatum.setScore(Double.valueOf(perLinkShare + linkRank));
            collector.add(urlDatum.getTuple());
            this._flowProcess.increment(ScoreLinksCounters.SCORING_LINKS_NUMBER, 1);
        }
    }
}
