package gr.ilsp.fmc.operations;

import cascading.flow.FlowProcess;
import cascading.flow.hadoop.HadoopFlowProcess;
import cascading.operation.BaseOperation;
import cascading.operation.Buffer;
import cascading.operation.BufferCall;
import cascading.operation.OperationCall;
import cascading.tuple.TupleEntry;
import com.bixolabs.cascading.LoggingFlowProcess;
import com.bixolabs.cascading.LoggingFlowReporter;
import com.bixolabs.cascading.NullContext;
import gr.ilsp.fmc.datums.CrawlDbDatum;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;

/* loaded from: input_file:gr/ilsp/fmc/operations/MakeDistinctCrawlDbFunction.class */
/**
 * Cascading {@link Buffer} that collapses all {@link CrawlDbDatum} entries of one grouping into
 * at most one output datum.
 *
 * <p>The entries of the group are scanned in iterator order. An entry replaces the currently
 * selected one when it is the first entry, when its status is not one of
 * {@link #NON_FINAL_STATUSES} (which also ends the scan), or when it is at least as recently
 * updated as the selected entry and its score is at least the best score recorded so far. The
 * emitted datum carries the URL, last-fetched time, status, last-updated time and crawl depth of
 * the selected entry together with the best recorded score.
 *
 * <p>NOTE(review): the name suggests the upstream pipe groups by URL so that each group holds the
 * duplicates of a single URL; that grouping is not visible in this file - confirm against the
 * workflow that wires this operation.
 */
public class MakeDistinctCrawlDbFunction extends BaseOperation<NullContext> implements Buffer<NullContext> {
    private static final long serialVersionUID = -3251085418755783557L;

    /**
     * Names of the statuses for which the scan of a group keeps going. The first entry whose
     * status name is <em>not</em> in this set is selected unconditionally and stops the scan.
     */
    private static final HashSet<String> NON_FINAL_STATUSES = new HashSet<>(Arrays.asList(
            "SKIPPED_DEFERRED",
            "SKIPPED_BY_SCORER",
            "SKIPPED_BY_SCORE",
            "SKIPPED_TIME_LIMIT",
            "SKIPPED_INTERRUPTED",
            "SKIPPED_INEFFICIENT",
            "SKIPPED_PER_SERVER_LIMIT",
            "UNFETCHED"));

    /** Counter-reporting wrapper around the flow process; created in {@link #prepare}. */
    private transient LoggingFlowProcess _flowProcess;

    /** Counters incremented by this operation. */
    private enum DistinctFunctionCounters {
        /** Accumulated milliseconds spent inside {@code operate}. */
        MAKE_DISTINCT_TIME,
        /** Number of input entries read from the argument iterators. */
        MAKE_DISTINCT_URLS_PROCESSED,
        /** Number of datums emitted (one per non-empty group). */
        MAKE_DISTINCT_URLS_UNIQUE
    }

    /** Creates the operation, declaring {@link CrawlDbDatum#FIELDS} as its output fields. */
    public MakeDistinctCrawlDbFunction() {
        super(CrawlDbDatum.FIELDS);
    }

    /**
     * Wraps the supplied flow process in a {@link LoggingFlowProcess} with a
     * {@link LoggingFlowReporter} attached.
     *
     * @param flowProcess   the current flow process; it is cast to {@link HadoopFlowProcess}, so
     *                      any other implementation fails with a {@link ClassCastException}
     * @param operationCall the operation call passed on to the superclass
     */
    @Override // cascading.operation.BaseOperation, cascading.operation.Operation
    public void prepare(FlowProcess flowProcess, OperationCall<NullContext> operationCall) {
        super.prepare(flowProcess, operationCall);
        this._flowProcess = new LoggingFlowProcess((HadoopFlowProcess) flowProcess);
        this._flowProcess.addReporter(new LoggingFlowReporter());
    }

    /**
     * Dumps the collected counters and then delegates to the superclass cleanup.
     *
     * @param flowProcess   the current flow process
     * @param operationCall the operation call passed on to the superclass
     */
    @Override // cascading.operation.BaseOperation, cascading.operation.Operation
    public void cleanup(FlowProcess flowProcess, OperationCall<NullContext> operationCall) {
        this._flowProcess.dumpCounters();
        super.cleanup(flowProcess, operationCall);
    }

    /**
     * Reduces the entries of the current group to a single datum and emits it. Nothing is
     * emitted when the group has no entries.
     *
     * @param flowProcess the current flow process (unused; counters go through the wrapper
     *                    created in {@link #prepare})
     * @param bufferCall  supplies the group's argument iterator and the output collector
     */
    @Override // cascading.operation.Buffer
    public void operate(FlowProcess flowProcess, BufferCall<NullContext> bufferCall) {
        long startTime = System.currentTimeMillis();
        CrawlDbDatum selected = null;
        double bestScore = 0.0d;
        boolean finalStatusSeen = false;

        Iterator<TupleEntry> entries = bufferCall.getArgumentsIterator();
        while (entries.hasNext() && !finalStatusSeen) {
            this._flowProcess.increment(DistinctFunctionCounters.MAKE_DISTINCT_URLS_PROCESSED, 1);
            CrawlDbDatum candidate = new CrawlDbDatum(entries.next());
            double score = candidate.getScore();

            boolean replace;
            if (!NON_FINAL_STATUSES.contains(candidate.getLastStatus().name())) {
                // A status outside the set always wins and terminates the scan.
                replace = true;
                finalStatusSeen = true;
            } else if (selected != null) {
                // Otherwise the candidate must be no older than the current selection and must
                // not lower the best score seen so far.
                replace = candidate.getLastUpdated() >= selected.getLastUpdated() && score >= bestScore;
            } else {
                // First entry of the group.
                replace = true;
            }

            if (replace) {
                selected = copyWithoutScore(candidate);
                // The score never decreases; it starts at 0.0, so negative scores are clamped.
                if (score > bestScore) {
                    bestScore = score;
                }
            }
        }

        // The score is applied only after the null check: an empty group leaves 'selected' null,
        // and dereferencing it before the check would throw a NullPointerException.
        if (selected != null) {
            selected.setScore(bestScore);
            this._flowProcess.increment(DistinctFunctionCounters.MAKE_DISTINCT_URLS_UNIQUE, 1);
            bufferCall.getOutputCollector().add(selected.getTuple());
        }
        this._flowProcess.increment(DistinctFunctionCounters.MAKE_DISTINCT_TIME, (int) (System.currentTimeMillis() - startTime));
    }

    /** Builds a new datum holding every field of {@code source} that this operation copies, except the score. */
    private static CrawlDbDatum copyWithoutScore(CrawlDbDatum source) {
        CrawlDbDatum copy = new CrawlDbDatum();
        copy.setUrl(source.getUrl());
        copy.setLastFetched(source.getLastFetched());
        copy.setLastStatus(source.getLastStatus());
        copy.setLastUpdated(source.getLastUpdated());
        copy.setCrawlDepth(source.getCrawlDepth());
        return copy;
    }
}
