package gr.ilsp.fmc.parser;

import bixo.datum.UrlDatum;
import bixo.hadoop.ImportCounters;
import bixo.urls.BaseUrlFilter;
import cascading.flow.FlowProcess;
import cascading.flow.hadoop.HadoopFlowProcess;
import cascading.operation.BaseOperation;
import cascading.operation.Filter;
import cascading.operation.FilterCall;
import cascading.operation.OperationCall;
import com.bixolabs.cascading.LoggingFlowProcess;
import com.bixolabs.cascading.LoggingFlowReporter;
import com.bixolabs.cascading.NullContext;

/* loaded from: input_file:gr/ilsp/fmc/parser/ExtendedUrlFilter.class */
/**
 * Cascading {@link Filter} that decides, tuple by tuple, whether a URL should be dropped from
 * the stream by consulting a wrapped {@link BaseUrlFilter} and an optional "main host" string.
 *
 * <p>Accepted and filtered URLs are tallied through a {@link LoggingFlowProcess} using
 * {@link ImportCounters#URLS_ACCEPTED} and {@link ImportCounters#URLS_FILTERED}.
 */
public class ExtendedUrlFilter extends BaseOperation<NullContext> implements Filter<NullContext> {
    private static final long serialVersionUID = -5138525444745041509L;

    /** Delegate whose verdict is consulted for every URL that is not bypassed. */
    private BaseUrlFilter _filter;

    /** Substring that, when contained in a URL, forces it to be kept; may be {@code null}. */
    private String _mainhost;

    /** Counter-reporting wrapper; created in {@link #prepare} and not serialized. */
    private transient LoggingFlowProcess _flowProcess;

    /**
     * Creates a filter with no main host configured.
     *
     * @param baseUrlFilter the wrapped URL filter
     */
    public ExtendedUrlFilter(BaseUrlFilter baseUrlFilter) {
        this(baseUrlFilter, null);
    }

    /**
     * Creates a filter with an explicit main host.
     *
     * @param baseUrlFilter the wrapped URL filter
     * @param str           substring that exempts any URL containing it from removal
     */
    public ExtendedUrlFilter(BaseUrlFilter baseUrlFilter, String str) {
        this._filter = baseUrlFilter;
        this._mainhost = str;
    }

    /**
     * Wraps the supplied flow process in a {@link LoggingFlowProcess} and attaches a
     * {@link LoggingFlowReporter} to it.
     *
     * <p>The flow process is cast to {@link HadoopFlowProcess}, so any other implementation
     * fails here with a {@link ClassCastException}.
     */
    @Override
    public void prepare(FlowProcess flowProcess, OperationCall<NullContext> operationCall) {
        super.prepare(flowProcess, operationCall);
        LoggingFlowProcess loggingProcess = new LoggingFlowProcess((HadoopFlowProcess) flowProcess);
        loggingProcess.addReporter(new LoggingFlowReporter());
        this._flowProcess = loggingProcess;
    }

    /**
     * Decides whether the URL carried by the current tuple is removed.
     *
     * <p>URLs matched by {@link #bypassesFiltering(String)} are kept immediately and touch no
     * counter. Every other URL is removed only when the wrapped filter asks for removal
     * <em>and</em> the URL does not contain the reference string.
     *
     * <p>NOTE(review): when no main host was supplied, the reference string is the URL itself,
     * so the containment test always succeeds and every non-bypassed URL is accepted whatever
     * the wrapped filter says. Confirm that this is intended.
     *
     * @return {@code true} to drop the tuple, {@code false} to keep it
     */
    @Override
    public boolean isRemove(FlowProcess flowProcess, FilterCall<NullContext> filterCall) {
        final UrlDatum datum = new UrlDatum(filterCall.getArguments());
        final String link = datum.getUrl();
        if (bypassesFiltering(link)) {
            return false;
        }

        final String reference = (this._mainhost != null) ? this._mainhost : link;
        // The wrapped filter is asked first; the containment test only runs if it wants removal.
        final boolean rejected = this._filter.isRemove(datum) && !link.contains(reference);
        if (rejected) {
            this._flowProcess.increment(ImportCounters.URLS_FILTERED, 1);
            return true;
        }
        this._flowProcess.increment(ImportCounters.URLS_ACCEPTED, 1);
        return false;
    }

    /**
     * Dumps the collected counters, then delegates to the superclass cleanup.
     */
    @Override
    public void cleanup(FlowProcess flowProcess, OperationCall<NullContext> operationCall) {
        this._flowProcess.dumpCounters();
        super.cleanup(flowProcess, operationCall);
    }

    /**
     * Tells whether a URL skips both the wrapped filter and the counters.
     *
     * @param url the URL text taken from the tuple
     * @return {@code true} for URLs starting with {@code "ftp"}, containing {@code "mailto:"},
     *         equal to {@code "http:/"}, empty, or shorter than seven characters
     */
    private static boolean bypassesFiltering(String url) {
        if (url.startsWith("ftp")) {
            return true;
        }
        if (url.contains("mailto:")) {
            return true;
        }
        if (url.equals("http:/")) {
            return true;
        }
        return url.isEmpty() || url.length() < 7;
    }
}
