package bixo.operations;

import bixo.datum.UrlDatum;
import bixo.hadoop.ImportCounters;
import cascading.flow.FlowProcess;
import cascading.operation.BaseOperation;
import cascading.operation.Function;
import cascading.operation.FunctionCall;
import com.bixolabs.cascading.NullContext;
import java.net.MalformedURLException;
import java.net.URL;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.fa.PersianAnalyzer;
import org.apache.xmlbeans.impl.jam.xml.JamXmlElements;

/* loaded from: input_file:bixo/operations/LoadUrlsFunction.class */
/**
 * Cascading function that turns lines of text into {@link UrlDatum} tuples.
 *
 * <p>Each incoming tuple is expected to carry one line of text in its
 * {@code "line"} field. Blank lines and lines starting with {@code #} are
 * skipped silently. Any other line is validated by constructing a
 * {@link URL}; valid lines are emitted as {@code UrlDatum} tuples, invalid
 * ones are logged and counted as rejected.
 *
 * <p>An optional cap limits how many URLs a single instance of this operation
 * will emit. The count is kept in an instance field, so the limit applies per
 * instance of this function.
 */
public class LoadUrlsFunction extends BaseOperation<NullContext> implements Function<NullContext> {
    private static final Logger LOGGER = Logger.getLogger(LoadUrlsFunction.class);

    /** Name of the argument field holding the raw input line. */
    private static final String LINE_FIELD = "line";

    /** Lines beginning with this prefix are treated as comments and ignored. */
    private static final String COMMENT_PREFIX = "#";

    /** Maximum number of URLs this instance will emit. */
    private final int _maxUrls;

    /** Number of URLs this instance has emitted so far. */
    private int _numUrls;

    /**
     * Creates a function that emits at most {@code i} URLs.
     *
     * @param i maximum number of URLs to accept; once reached, every further
     *          input tuple is counted as filtered and dropped
     */
    public LoadUrlsFunction(int i) {
        super(UrlDatum.FIELDS);
        this._maxUrls = i;
        this._numUrls = 0;
    }

    /** Creates a function with an effectively unlimited cap ({@link Integer#MAX_VALUE}). */
    public LoadUrlsFunction() {
        this(Integer.MAX_VALUE);
    }

    /**
     * Processes one input line, emitting a {@link UrlDatum} tuple if it holds a
     * well-formed URL and the cap has not been reached.
     *
     * <p>Counters incremented: {@code URLS_FILTERED} when the cap has already
     * been reached, {@code URLS_ACCEPTED} when a URL is emitted, and
     * {@code URLS_REJECTED} when the line is not a well-formed URL. Blank,
     * comment and null lines touch no counter.
     *
     * @param flowProcess  the current flow process, used for counters
     * @param functionCall supplies the input arguments and the output collector
     */
    @Override
    public void operate(FlowProcess flowProcess, FunctionCall<NullContext> functionCall) {
        // The cap is checked before the line is even read, so once it is reached
        // every incoming tuple (including blanks and comments) counts as filtered.
        if (this._numUrls >= this._maxUrls) {
            flowProcess.increment(ImportCounters.URLS_FILTERED, 1);
            return;
        }

        String rawLine = functionCall.getArguments().getString(LINE_FIELD);
        if (rawLine == null) {
            // A null field value carries no URL; skip it like a blank line
            // instead of failing with a NullPointerException on trim().
            return;
        }

        String url = rawLine.trim();
        if (url.isEmpty() || url.startsWith(COMMENT_PREFIX)) {
            return;
        }

        try {
            // Constructed purely for validation: throws MalformedURLException
            // when the text is not a well-formed URL.
            new URL(url);
            functionCall.getOutputCollector().add(new UrlDatum(url).getTuple());
            this._numUrls++;
            flowProcess.increment(ImportCounters.URLS_ACCEPTED, 1);
        } catch (MalformedURLException e) {
            LOGGER.warn("Invalid URL in input data file: " + url);
            flowProcess.increment(ImportCounters.URLS_REJECTED, 1);
        }
    }
}
