package cascading.scheme;

import cascading.scheme.TextLine;
import cascading.tap.TapException;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import cascading.tuple.Tuples;
import cascading.util.Util;
import java.beans.ConstructorProperties;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor;
import org.apache.hadoop.util.StringUtils;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory;
import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilterFactory;
import org.apache.xerces.impl.xs.SchemaSymbols;

/* loaded from: input_file:cascading/scheme/TextDelimited.class */
public class TextDelimited extends TextLine {
    /** Logger used by {@code source} to report value-count mismatches and coercion failures. */
    private static final Logger LOG = Logger.getLogger(TextDelimited.class);
    /** Regex whose single group matches each character that {@code createSplitPatternFor} backslash-escapes in a delimiter. */
    private static final String SPECIAL_REGEX_CHARS = "([\\]\\[|.*<>\\\\$^?()=!+])";
    /**
     * Format of the split regex used when a quote string is configured: {@code %1$s} is the quote
     * and {@code %2$s} the escaped delimiter.
     * NOTE(review): the negative lookahead looks intended to keep delimiters inside quoted values
     * from splitting the line - confirm against real data.
     */
    private static final String QUOTED_REGEX_FORMAT = "%2$s(?!(?:[^%1$s%2$s]|[^%1$s%2$s]%2$s[^%1$s])+%1$s)";
    /** Format of the regex matching a value wrapped in the quote ({@code %1$s}); group 1 is the text between the quotes. */
    private static final String CLEAN_REGEX_FORMAT = "^(?:%1$s)(.*)(?:%1$s)$";
    /** Format of the regex matching two consecutive occurrences of the quote ({@code %1$s}). */
    private static final String ESCAPE_REGEX_FORMAT = "(%1$s%1$s)";
    /** Pattern {@code source} uses to split an input line into values. */
    protected Pattern splitPattern;
    /** Pattern handed to {@code cleanSplit} to strip surrounding quotes; null when no quote is configured. */
    protected Pattern cleanPattern;
    /** Pattern handed to {@code cleanSplit} to find doubled quotes; null when no quote is configured. */
    protected Pattern escapePattern;
    /** When true, {@code source} returns null for the record whose key is 0. */
    private boolean skipHeader;
    /** Delimiter separating values on a line. */
    private String delimiter;
    /** Quote string; left null when the constructor is given a null or empty quote. */
    private String quote;
    /** When true, {@code source} throws if a line does not yield exactly {@code numValues} values. */
    private boolean strict;
    /** Number of declared sink fields, as reported by {@code Fields.size()}. */
    private int numValues;
    /** Types each parsed value is coerced to in {@code source}; null when values are left as parsed. */
    private Class[] types;
    /** When true, a coercion failure in {@code source} is logged and the value set to null instead of throwing. */
    private boolean safe;
    /** Array allocated on the first {@code sink} call and reused on later calls. */
    private Object[] buffer;
    /** Tuple instance created on the first {@code sink} call and reused for every emitted line. */
    private DecoratorTuple decoratorTuple;

    /* loaded from: input_file:cascading/scheme/TextDelimited$DecoratorTuple.class */
    /**
     * Tuple that carries a pre-rendered line of text alongside the elements of another tuple.
     * <p>
     * {@code sink} reuses a single instance of this class: it calls {@link #set(Tuple, String)}
     * with the outgoing tuple and the already-joined line, then hands the instance to the
     * output collector. {@link #toString()} returns that line rather than a rendering of the
     * elements.
     */
    private static class DecoratorTuple extends Tuple {
        /** The rendered line returned by {@link #toString()}. */
        String string;

        /** Creates an instance with no element list; {@link #set(Tuple, String)} supplies one later. */
        private DecoratorTuple() {
            super((List<Object>) null);
        }

        /**
         * Points this instance at the given tuple's elements and stores the rendered line.
         *
         * @param tuple the tuple whose elements are adopted via {@code Tuple.elements}
         * @param str   the text {@link #toString()} should return
         */
        public void set(Tuple tuple, String str) {
            elements = Tuple.elements(tuple);
            string = str;
        }

        /** Returns the line most recently passed to {@link #set(Tuple, String)}. */
        @Override // cascading.tuple.Tuple
        public String toString() {
            return string;
        }
    }

    /**
     * Creates a scheme for the given fields and delimiter.
     * <p>
     * Delegates to the {@code (Fields, Compress, String, String, Class[])} overload, passing
     * {@code null} for the compression, quote and types arguments.
     *
     * @param fields the fields this scheme sources and sinks
     * @param str    the delimiter separating values
     */
    @ConstructorProperties({"fields", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR})
    public TextDelimited(Fields fields, String str) {
        // The casts select the intended overload among the many constructors.
        this(fields, (TextLine.Compress) null, str, (String) null, (Class[]) null);
    }

    /**
     * Creates a scheme for the given fields and delimiter, optionally skipping the header record.
     * <p>
     * Delegates to the {@code (Fields, Compress, boolean, String, String, Class[])} overload,
     * passing {@code null} for the compression, quote and types arguments.
     *
     * @param fields the fields this scheme sources and sinks
     * @param z      the skipHeader flag
     * @param str    the delimiter separating values
     */
    @ConstructorProperties({"fields", "skipHeader", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR})
    public TextDelimited(Fields fields, boolean z, String str) {
        // The casts select the intended overload among the many constructors.
        this(fields, (TextLine.Compress) null, z, str, (String) null, (Class[]) null);
    }

    /**
     * Creates a scheme for the given fields and delimiter that coerces values to the given types.
     * <p>
     * Delegates to the {@code (Fields, Compress, String, String, Class[])} overload, passing
     * {@code null} for the compression and quote arguments.
     *
     * @param fields the fields this scheme sources and sinks
     * @param str    the delimiter separating values
     * @param clsArr the types parsed values are coerced to, one per field; may be null
     */
    @ConstructorProperties({"fields", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, WordDelimiterFilterFactory.TYPES})
    public TextDelimited(Fields fields, String str, Class[] clsArr) {
        // The casts select the intended overload among the many constructors.
        this(fields, (TextLine.Compress) null, str, (String) null, clsArr);
    }

    /**
     * Creates a scheme for the given fields, delimiter and types, optionally skipping the header
     * record.
     * <p>
     * Delegates to the {@code (Fields, Compress, boolean, String, String, Class[])} overload,
     * passing {@code null} for the compression and quote arguments.
     *
     * @param fields the fields this scheme sources and sinks
     * @param z      the skipHeader flag
     * @param str    the delimiter separating values
     * @param clsArr the types parsed values are coerced to, one per field; may be null
     */
    @ConstructorProperties({"fields", "skipHeader", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, WordDelimiterFilterFactory.TYPES})
    public TextDelimited(Fields fields, boolean z, String str, Class[] clsArr) {
        // The casts select the intended overload among the many constructors.
        this(fields, (TextLine.Compress) null, z, str, (String) null, clsArr);
    }

    /**
     * Creates a scheme for the given fields, delimiter, quote string and types.
     * <p>
     * Delegates to the {@code (Fields, Compress, String, String, Class[])} overload, passing
     * {@code null} for the compression argument.
     *
     * @param fields the fields this scheme sources and sinks
     * @param str    the delimiter separating values
     * @param str2   the quote string; may be null
     * @param clsArr the types parsed values are coerced to, one per field; may be null
     */
    @ConstructorProperties({"fields", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "quote", WordDelimiterFilterFactory.TYPES})
    public TextDelimited(Fields fields, String str, String str2, Class[] clsArr) {
        // The cast selects the intended overload among the many constructors.
        this(fields, (TextLine.Compress) null, str, str2, clsArr);
    }

    /**
     * Creates a scheme for the given fields, delimiter, quote string and types, optionally
     * skipping the header record.
     * <p>
     * Delegates to the {@code (Fields, Compress, boolean, String, String, Class[])} overload,
     * passing {@code null} for the compression argument.
     *
     * @param fields the fields this scheme sources and sinks
     * @param z      the skipHeader flag
     * @param str    the delimiter separating values
     * @param str2   the quote string; may be null
     * @param clsArr the types parsed values are coerced to, one per field; may be null
     */
    @ConstructorProperties({"fields", "skipHeader", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "quote", WordDelimiterFilterFactory.TYPES})
    public TextDelimited(Fields fields, boolean z, String str, String str2, Class[] clsArr) {
        // The cast selects the intended overload among the many constructors.
        this(fields, (TextLine.Compress) null, z, str, str2, clsArr);
    }

    /**
     * Creates a scheme for the given fields, delimiter, quote string and types, with an explicit
     * safe flag.
     * <p>
     * Delegates to the {@code (Fields, Compress, String, String, Class[], boolean)} overload,
     * passing {@code null} for the compression argument.
     *
     * @param fields the fields this scheme sources and sinks
     * @param str    the delimiter separating values
     * @param str2   the quote string; may be null
     * @param clsArr the types parsed values are coerced to, one per field; may be null
     * @param z      the safe flag
     */
    @ConstructorProperties({"fields", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "quote", WordDelimiterFilterFactory.TYPES, "safe"})
    public TextDelimited(Fields fields, String str, String str2, Class[] clsArr, boolean z) {
        // The cast selects the intended overload among the many constructors.
        this(fields, (TextLine.Compress) null, str, str2, clsArr, z);
    }

    /**
     * Creates a scheme for the given fields, delimiter, quote string and types, with explicit
     * skipHeader and safe flags.
     * <p>
     * Delegates to the seven-argument overload that also takes a compression setting, passing
     * {@code null} for it.
     *
     * @param fields the fields this scheme sources and sinks
     * @param z      the skipHeader flag
     * @param str    the delimiter separating values
     * @param str2   the quote string; may be null
     * @param clsArr the types parsed values are coerced to, one per field; may be null
     * @param z2     the safe flag
     */
    @ConstructorProperties({"fields", "skipHeader", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "quote", WordDelimiterFilterFactory.TYPES, "safe"})
    public TextDelimited(Fields fields, boolean z, String str, String str2, Class[] clsArr, boolean z2) {
        this(fields, null, z, str, str2, clsArr, z2);
    }

    /**
     * Creates a scheme for the given fields, sink compression and delimiter.
     * <p>
     * Delegates to the {@code (Fields, Compress, String, String, Class[])} overload, passing
     * {@code null} for the quote and types arguments.
     *
     * @param fields   the fields this scheme sources and sinks
     * @param compress the sink compression setting
     * @param str      the delimiter separating values
     */
    @ConstructorProperties({"fields", "sinkCompression", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR})
    public TextDelimited(Fields fields, TextLine.Compress compress, String str) {
        // The casts select the intended overload among the many constructors.
        this(fields, compress, str, (String) null, (Class[]) null);
    }

    /**
     * Creates a scheme for the given fields, sink compression and delimiter, optionally skipping
     * the header record.
     * <p>
     * Delegates to the {@code (Fields, Compress, boolean, String, String, Class[])} overload,
     * passing {@code null} for the quote and types arguments.
     *
     * @param fields   the fields this scheme sources and sinks
     * @param compress the sink compression setting
     * @param z        the skipHeader flag
     * @param str      the delimiter separating values
     */
    @ConstructorProperties({"fields", "sinkCompression", "skipHeader", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR})
    public TextDelimited(Fields fields, TextLine.Compress compress, boolean z, String str) {
        // The casts select the intended overload among the many constructors.
        this(fields, compress, z, str, (String) null, (Class[]) null);
    }

    /**
     * Creates a scheme for the given fields, sink compression, delimiter and types.
     * <p>
     * Delegates to the {@code (Fields, Compress, String, String, Class[])} overload, passing
     * {@code null} for the quote argument.
     *
     * @param fields   the fields this scheme sources and sinks
     * @param compress the sink compression setting
     * @param str      the delimiter separating values
     * @param clsArr   the types parsed values are coerced to, one per field; may be null
     */
    @ConstructorProperties({"fields", "sinkCompression", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, WordDelimiterFilterFactory.TYPES})
    public TextDelimited(Fields fields, TextLine.Compress compress, String str, Class[] clsArr) {
        // The cast selects the intended overload among the many constructors.
        this(fields, compress, str, (String) null, clsArr);
    }

    /**
     * Creates a scheme for the given fields, sink compression, delimiter and types, optionally
     * skipping the header record.
     * <p>
     * Delegates to the {@code (Fields, Compress, boolean, String, String, Class[])} overload,
     * passing {@code null} for the quote argument.
     *
     * @param fields   the fields this scheme sources and sinks
     * @param compress the sink compression setting
     * @param z        the skipHeader flag
     * @param str      the delimiter separating values
     * @param clsArr   the types parsed values are coerced to, one per field; may be null
     */
    @ConstructorProperties({"fields", "sinkCompression", "skipHeader", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, WordDelimiterFilterFactory.TYPES})
    public TextDelimited(Fields fields, TextLine.Compress compress, boolean z, String str, Class[] clsArr) {
        // The cast selects the intended overload among the many constructors.
        this(fields, compress, z, str, (String) null, clsArr);
    }

    /**
     * Creates a scheme for the given fields, sink compression, delimiter and types, with an
     * explicit safe flag.
     * <p>
     * Delegates to the {@code (Fields, Compress, String, String, Class[], boolean)} overload,
     * passing {@code null} for the quote argument.
     *
     * @param fields   the fields this scheme sources and sinks
     * @param compress the sink compression setting
     * @param str      the delimiter separating values
     * @param clsArr   the types parsed values are coerced to, one per field; may be null
     * @param z        the safe flag
     */
    @ConstructorProperties({"fields", "sinkCompression", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, WordDelimiterFilterFactory.TYPES, "safe"})
    public TextDelimited(Fields fields, TextLine.Compress compress, String str, Class[] clsArr, boolean z) {
        // The cast selects the intended overload among the many constructors.
        this(fields, compress, str, (String) null, clsArr, z);
    }

    /**
     * Creates a scheme for the given fields, sink compression, delimiter and types, with explicit
     * skipHeader and safe flags.
     * <p>
     * Delegates to the seven-argument overload that also takes a quote string, passing
     * {@code null} for it.
     *
     * @param fields   the fields this scheme sources and sinks
     * @param compress the sink compression setting
     * @param z        the skipHeader flag
     * @param str      the delimiter separating values
     * @param clsArr   the types parsed values are coerced to, one per field; may be null
     * @param z2       the safe flag
     */
    @ConstructorProperties({"fields", "sinkCompression", "skipHeader", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, WordDelimiterFilterFactory.TYPES, "safe"})
    public TextDelimited(Fields fields, TextLine.Compress compress, boolean z, String str, Class[] clsArr, boolean z2) {
        this(fields, compress, z, str, null, clsArr, z2);
    }

    /**
     * Creates a scheme for the given fields, delimiter and quote string.
     * <p>
     * Delegates to the {@code (Fields, Compress, String, String)} overload, passing {@code null}
     * for the compression argument.
     *
     * @param fields the fields this scheme sources and sinks
     * @param str    the delimiter separating values
     * @param str2   the quote string; may be null
     */
    @ConstructorProperties({"fields", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "quote"})
    public TextDelimited(Fields fields, String str, String str2) {
        // The cast selects the intended overload among the many constructors.
        this(fields, (TextLine.Compress) null, str, str2);
    }

    /**
     * Creates a scheme for the given fields, delimiter and quote string, optionally skipping the
     * header record.
     * <p>
     * Delegates to the {@code (Fields, Compress, boolean, String, String)} overload, passing
     * {@code null} for the compression argument.
     *
     * @param fields the fields this scheme sources and sinks
     * @param z      the skipHeader flag
     * @param str    the delimiter separating values
     * @param str2   the quote string; may be null
     */
    @ConstructorProperties({"fields", "skipHeader", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "quote"})
    public TextDelimited(Fields fields, boolean z, String str, String str2) {
        // The cast selects the intended overload among the many constructors.
        this(fields, (TextLine.Compress) null, z, str, str2);
    }

    /**
     * Creates a scheme for the given fields, sink compression, delimiter and quote string.
     * <p>
     * Delegates to the eight-argument constructor with skipHeader {@code false}, strict
     * {@code true}, no types and safe {@code true}.
     *
     * @param fields   the fields this scheme sources and sinks
     * @param compress the sink compression setting
     * @param str      the delimiter separating values
     * @param str2     the quote string; may be null
     */
    // One property name per parameter: this overload has no skipHeader argument.
    @ConstructorProperties({"fields", "sinkCompression", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "quote"})
    public TextDelimited(Fields fields, TextLine.Compress compress, String str, String str2) {
        this(fields, compress, false, str, true, str2, null, true);
    }

    /**
     * Creates a scheme for the given fields, sink compression, delimiter and quote string,
     * optionally skipping the header record.
     * <p>
     * Delegates to the eight-argument constructor with strict {@code true}, no types and safe
     * {@code true}.
     *
     * @param fields   the fields this scheme sources and sinks
     * @param compress the sink compression setting
     * @param z        the skipHeader flag
     * @param str      the delimiter separating values
     * @param str2     the quote string; may be null
     */
    @ConstructorProperties({"fields", "sinkCompression", "skipHeader", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "quote"})
    public TextDelimited(Fields fields, TextLine.Compress compress, boolean z, String str, String str2) {
        this(fields, compress, z, str, true, str2, null, true);
    }

    /**
     * Creates a scheme for the given fields, sink compression, delimiter, quote string and types.
     * <p>
     * Delegates to the eight-argument constructor with skipHeader {@code false}, strict
     * {@code true} and safe {@code true}.
     *
     * @param fields   the fields this scheme sources and sinks
     * @param compress the sink compression setting
     * @param str      the delimiter separating values
     * @param str2     the quote string; may be null
     * @param clsArr   the types parsed values are coerced to, one per field; may be null
     */
    @ConstructorProperties({"fields", "sinkCompression", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "quote", WordDelimiterFilterFactory.TYPES})
    public TextDelimited(Fields fields, TextLine.Compress compress, String str, String str2, Class[] clsArr) {
        this(fields, compress, false, str, true, str2, clsArr, true);
    }

    /**
     * Creates a scheme for the given fields, sink compression, delimiter, quote string and types,
     * optionally skipping the header record.
     * <p>
     * Delegates to the eight-argument constructor with strict {@code true} and safe {@code true}.
     *
     * @param fields   the fields this scheme sources and sinks
     * @param compress the sink compression setting
     * @param z        the skipHeader flag
     * @param str      the delimiter separating values
     * @param str2     the quote string; may be null
     * @param clsArr   the types parsed values are coerced to, one per field; may be null
     */
    @ConstructorProperties({"fields", "sinkCompression", "skipHeader", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "quote", WordDelimiterFilterFactory.TYPES})
    public TextDelimited(Fields fields, TextLine.Compress compress, boolean z, String str, String str2, Class[] clsArr) {
        this(fields, compress, z, str, true, str2, clsArr, true);
    }

    /**
     * Creates a scheme for the given fields, sink compression, delimiter, quote string and types,
     * with an explicit safe flag.
     * <p>
     * Delegates to the eight-argument constructor with skipHeader {@code false} and strict
     * {@code true}.
     *
     * @param fields   the fields this scheme sources and sinks
     * @param compress the sink compression setting
     * @param str      the delimiter separating values
     * @param str2     the quote string; may be null
     * @param clsArr   the types parsed values are coerced to, one per field; may be null
     * @param z        the safe flag
     */
    @ConstructorProperties({"fields", "sinkCompression", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "quote", WordDelimiterFilterFactory.TYPES, "safe"})
    public TextDelimited(Fields fields, TextLine.Compress compress, String str, String str2, Class[] clsArr, boolean z) {
        this(fields, compress, false, str, true, str2, clsArr, z);
    }

    /**
     * Creates a scheme for the given fields, sink compression, delimiter, quote string and types,
     * with explicit skipHeader and safe flags.
     * <p>
     * Delegates to the eight-argument constructor with strict {@code true}.
     *
     * @param fields   the fields this scheme sources and sinks
     * @param compress the sink compression setting
     * @param z        the skipHeader flag
     * @param str      the delimiter separating values
     * @param str2     the quote string; may be null
     * @param clsArr   the types parsed values are coerced to, one per field; may be null
     * @param z2       the safe flag
     */
    @ConstructorProperties({"fields", "sinkCompression", "skipHeader", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "quote", WordDelimiterFilterFactory.TYPES, "safe"})
    public TextDelimited(Fields fields, TextLine.Compress compress, boolean z, String str, String str2, Class[] clsArr, boolean z2) {
        this(fields, compress, z, str, true, str2, clsArr, z2);
    }

    /**
     * Creates a fully specified scheme.
     * <p>
     * The given fields become both the sink and the source fields. The split, clean and escape
     * patterns are compiled once here from the delimiter and quote string. An empty
     * {@code clsArr} is treated the same as {@code null}: no coercion is performed.
     *
     * @param fields   the fields this scheme sources and sinks
     * @param compress the sink compression setting, passed to the {@code TextLine} constructor
     * @param z        the skipHeader flag; when true {@code source} returns null for the record
     *                 whose key is 0
     * @param str      the delimiter separating values; must not be null
     * @param z2       the strict flag; when true {@code source} throws if a line does not yield
     *                 one value per declared field. Forced to false when the fields are ALL
     * @param str2     the quote string; null or empty means no quoting
     * @param clsArr   the types parsed values are coerced to, one per field; null or empty means
     *                 values are left as parsed. The array is copied
     * @param z3       the safe flag; when true a coercion failure yields a null value instead of
     *                 an exception
     * @throws IllegalArgumentException if no fields are declared (and the fields are not ALL), if
     *                                  types are given together with ALL fields, or if the number
     *                                  of types differs from the number of fields
     */
    @ConstructorProperties({"fields", "sinkCompression", "skipHeader", DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, SchemaSymbols.ATTVAL_STRICT, "quote", WordDelimiterFilterFactory.TYPES, "safe"})
    public TextDelimited(Fields fields, TextLine.Compress compress, boolean z, String str, boolean z2, String str2, Class[] clsArr, boolean z3) {
        super(compress);
        setSinkFields(fields);
        setSourceFields(fields);
        Fields sinkFields = getSinkFields();
        this.skipHeader = z;
        this.delimiter = str;
        this.safe = z3;
        this.numValues = sinkFields.size();
        // With ALL fields there is no declared count to enforce, so strict is switched off.
        this.strict = z2 && !sinkFields.isAll();
        if (!sinkFields.isAll() && this.numValues == 0) {
            throw new IllegalArgumentException("may not be zero declared fields, found: " + sinkFields.printVerbose());
        }
        if (str2 != null && !str2.isEmpty()) {
            this.quote = str2;
        }
        this.splitPattern = createSplitPatternFor(this.delimiter, this.quote);
        this.cleanPattern = createCleanPatternFor(this.quote);
        this.escapePattern = createEscapePatternFor(this.quote);
        // Only a non-empty array counts as "types given"; previously an empty array was copied
        // anyway and then rejected by the checks below.
        if (clsArr != null && clsArr.length != 0) {
            this.types = Arrays.copyOf(clsArr, clsArr.length);
        }
        if (this.types != null && sinkFields.isAll()) {
            throw new IllegalArgumentException("when using Fields.ALL, field types may not be used");
        }
        if (this.types != null && this.types.length != sinkFields.size()) {
            throw new IllegalArgumentException("num of types must equal number of fields: " + sinkFields.printVerbose() + ", found: " + this.types.length);
        }
    }

    /**
     * Builds the pattern that matches two consecutive occurrences of the quote string.
     * <p>
     * The quote is inserted into the regex as-is, without escaping.
     *
     * @param str the quote string
     * @return the compiled pattern, or {@code null} when {@code str} is null or empty
     */
    public static Pattern createEscapePatternFor(String str) {
        boolean hasQuote = str != null && !str.isEmpty();
        if (!hasQuote) {
            return null;
        }
        String regex = String.format(ESCAPE_REGEX_FORMAT, str);
        return Pattern.compile(regex);
    }

    /**
     * Builds the pattern that matches a value wrapped in the quote string, capturing the text
     * between the quotes as group 1.
     * <p>
     * The quote is inserted into the regex as-is, without escaping.
     *
     * @param str the quote string
     * @return the compiled pattern, or {@code null} when {@code str} is null or empty
     */
    public static Pattern createCleanPatternFor(String str) {
        boolean hasQuote = str != null && !str.isEmpty();
        if (!hasQuote) {
            return null;
        }
        String regex = String.format(CLEAN_REGEX_FORMAT, str);
        return Pattern.compile(regex);
    }

    /**
     * Builds the pattern used to split a line into values.
     * <p>
     * Regex-special characters in the delimiter are backslash-escaped first. Without a quote
     * string the escaped delimiter is the whole pattern; with one, the quote and the escaped
     * delimiter are substituted into {@code QUOTED_REGEX_FORMAT}. The quote itself is not
     * escaped.
     *
     * @param str  the delimiter; must not be null
     * @param str2 the quote string; null or empty means no quoting
     * @return the compiled split pattern
     */
    public static Pattern createSplitPatternFor(String str, String str2) {
        String escapedDelimiter = str.replaceAll(SPECIAL_REGEX_CHARS, "\\\\$1");
        if (str2 != null && !str2.isEmpty()) {
            String quotedRegex = String.format(QUOTED_REGEX_FORMAT, str2, escapedDelimiter);
            return Pattern.compile(quotedRegex);
        }
        return Pattern.compile(escapedDelimiter);
    }

    /**
     * Parses one input record into a tuple.
     * <p>
     * The value's string form is split with the split pattern, limited to the number of declared
     * fields. If fields are declared and the number of parsed values differs, strict mode throws;
     * otherwise a warning is logged and the values are padded with empty strings. Values are then
     * cleaned via {@code cleanSplit} and, when types are configured, coerced one by one.
     *
     * @param obj  the record key, read as a {@code LongWritable}
     * @param obj2 the record value; its {@code toString()} is the line to parse
     * @return the parsed tuple, or {@code null} when skipHeader is set and the key is 0
     * @throws TapException if strict is set and the value count is wrong, or if safe is not set
     *                      and a value cannot be coerced
     */
    @Override // cascading.scheme.TextLine, cascading.scheme.Scheme
    public Tuple source(Object obj, Object obj2) {
        if (this.skipHeader && ((LongWritable) obj).get() == 0) {
            return null;
        }
        String[] parsed = createSplit(obj2.toString(), this.splitPattern, this.numValues);
        Object[] values = parsed;
        if (this.numValues != 0 && parsed.length != this.numValues) {
            String str = "did not parse correct number of values from input data, expected: " + this.numValues + ", got: " + parsed.length + ValueAggregatorDescriptor.TYPE_SEPARATOR + Util.join(StringUtils.COMMA_STR, parsed);
            if (this.strict) {
                throw new TapException(str);
            }
            LOG.warn(str);
            // The split limit caps the result at numValues entries, so a mismatch means too few
            // values; pad the tail with empty strings (cleanSplit later turns them into null).
            Object[] padded = new Object[this.numValues];
            Arrays.fill(padded, "");
            System.arraycopy(parsed, 0, padded, 0, parsed.length);
            values = padded;
        }
        cleanSplit(values, this.cleanPattern, this.escapePattern, this.quote);
        if (this.types != null) {
            Object[] coerced = new Object[values.length];
            for (int i = 0; i < values.length; i++) {
                try {
                    coerced[i] = Tuples.coerce(values[i], this.types[i]);
                } catch (Exception e) {
                    // Report the raw parsed value; the coerced slot is still unassigned here and
                    // would always print as null.
                    String str2 = "field " + getSourceFields().get(i) + " cannot be coerced from : " + values[i] + " to: " + this.types[i].getName();
                    coerced[i] = null;
                    LOG.warn(str2, e);
                    if (!this.safe) {
                        throw new TapException(str2, e);
                    }
                }
            }
            values = coerced;
        }
        return new Tuple(values);
    }

    /**
     * Splits a line around matches of the given pattern.
     *
     * @param str     the line to split
     * @param pattern the compiled split pattern
     * @param i       the limit passed to {@link Pattern#split(CharSequence, int)}
     * @return the split values
     */
    public static String[] createSplit(String str, Pattern pattern, int i) {
        final String[] parts = pattern.split(str, i);
        return parts;
    }

    /**
     * Cleans parsed values in place.
     * <p>
     * When a clean pattern is given, each value has its match replaced by group 1 (stripping the
     * surrounding quotes) and then, if an escape pattern and quote are given, every match of the
     * escape pattern replaced by the literal quote string. Finally every empty string becomes
     * {@code null}. Elements that are already {@code null} are left untouched.
     *
     * @param objArr   the values to clean; non-null elements must be strings
     * @param pattern  the clean pattern, or {@code null} to skip quote handling entirely
     * @param pattern2 the escape pattern, or {@code null} to skip un-escaping
     * @param str      the quote string inserted for each escape match
     * @return the same array instance, modified in place
     */
    public static Object[] cleanSplit(Object[] objArr, Pattern pattern, Pattern pattern2, String str) {
        // The quote must be inserted literally: in a raw replacement string '$' and '\' are
        // interpreted as group references and escapes.
        String literalQuote = null;
        if (pattern != null && pattern2 != null && str != null) {
            literalQuote = Matcher.quoteReplacement(str);
        }
        for (int i = 0; i < objArr.length; i++) {
            String value = (String) objArr[i];
            if (value == null) {
                continue;
            }
            if (pattern != null) {
                value = pattern.matcher(value).replaceAll("$1");
                if (literalQuote != null) {
                    value = pattern2.matcher(value).replaceAll(literalQuote);
                }
            }
            objArr[i] = value.isEmpty() ? null : value;
        }
        return objArr;
    }

    /**
     * Returns the reusable value buffer, allocating it on first use.
     * <p>
     * The buffer is sized from the first tuple seen and is not resized afterwards.
     *
     * @param tuple the tuple whose size determines the buffer length on first use
     * @return the cached buffer
     */
    private Object[] getBuffer(Tuple tuple) {
        Object[] cached = this.buffer;
        if (cached != null) {
            return cached;
        }
        cached = new Object[tuple.size()];
        this.buffer = cached;
        return cached;
    }

    /**
     * Renders the sink fields of the given entry as one delimited line and emits it.
     * <p>
     * When a quote string is configured, each non-null value is converted to a string, any
     * occurrence of the quote inside it is doubled, and a value containing the delimiter is
     * wrapped in quotes. The values are then joined with the delimiter and collected with a
     * {@code null} key via the reusable decorator tuple.
     *
     * @param tupleEntry      the entry to write
     * @param outputCollector the collector receiving the rendered line
     * @throws IOException if the collector fails
     */
    @Override // cascading.scheme.TextLine, cascading.scheme.Scheme
    public void sink(TupleEntry tupleEntry, OutputCollector outputCollector) throws IOException {
        Tuple selectTuple = tupleEntry.selectTuple(this.sinkFields);
        Object[] asArray = Tuples.asArray(selectTuple, getBuffer(selectTuple));
        if (this.quote != null) {
            String doubledQuote = this.quote + this.quote;
            for (int i = 0; i < asArray.length; i++) {
                Object obj = asArray[i];
                if (obj != null) {
                    String obj2 = obj.toString();
                    if (obj2.contains(this.quote)) {
                        // Literal replacement, matching the literal contains() check above;
                        // replaceAll would treat the quote as a regex and the replacement as a
                        // template.
                        obj2 = obj2.replace(this.quote, doubledQuote);
                    }
                    if (obj2.contains(this.delimiter)) {
                        obj2 = this.quote + obj2 + this.quote;
                    }
                    asArray[i] = obj2;
                }
            }
        }
        if (this.decoratorTuple == null) {
            this.decoratorTuple = new DecoratorTuple();
        }
        this.decoratorTuple.set(tupleEntry.getTuple(), Util.join(asArray, this.delimiter, false));
        outputCollector.collect(null, this.decoratorTuple);
    }
}
