package bixo.parser;

import bixo.config.ParserPolicy;
import bixo.datum.FetchedDatum;
import bixo.datum.ParsedDatum;
import bixo.utils.HttpUtils;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.net.URL;
import org.apache.tika.utils.CharsetUtils;

/* loaded from: input_file:bixo/parser/BaseParser.class */
/**
 * Base class for parsers that turn a {@link FetchedDatum} into a {@link ParsedDatum}.
 *
 * <p>Holds the {@link ParserPolicy} supplied at construction time and offers helpers
 * that read the charset, language and content location from a fetched datum.
 */
public abstract class BaseParser implements Serializable {
    // NOTE(review): not marked final. The class declares no serialVersionUID, and field
    // modifiers take part in the default UID computation, so changing them would alter
    // serialized-form compatibility.
    private ParserPolicy _policy;

    /**
     * Creates a parser bound to the given policy.
     *
     * @param parserPolicy the policy to store; kept as-is, with no validation
     */
    public BaseParser(ParserPolicy parserPolicy) {
        _policy = parserPolicy;
    }

    /**
     * @return the policy that was passed to the constructor
     */
    public ParserPolicy getParserPolicy() {
        return _policy;
    }

    /**
     * Parses the supplied fetched datum.
     *
     * @param fetchedDatum the fetched content to parse
     * @return the parse result
     * @throws Exception if the implementation fails to parse the content
     */
    public abstract ParsedDatum parse(FetchedDatum fetchedDatum) throws Exception;

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Derives a charset name from the datum's content type.
     *
     * <p>The value extracted by {@code HttpUtils.getCharsetFromContentType} is passed
     * through {@code CharsetUtils.clean} before being returned.
     *
     * @param fetchedDatum the datum whose content type is inspected
     * @return whatever {@code CharsetUtils.clean} yields for the extracted charset
     */
    public String getCharset(FetchedDatum fetchedDatum) {
        String declaredCharset = HttpUtils.getCharsetFromContentType(fetchedDatum.getContentType());
        return CharsetUtils.clean(declaredCharset);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Looks up the language from the response headers.
     *
     * @param fetchedDatum the datum whose headers are consulted
     * @param str not used by this implementation
     * @return the first {@code Content-Language} header value, as returned by the
     *         headers object's {@code getFirst}
     */
    public String getLanguage(FetchedDatum fetchedDatum, String str) {
        String headerLanguage = fetchedDatum.getHeaders().getFirst("Content-Language");
        return headerLanguage;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Determines the content location URL for the datum.
     *
     * <p>Starts from the fetched URL. When a {@code Content-Location} header is present,
     * its value is resolved against the fetched URL and the result is returned instead.
     *
     * @param fetchedDatum the datum providing the fetched URL and headers
     * @return the fetched URL, or the {@code Content-Location} value resolved against it
     * @throws MalformedURLException if either URL cannot be constructed
     */
    public URL getContentLocation(FetchedDatum fetchedDatum) throws MalformedURLException {
        // Build the fetched URL first so a malformed fetched URL fails before headers are read.
        URL fetchedUrl = new URL(fetchedDatum.getFetchedUrl());
        String contentLocation = fetchedDatum.getHeaders().getFirst("Content-Location");
        if (contentLocation == null) {
            return fetchedUrl;
        }
        return new URL(fetchedUrl, contentLocation);
    }
}
