package bixo.parser;

import bixo.datum.Outlink;
import java.io.Serializable;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import org.apache.hadoop.io.MapFile;
import org.apache.pdfbox.pdmodel.common.PDPageLabelRange;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:bixo/parser/BaseLinkExtractor.class */
/**
 * SAX handler that turns selected elements of a parsed document into {@link Outlink}s.
 *
 * <p>An element is treated as a link when its lower-cased local name is in the configured set of
 * link tags and it carries at least one of the configured link attributes. The character data
 * seen between that element's start and end tags is collected as the anchor text, and the
 * element's {@code rel} attribute (if any) is passed along with the URL.
 *
 * <p>Links are not nested: while one link element is open, start tags of further link elements
 * are ignored.
 *
 * <p>Subclasses decide how links are stored by overriding {@link #addLink(Outlink)} and
 * {@link #getLinks()}.
 */
public abstract class BaseLinkExtractor extends DefaultHandler implements Serializable {

    /**
     * Default set of link tags: only {@code a}.
     *
     * <p>This set is mutable and is shared by every extractor that has not called
     * {@link #setLinkTags(Set)}; modifying it affects all of them.
     */
    public static final Set<String> DEFAULT_LINK_TAGS = new HashSet<String>(Arrays.asList("a"));

    /** Every tag name this class knows how to treat as a link source. Mutable. */
    public static final Set<String> ALL_LINK_TAGS = new HashSet<String>(Arrays.asList(
            "a",
            "img",
            "frame",
            "iframe",
            "link",
            "area",
            "input",
            "bgsound",
            "object",
            "blockquote",
            "q",
            "ins",
            "del",
            "embed"));

    /**
     * Default set of attributes that may carry a link URL: only {@code href}.
     *
     * <p>This set is mutable and is shared by every extractor that has not called
     * {@link #setLinkAttributeTypes(Set)}; modifying it affects all of them.
     */
    public static final Set<String> DEFAULT_LINK_ATTRIBUTE_TYPES =
            new HashSet<String>(Arrays.asList("href"));

    /** Every attribute name this class knows how to read a link URL from. Mutable. */
    public static final Set<String> ALL_LINK_ATTRIBUTE_TYPES =
            new HashSet<String>(Arrays.asList("href", "src", "data", "cite"));

    /** Lower-cased name of the link element currently open, or {@code null} when outside one. */
    protected String _inAnchorTag;

    /** URL taken from the currently open link element. */
    protected String _curUrl;

    /** Value of the {@code rel} attribute of the currently open link element; may be null. */
    protected String _curRelAttributes;

    /** Character data collected since the current link element was opened. */
    protected StringBuilder _curAnchor = new StringBuilder();

    /** Tag names (lower case) that are treated as links. */
    protected Set<String> _linkTags = DEFAULT_LINK_TAGS;

    /** Attribute names that are checked for a link URL. */
    protected Set<String> _linkAttributeTypes = DEFAULT_LINK_ATTRIBUTE_TYPES;

    /**
     * Sets the tag names that are treated as links.
     *
     * @param set tag names; element names are lower-cased before being looked up in this set.
     *            The set is stored by reference, not copied.
     */
    public void setLinkTags(Set<String> set) {
        this._linkTags = set;
    }

    /**
     * Returns the tag names that are treated as links.
     *
     * @return the set currently in use (the same instance, not a copy)
     */
    public Set<String> getLinkTags() {
        return this._linkTags;
    }

    /**
     * Sets the attribute names that are checked for a link URL.
     *
     * @param set attribute names. The set is stored by reference, not copied.
     */
    public void setLinkAttributeTypes(Set<String> set) {
        this._linkAttributeTypes = set;
    }

    /**
     * Returns the attribute names that are checked for a link URL.
     *
     * @return the set currently in use (the same instance, not a copy)
     */
    public Set<String> getLinkAttributeTypes() {
        return this._linkAttributeTypes;
    }

    /**
     * Marks the extractor as being outside any link element.
     *
     * <p>Only the open-element marker is cleared; the other per-link fields are overwritten the
     * next time a link element is opened.
     */
    public void reset() {
        this._inAnchorTag = null;
    }

    /**
     * Receives each link as its element is closed. This base implementation does nothing.
     *
     * @param outlink the link built from the element that was just closed
     */
    public void addLink(Outlink outlink) {
    }

    /**
     * Returns the extracted links.
     *
     * @return the links, as defined by the subclass
     */
    public abstract Outlink[] getLinks();

    /**
     * Opens a link when the element is a configured link tag carrying a configured link
     * attribute, and no other link element is currently open.
     *
     * <p>If the element carries more than one of the configured attributes, the one that comes
     * last in the attribute set's iteration order supplies the URL.
     *
     * @param uri namespace URI, passed through to the superclass
     * @param localName local element name; compared case-insensitively against the link tags
     * @param qName qualified element name, passed through to the superclass
     * @param attributes attributes of the element
     * @throws SAXException if the superclass handler throws it
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        super.startElement(uri, localName, qName, attributes);

        if (this._inAnchorTag != null) {
            // Already inside a link element; nested link tags are ignored.
            return;
        }

        // Locale.ROOT keeps the mapping stable: under a Turkish/Azeri default locale a plain
        // toLowerCase() turns 'I' into a dotless 'ı', so "IMG" would not match "img".
        String tag = localName.toLowerCase(Locale.ROOT);
        if (!this._linkTags.contains(tag)) {
            return;
        }

        String url = null;
        for (String attributeType : this._linkAttributeTypes) {
            String value = attributes.getValue(attributeType);
            if (value != null) {
                url = value;
            }
        }

        if (url != null) {
            this._curUrl = url;
            this._curRelAttributes = attributes.getValue("rel");
            this._inAnchorTag = tag;
            this._curAnchor.setLength(0);
        }
    }

    /**
     * Appends character data to the anchor text while a link element is open.
     *
     * @param ch buffer holding the characters
     * @param start index of the first character to use
     * @param length number of characters to use
     * @throws SAXException if the superclass handler throws it
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        super.characters(ch, start, length);
        if (this._inAnchorTag != null) {
            this._curAnchor.append(ch, start, length);
        }
    }

    /**
     * Closes the current link when the element being closed has the same name (ignoring case)
     * as the element that opened it, and hands the finished link to {@link #addLink(Outlink)}.
     *
     * @param uri namespace URI, passed through to the superclass
     * @param localName local element name
     * @param qName qualified element name, passed through to the superclass
     * @throws SAXException if the superclass handler throws it
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        super.endElement(uri, localName, qName);
        // equalsIgnoreCase(null) is false, so nothing happens when no link element is open.
        if (localName.equalsIgnoreCase(this._inAnchorTag)) {
            addLink(new Outlink(this._curUrl, this._curAnchor.toString(), this._curRelAttributes));
            this._inAnchorTag = null;
        }
    }
}
