package bixo.parser;

import bixo.datum.Outlink;
import gr.ilsp.fmc.utils.CrawlConfig;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.file.tfile.TFile;
import org.apache.hadoop.util.StringUtils;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;

/* loaded from: input_file:bixo/parser/SimpleLinkExtractor.class */
/**
 * Link extractor that accumulates {@link Outlink}s in memory and honours the
 * HTML robots meta directive.
 *
 * <p>While SAX events for the document's {@code <head>} are being delivered,
 * every {@code <meta name="robots" content="...">} element is inspected. If its
 * comma-separated directive list contains {@code none} or {@code nofollow},
 * links passed to {@link #addLink(Outlink)} from that point on are discarded
 * until {@link #reset()} is called. Links that were already collected before
 * the directive was seen are kept.
 *
 * <p>The instance holds mutable per-document state and performs no
 * synchronization of its own.
 */
public class SimpleLinkExtractor extends BaseLinkExtractor {
    // Tokens of the robots meta tag, spelled out here so that HTML parsing
    // does not depend on constants that belong to unrelated classes.
    private static final String HEAD_ELEMENT = "head";
    private static final String META_ELEMENT = "meta";
    private static final String NAME_ATTRIBUTE = "name";
    private static final String CONTENT_ATTRIBUTE = "content";
    private static final String ROBOTS_META_NAME = "robots";
    private static final String DIRECTIVE_SEPARATOR = ",";
    private static final String DIRECTIVE_NONE = "none";
    private static final String DIRECTIVE_NOFOLLOW = "nofollow";

    /** True between the start and end events of the {@code <head>} element. */
    private boolean _inHead;

    /** True once a robots meta tag forbidding link following has been seen. */
    private boolean _skipLinks;

    /** Links accepted so far for the current document. */
    private final List<Outlink> _outlinks = new ArrayList<Outlink>();

    /**
     * Tracks entry into {@code <head>} and examines {@code <meta>} elements
     * found inside it for a robots directive that forbids following links.
     *
     * @param uri        namespace URI of the element, forwarded to the superclass
     * @param localName  local element name; compared case-insensitively
     * @param qName      qualified element name, forwarded to the superclass
     * @param attributes attributes attached to the element
     * @throws SAXException if the superclass handler throws it
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        super.startElement(uri, localName, qName, attributes);

        if (!this._inHead) {
            if (localName.equalsIgnoreCase(HEAD_ELEMENT)) {
                this._inHead = true;
            }
            // Outside <head> nothing else is of interest to this handler.
            return;
        }

        if (!localName.equalsIgnoreCase(META_ELEMENT)) {
            return;
        }

        String metaName = attributes.getValue(NAME_ATTRIBUTE);
        String metaContent = attributes.getValue(CONTENT_ATTRIBUTE);
        if (metaName != null
                && metaName.equalsIgnoreCase(ROBOTS_META_NAME)
                && metaContent != null
                && forbidsFollowing(metaContent)) {
            this._skipLinks = true;
        }
    }

    /**
     * Leaves "in head" mode when the closing {@code </head>} event arrives.
     *
     * @param uri       namespace URI of the element, forwarded to the superclass
     * @param localName local element name; compared case-insensitively
     * @param qName     qualified element name, forwarded to the superclass
     * @throws SAXException if the superclass handler throws it
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        super.endElement(uri, localName, qName);
        if (this._inHead && localName.equalsIgnoreCase(HEAD_ELEMENT)) {
            this._inHead = false;
        }
    }

    /**
     * Restores the extractor to its initial state: superclass state is reset,
     * collected links are dropped and both parsing flags are cleared.
     */
    @Override
    public void reset() {
        super.reset();
        this._outlinks.clear();
        this._inHead = false;
        this._skipLinks = false;
    }

    /**
     * Records a link unless a robots directive seen earlier in the document
     * forbids following links.
     *
     * @param outlink the link to record
     */
    @Override
    public void addLink(Outlink outlink) {
        if (!this._skipLinks) {
            this._outlinks.add(outlink);
        }
    }

    /**
     * Returns the links collected so far.
     *
     * @return a newly allocated array holding the collected links in insertion
     *         order; empty when none were collected
     */
    @Override
    public Outlink[] getLinks() {
        return this._outlinks.toArray(new Outlink[this._outlinks.size()]);
    }

    /**
     * Tells whether a robots {@code content} value contains a directive that
     * forbids following links.
     *
     * @param content raw, comma-separated directive list; must not be null
     * @return true if any directive equals {@code none} or {@code nofollow}
     */
    private static boolean forbidsFollowing(String content) {
        for (String rawDirective : content.split(DIRECTIVE_SEPARATOR)) {
            String directive = rawDirective.trim();
            // equalsIgnoreCase keeps the match independent of the default locale.
            if (directive.equalsIgnoreCase(DIRECTIVE_NONE)
                    || directive.equalsIgnoreCase(DIRECTIVE_NOFOLLOW)) {
                return true;
            }
        }
        return false;
    }
}
