package gr.ilsp.fmc.utils;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashMap;
import org.apache.tika.metadata.CreativeCommons;
import org.apache.tika.metadata.Metadata;
import org.apache.xerces.parsers.DOMParser;
import org.apache.xerces.xni.parser.XMLDocumentFilter;
import org.cyberneko.html.HTMLConfiguration;
import org.cyberneko.html.filters.Purifier;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/* loaded from: input_file:gr/ilsp/fmc/utils/LicenseParser.class */
/**
 * Detects Creative Commons license links in a parsed HTML DOM and records
 * them in a Tika {@link Metadata} object.
 *
 * <p>A license link is an {@code <a href="...">} element whose target is on
 * {@code creativecommons.org} under {@code /licenses/}. Links carrying a
 * {@code rel} attribute that contains the {@code license} keyword take
 * precedence over plain anchors.
 */
public class LicenseParser {
    public static final Logger LOGGER = LoggerFactory.getLogger(LicenseParser.class);

    /**
     * Maps Dublin Core DCMI type URIs to short work-type names.
     * NOTE(review): nothing in this class reads the map at present.
     */
    private static final HashMap<String, String> WORK_TYPE_NAMES = new HashMap<>();

    static {
        WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/MovingImage", "video");
        WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/StillImage", "image");
        WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Sound", "audio");
        WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Text", "text");
        WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Interactive", "interactive");
        WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Software", "software");
        WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Image", "image");
    }

    /**
     * Depth-first DOM visitor that remembers the first {@code rel="license"}
     * link and the first plain anchor link pointing at a Creative Commons
     * license.
     */
    public static class Walker {
        private static final String LICENSES_STR = "/licenses/";
        private static final String HTTP_PROTOCOL = "http";
        private static final String HTTPS_PROTOCOL = "https";
        private static final String CREATIVECOMMONS_ORG_STR = "creativecommons.org";
        private static final String A_LICENSE_LOCATION = "a";
        private static final String REL_LICENSE_LOCATION = "rel";
        private static final String LICENSE_REL_VALUE = "license";
        private static final String HREF_ATTRIBUTE = "href";

        // Never assigned in this class, so it stays null and only absolute hrefs resolve.
        private URL base;
        private URL relLicense;
        private URL anchorLicense;
        // Never assigned in this class; kept so the WORK_TYPE branch below stays intact.
        private String workType;

        /**
         * Walks the subtree rooted at {@code node} and adds the license found,
         * if any, to {@code metadata}.
         *
         * <p>Adds {@code CreativeCommons.LICENSE_URL} and
         * {@code CreativeCommons.LICENSE_LOCATION} ({@code "rel"} or {@code "a"}).
         *
         * @param node     root of the DOM subtree to inspect
         * @param metadata receiver of the license properties
         * @throws Exception if the subtree contains no Creative Commons license link
         */
        public static void walk(Node node, Metadata metadata) throws Exception {
            Walker walker = new Walker();
            walker.walk(node);

            final String location;
            final URL license;
            if (walker.relLicense != null) {
                location = REL_LICENSE_LOCATION;
                license = walker.relLicense;
            } else if (walker.anchorLicense != null) {
                location = A_LICENSE_LOCATION;
                license = walker.anchorLicense;
            } else {
                // Raw Exception kept on purpose: it is part of the published signature.
                throw new Exception("No CC license found.");
            }

            String licenseUrl = license.toString();
            LicenseParser.LOGGER.debug("CC: found {} in {}", licenseUrl, location);
            metadata.add(CreativeCommons.LICENSE_URL, licenseUrl);
            metadata.add(CreativeCommons.LICENSE_LOCATION, location);

            if (walker.workType != null) {
                LicenseParser.LOGGER.debug("CC: found {}", walker.workType);
                metadata.add(CreativeCommons.WORK_TYPE, walker.workType);
            }
        }

        /** Visits {@code node} and then, recursively, each of its children. */
        private void walk(Node node) {
            if (node == null) {
                return;
            }
            if (node instanceof Element) {
                findLicenseUrl((Element) node);
            }
            NodeList childNodes = node.getChildNodes();
            if (childNodes == null) {
                return;
            }
            for (int i = 0; i < childNodes.getLength(); i++) {
                walk(childNodes.item(i));
            }
        }

        /**
         * Records {@code element} as a license link when it is an anchor whose
         * href points at a Creative Commons license. Only the first link of
         * each kind (rel / plain anchor) is kept.
         */
        private void findLicenseUrl(Element element) {
            if (!A_LICENSE_LOCATION.equalsIgnoreCase(element.getTagName())) {
                return;
            }
            String href = element.getAttribute(HREF_ATTRIBUTE);
            if (href == null) {
                return;
            }

            final URL url;
            try {
                url = new URL(this.base, href);
            } catch (MalformedURLException ignored) {
                // Best effort: hrefs that cannot be resolved (relative links without a
                // base, unknown schemes, ...) simply cannot be license links.
                LicenseParser.LOGGER.trace("CC: skipping unresolvable href {}", href);
                return;
            }
            if (!isCreativeCommonsLicense(url)) {
                return;
            }

            if (hasLicenseRel(element.getAttribute(REL_LICENSE_LOCATION))) {
                if (this.relLicense == null) {
                    this.relLicense = url;
                }
            } else if (this.anchorLicense == null) {
                this.anchorLicense = url;
            }
        }

        /**
         * Returns true when {@code url} is an http(s) URL on creativecommons.org
         * whose path starts with, and is longer than, {@code /licenses/}.
         */
        private static boolean isCreativeCommonsLicense(URL url) {
            String protocol = url.getProtocol();
            if (!HTTP_PROTOCOL.equalsIgnoreCase(protocol) && !HTTPS_PROTOCOL.equalsIgnoreCase(protocol)) {
                return false;
            }
            if (!CREATIVECOMMONS_ORG_STR.equalsIgnoreCase(url.getHost())) {
                return false;
            }
            String path = url.getPath();
            return path != null && path.startsWith(LICENSES_STR) && path.length() > LICENSES_STR.length();
        }

        /**
         * Returns true when the whitespace-separated {@code rel} value contains
         * the {@code license} keyword (compared case-insensitively).
         */
        private static boolean hasLicenseRel(String rel) {
            if (rel == null) {
                return false;
            }
            for (String token : rel.trim().split("\\s+")) {
                if (LICENSE_REL_VALUE.equalsIgnoreCase(token)) {
                    return true;
                }
            }
            return false;
        }
    }

    /**
     * Looks for a Creative Commons license in {@code document} and adds what it
     * finds to {@code metadata}.
     *
     * <p>Never throws: any failure, including "no license found", is logged as
     * a warning and {@code metadata} is returned as it stands.
     *
     * @param document parsed HTML document
     * @param metadata metadata object to enrich
     * @return the same {@code metadata} instance
     */
    public Metadata getLicense(Document document, Metadata metadata) {
        try {
            Walker.walk(document.getDocumentElement(), metadata);
        } catch (Exception e) {
            // Include the cause so "no license" can be told apart from real failures.
            LOGGER.warn("Could not parse document: {}", e.toString());
        }
        return metadata;
    }

    /**
     * Demo entry point: downloads three sample pages, parses each with NekoHTML
     * and logs the license URL detected in it.
     *
     * @param strArr ignored
     * @throws IOException  if a page cannot be downloaded
     * @throws SAXException if the parser cannot be configured or fails
     */
    public static void main(String[] strArr) throws IOException, SAXException {
        final String baseUrl = "http://code.creativecommons.org/svnroot/ccnutch/trunk/data/";
        final String[] samples = {"anchor.html", "rdf.html", "rel.html"};
        LicenseParser licenseParser = new LicenseParser();

        for (String sample : samples) {
            URL url = new URL(baseUrl + sample);
            LOGGER.info(url.toString());

            HttpURLConnection connection = (HttpURLConnection) url.openConnection();
            connection.setConnectTimeout(5000);
            connection.setReadTimeout(5000);
            connection.setUseCaches(false);
            connection.setRequestProperty("User-Agent", "ISPL");
            connection.setRequestProperty("Accept-Charset", "utf-8");
            connection.setRequestProperty("Keep-Alive", "300");

            try (InputStream inputStream = connection.getInputStream()) {
                HTMLConfiguration configuration = new HTMLConfiguration();
                configuration.setProperty("http://cyberneko.org/html/properties/filters", new XMLDocumentFilter[]{new Purifier()});
                DOMParser parser = new DOMParser(configuration);
                parser.setFeature("http://apache.org/xml/features/dom/include-ignorable-whitespace", false);
                parser.parse(new InputSource(inputStream));

                Metadata metadata = licenseParser.getLicense(parser.getDocument(), new Metadata());
                LOGGER.info("CC license for {}: {}", url, metadata.get(CreativeCommons.LICENSE_URL));
            } finally {
                connection.disconnect();
            }
        }
    }
}
