package bixo.utils;

import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sourceforge.align.formatter.TmxFormatter;
import org.apache.hadoop.io.file.tfile.TFile;
import org.mortbay.jetty.HttpHeaderValues;

/* loaded from: input_file:bixo/utils/HtmlUtils.class */
/**
 * Static helpers that inspect raw HTML text for {@code <meta>} tags carrying
 * robots, caching, charset and language hints.
 *
 * <p>Matching is done with regular expressions rather than an HTML parser. Each
 * pattern only recognises a tag whose identifying attribute ({@code name} or
 * {@code http-equiv}) comes immediately before its {@code content} attribute,
 * and only the first such tag in the text is examined.
 */
public class HtmlUtils {
    private static final Pattern META_ROBOTS_PATTERN = Pattern.compile("(?is)<meta\\s+name\\s*=\\s*['\\\"]\\s*robots['\\\"]\\s+content\\s*=\\s*['\\\"]([^'\"]+)['\\\"]");
    private static final Pattern META_PRAGMA_PATTERN = Pattern.compile("(?is)<meta\\s+http-equiv\\s*=\\s*['\\\"]\\s*pragma['\\\"]\\s+content\\s*=\\s*['\\\"]no-cache['\\\"]");
    private static final Pattern META_CACHE_CONTROL_PATTERN = Pattern.compile("(?is)<meta\\s+http-equiv\\s*=\\s*['\\\"]\\s*cache-control['\\\"]\\s+content\\s*=\\s*['\\\"]([^'\"]+)['\\\"]");
    private static final Pattern META_CONTENT_TYPE_PATTERN = Pattern.compile("(?is)<meta\\s+http-equiv\\s*=\\s*['\\\"]\\s*content-type['\\\"]\\s+content\\s*=\\s*['\\\"]([^'\"]+)['\\\"]");
    private static final Pattern META_CONTENT_LANGUAGE_PATTERN = Pattern.compile("(?is)<meta\\s+http-equiv\\s*=\\s*['\\\"]\\s*content-language['\\\"]\\s+content\\s*=\\s*['\\\"]([^'\"]+)['\\\"]");
    // The dot is escaped so that only a literal "dc.language" name is accepted.
    private static final Pattern META_DC_LANGUAGE_PATTERN = Pattern.compile("(?is)<meta\\s+name\\s*=\\s*['\\\"]\\s*dc\\.language['\\\"]\\s+content\\s*=\\s*['\\\"]([^'\"]+)['\\\"]");

    /** Separator between directives/languages in a comma-separated content value. */
    private static final String COMMA = ",";

    /** Separator between parameters in a semicolon-separated content value. */
    private static final String SEMICOLON = ";";

    /** Prefix identifying an English language code ("en", "en-us", ...). */
    private static final String ENGLISH_PREFIX = "en";

    /** Content-type parameters naming charsets treated as indicating non-English text. */
    private static final String[] NON_ENGLISH_CHARSETS = {
        "charset=gb2312",
        "charset=gbk",
        "charset=gb18030",
        "charset=windows-1251",
        "charset=iso-2022-jp",
        "charset=euc-jp",
        "charset=euc-kr",
    };

    /**
     * Tells whether the page asks not to be archived or cached.
     *
     * <p>True when the robots meta tag lists {@code none} or {@code noarchive},
     * when a {@code pragma} meta tag has content {@code no-cache}, or when the
     * {@code cache-control} meta tag lists {@code no-cache}, {@code no-store}
     * or {@code private}.
     *
     * @param str raw HTML text; {@code null} is treated as having no meta tags
     * @return {@code true} if any of the above directives is present
     */
    public static boolean hasNoArchiveMetaTags(String str) {
        if (str == null) {
            return false;
        }
        if (hasAnyToken(META_ROBOTS_PATTERN, str, COMMA, "none", "noarchive")) {
            return true;
        }
        if (META_PRAGMA_PATTERN.matcher(str).find()) {
            return true;
        }
        return hasAnyToken(META_CACHE_CONTROL_PATTERN, str, COMMA, "no-cache", "no-store", "private");
    }

    /**
     * Tells whether the robots meta tag lists {@code none} or {@code nofollow}.
     *
     * @param str raw HTML text; {@code null} is treated as having no meta tags
     * @return {@code true} if one of those directives is present
     */
    public static boolean hasNoFollowMetaTags(String str) {
        if (str == null) {
            return false;
        }
        return hasAnyToken(META_ROBOTS_PATTERN, str, COMMA, "none", "nofollow");
    }

    /**
     * Tells whether the meta tags indicate that the page is not in English.
     *
     * <p>The checks run in this order and the first decisive one wins:
     * <ol>
     *   <li>a {@code content-type} meta tag naming one of the charsets in
     *       {@code NON_ENGLISH_CHARSETS} yields {@code true};</li>
     *   <li>a {@code content-language} meta tag yields {@code false} if any
     *       comma-separated entry starts with "en", otherwise {@code true};</li>
     *   <li>a {@code dc.language} meta tag is judged the same way, with
     *       semicolon-separated entries;</li>
     *   <li>with none of these present the result is {@code false}.</li>
     * </ol>
     *
     * @param str raw HTML text; {@code null} is treated as having no meta tags
     * @return {@code true} if the tags point to non-English content only
     */
    public static boolean hasOnlyNonEnglishMetaTags(String str) {
        if (str == null) {
            return false;
        }
        if (hasAnyToken(META_CONTENT_TYPE_PATTERN, str, SEMICOLON, NON_ENGLISH_CHARSETS)) {
            return true;
        }

        // content-language takes precedence; dc.language is consulted only when it is absent.
        String[] languages = contentTokens(META_CONTENT_LANGUAGE_PATTERN, str, COMMA);
        if (languages == null) {
            languages = contentTokens(META_DC_LANGUAGE_PATTERN, str, SEMICOLON);
        }
        if (languages == null) {
            return false;
        }
        for (String language : languages) {
            if (language.startsWith(ENGLISH_PREFIX)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks whether the first tag matched by {@code pattern} has a content
     * token equal to one of {@code wanted}.
     */
    private static boolean hasAnyToken(Pattern pattern, String html, String separator, String... wanted) {
        String[] tokens = contentTokens(pattern, html, separator);
        if (tokens == null) {
            return false;
        }
        for (String token : tokens) {
            for (String candidate : wanted) {
                if (token.equals(candidate)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Extracts the captured content value of the first tag matched by
     * {@code pattern}, lower-cased and split on {@code separator}, with each
     * token trimmed.
     *
     * @return the tokens, or {@code null} when the pattern does not match, so
     *         callers can tell "tag absent" from "tag present with no tokens"
     */
    private static String[] contentTokens(Pattern pattern, String html, String separator) {
        Matcher matcher = pattern.matcher(html);
        if (!matcher.find()) {
            return null;
        }
        // Locale.ROOT keeps ASCII keywords stable under locales such as Turkish,
        // where the default lower-casing of 'I' is not 'i'.
        String[] tokens = matcher.group(1).toLowerCase(Locale.ROOT).split(separator);
        for (int i = 0; i < tokens.length; i++) {
            // Values are commonly written with a space after the separator ("noindex, nofollow").
            tokens[i] = tokens[i].trim();
        }
        return tokens;
    }
}
