package bixo.urls;

import bixo.utils.StringUtils;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.apache.log4j.spi.LocationInfo;
import org.apache.lucene.analysis.fa.PersianAnalyzer;
import org.hsqldb.DatabaseURL;
import org.slf4j.Marker;

/* loaded from: input_file:bixo/urls/SimpleUrlNormalizer.class */
/**
 * URL normalizer for {@code http} and {@code https} URLs.
 *
 * <p>Normalization strips session ids and a fixed set of query parameters, lower-cases the
 * scheme and host, drops the port when it equals the protocol default, collapses
 * {@code /dir/../} path segments, removes default page names such as {@code index.html},
 * and re-encodes every path segment and query component in a canonical form.
 *
 * <p>URLs with any other protocol, and strings that cannot be parsed as a URL, are returned
 * after only the textual clean-up steps (trim, scheme defaulting, parameter stripping).
 */
public class SimpleUrlNormalizer extends BaseUrlNormalizer {
    private static final Logger LOGGER = Logger.getLogger(SimpleUrlNormalizer.class);

    /** Characters that stay percent-encoded inside a query name or value. */
    private static final String RESERVED_QUERY_CHARS = "%&;=:?#";

    /** Characters that stay percent-encoded inside a single path segment. */
    private static final String RESERVED_PATH_CHARS = "%/?#";

    private static final String HEX_CODES = "0123456789abcdefABCDEF";

    /** Scheme prefix assumed when the input has no {@code "://"} at all. */
    private static final String DEFAULT_SCHEME_PREFIX = "http://";

    private static final Pattern RELATIVE_PATH_PATTERN = Pattern.compile("(/[^/]*[^/.]{1}[^/]*/\\.\\./|^(/\\.\\./)+)");
    private static final Pattern DEFAULT_PAGE_PATTERN = Pattern.compile("/((?i)index|default)\\.((?i)js[pf]{1}?[afx]?|cgi|cfm|asp[x]?|[psx]?htm[l]?|php[3456]?)(\\?|&|#|$)");
    private static final Pattern JSESSION_ID_PATTERN = Pattern.compile("(?:;jsessionid=.*?)(\\?|&|#|$)");
    private static final Pattern SESSION_ID_PATTERN = Pattern.compile("(\\?|&)(?:(?i)sid|phpsessid|sessionid|session_id|bv_sessionid|jsessionid|-session|session|session_key)=.*?(&|#|$)");
    private static final Pattern OTHER_IGNORED_QUERY_PARAMETERS_PATTERN = Pattern.compile("(\\?|&)(?:(?i)width|format|country|height|src|user|username|uname|return_url|returnurl|sort|sort_by|sortby|sort_direction|sort_key|order_by|orderby|sortorder|collate)=.*?(&|#|$)");
    private static final Pattern AGGRESSIVE_IGNORED_QUERY_PARAMETERS_PATTERN = Pattern.compile("(\\?|&)(?:(?i)user|usr|user_id|userid|memberid)=.*?(&|#|$)");

    private final boolean _treatRefAsQuery;
    private final boolean _isAggressive;

    /** Creates a normalizer that drops fragments and does not use aggressive stripping. */
    public SimpleUrlNormalizer() {
        this(false, false);
    }

    /**
     * Creates a non-aggressive normalizer.
     *
     * @param treatRefAsQuery see {@link #SimpleUrlNormalizer(boolean, boolean)}
     */
    public SimpleUrlNormalizer(boolean treatRefAsQuery) {
        this(treatRefAsQuery, false);
    }

    /**
     * @param treatRefAsQuery when {@code true}, a fragment that directly follows a {@code '/'}
     *        in a URL without a query string is normalized like a query and kept as the
     *        fragment; in every other case the fragment is dropped
     * @param isAggressive when {@code true}, the parameters matched by
     *        {@code AGGRESSIVE_IGNORED_QUERY_PARAMETERS_PATTERN} (user, usr, user_id, userid,
     *        memberid) are stripped as well
     */
    public SimpleUrlNormalizer(boolean treatRefAsQuery, boolean isAggressive) {
        this._treatRefAsQuery = treatRefAsQuery;
        this._isAggressive = isAggressive;
    }

    /**
     * Percent-encodes one code point as its UTF-8 byte sequence, using lower-case hex digits.
     *
     * @return the encoded text, or an empty string if UTF-8 is reported as unsupported
     */
    private String encodeCodePoint(int codePoint) {
        try {
            byte[] utf8Bytes = new String(new int[] {codePoint}, 0, 1).getBytes("UTF-8");
            StringBuilder encoded = new StringBuilder(utf8Bytes.length * 3);
            for (byte utf8Byte : utf8Bytes) {
                // %x on a Byte renders negative values as their unsigned 8-bit form.
                encoded.append(String.format("%%%02x", utf8Byte));
            }
            return encoded.toString();
        } catch (UnsupportedEncodingException e) {
            LOGGER.error("Unexpected exception during URL encoding: " + e);
            return "";
        }
    }

    /**
     * Encodes an already-decoded URL component.
     *
     * <p>A space becomes {@code '+'}; code points of 127 and above are percent-encoded as
     * UTF-8; control characters and any character listed in {@code reservedChars} are
     * percent-encoded as a single byte; everything else is copied through unchanged.
     */
    private String encodeUrlComponent(String component, String reservedChars) {
        StringBuilder encoded = new StringBuilder(component.length());
        int offset = 0;
        while (offset < component.length()) {
            int codePoint = component.codePointAt(offset);
            if (codePoint == ' ') {
                encoded.append('+');
            } else if (codePoint >= 127) {
                encoded.append(encodeCodePoint(codePoint));
            } else if (codePoint < 32 || reservedChars.indexOf(codePoint) != -1) {
                encoded.append(String.format("%%%02x", codePoint));
            } else {
                encoded.append((char) codePoint);
            }
            // Advance by one or two chars so surrogate pairs are consumed as a unit.
            offset += Character.charCount(codePoint);
        }
        return encoded.toString();
    }

    /**
     * Decodes percent-escapes (and {@code '+'} as space) in {@code url} using UTF-8.
     *
     * <p>Any {@code '%'} that is not followed by two hex digits is first rewritten to
     * {@code "%25"}, so that it decodes back to a literal percent sign instead of making
     * {@link URLDecoder} reject the input.
     *
     * @param url text to decode
     * @return the decoded text; the pre-escaped text if UTF-8 is reported as unsupported
     */
    public String decodeUrl(String url) {
        String escaped = url;
        int searchFrom = 0;
        int percentAt;
        while ((percentAt = escaped.indexOf('%', searchFrom)) != -1) {
            int afterPercent = percentAt + 1;
            boolean validEscape = afterPercent <= escaped.length() - 2
                    && HEX_CODES.indexOf(escaped.charAt(afterPercent)) != -1
                    && HEX_CODES.indexOf(escaped.charAt(afterPercent + 1)) != -1;
            if (validEscape) {
                searchFrom = afterPercent;
            } else {
                // Turn the stray '%' into "%25" and resume scanning after the inserted '2'.
                escaped = escaped.substring(0, afterPercent) + "25" + escaped.substring(afterPercent);
                searchFrom = afterPercent + 1;
            }
        }

        try {
            return URLDecoder.decode(escaped, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            LOGGER.error("Unexpected exception during URL decoding: " + e);
            return escaped;
        }
    }

    /**
     * Lower-cases the host name and removes a single trailing dot.
     *
     * <p>Lower-casing uses {@link Locale#ROOT} so the result does not depend on the JVM's
     * default locale (for example, {@code 'I'} must not become a dotless i under Turkish).
     *
     * @param hostname host name to normalize
     * @return the normalized host name
     */
    public String normalizeHostname(String hostname) {
        String normalized = hostname.toLowerCase(Locale.ROOT);
        if (normalized.endsWith(".")) {
            normalized = normalized.substring(0, normalized.length() - 1);
        }
        return normalized;
    }

    /**
     * Normalizes the path part of a URL.
     *
     * <p>Steps, in order: collapse every {@code /segment/../} (and leading {@code /../}) into
     * {@code /}; remove the first default page name (index/default with a known extension);
     * decode and re-encode each non-empty segment; keep a trailing slash if the input had one.
     *
     * @param path URL path to normalize
     * @return the normalized path, {@code "/"} when no non-empty segment remains
     */
    public String normalizePath(String path) {
        String working = path;

        // Each replacement can expose a new "/x/../" sequence, so re-scan from the start.
        Matcher relative = RELATIVE_PATH_PATTERN.matcher(working);
        while (relative.find()) {
            working = working.substring(0, relative.start()) + "/" + working.substring(relative.end());
            relative = RELATIVE_PATH_PATTERN.matcher(working);
        }

        Matcher defaultPage = DEFAULT_PAGE_PATTERN.matcher(working);
        if (defaultPage.find()) {
            // Group 3 is the delimiter (or end of input) that followed the page name.
            working = working.substring(0, defaultPage.start()) + "/" + defaultPage.group(3)
                    + working.substring(defaultPage.end());
        }

        StringBuilder normalized = new StringBuilder();
        for (String segment : working.split("/")) {
            if (segment.length() > 0) {
                normalized.append('/');
                normalized.append(encodeUrlComponent(decodeUrl(segment), RESERVED_PATH_CHARS));
            }
        }

        if (normalized.length() == 0) {
            return "/";
        }
        if (working.endsWith("/") && normalized.charAt(normalized.length() - 1) != '/') {
            normalized.append('/');
        }
        return normalized.toString();
    }

    /**
     * Normalizes a query string (without the leading {@code '?'}).
     *
     * <p>Empty parameters are dropped; every name and value is decoded and re-encoded; a
     * trailing {@code '='} on a parameter with a single piece is preserved.
     *
     * @param query query string, may be {@code null}
     * @return the normalized query, or an empty string when {@code query} is {@code null}
     */
    public String normalizeQuery(String query) {
        if (query == null) {
            return "";
        }

        StringBuilder normalized = new StringBuilder();
        for (String parameter : query.split("&")) {
            if (parameter.length() == 0) {
                continue;
            }

            String[] pieces = StringUtils.splitOnChar(parameter, '=');
            if (pieces.length == 0) {
                // Defensive: if the helper yields no pieces (e.g. for a bare "="), there is
                // nothing to emit; skipping also avoids truncating an empty builder below.
                continue;
            }

            if (pieces.length == 1) {
                normalized.append(encodeUrlComponent(decodeUrl(pieces[0]), RESERVED_QUERY_CHARS));
                if (parameter.endsWith("=")) {
                    normalized.append("=");
                }
            } else {
                for (String piece : pieces) {
                    normalized.append(encodeUrlComponent(decodeUrl(piece), RESERVED_QUERY_CHARS));
                    normalized.append('=');
                }
                // Drop the '=' appended after the last piece.
                normalized.setLength(normalized.length() - 1);
            }
            normalized.append('&');
        }

        if (normalized.length() > 0 && normalized.charAt(normalized.length() - 1) == '&') {
            normalized.setLength(normalized.length() - 1);
        }
        return normalized.toString();
    }

    /**
     * Removes every match of {@code pattern} from {@code url}, keeping only the text captured
     * by the pattern's groups (the delimiters around the removed parameter).
     *
     * <p>The scan restarts after each removal, so repeated and adjacent occurrences are all
     * stripped. Every pattern used here consumes text outside its capturing groups, so each
     * pass shortens the string and the loop terminates.
     */
    private static String stripAllMatches(Pattern pattern, String url) {
        String result = url;
        Matcher matcher = pattern.matcher(result);
        while (matcher.find()) {
            StringBuilder rebuilt = new StringBuilder(result.length());
            rebuilt.append(result, 0, matcher.start());
            for (int group = 1; group <= matcher.groupCount(); group++) {
                rebuilt.append(matcher.group(group));
            }
            rebuilt.append(result, matcher.end(), result.length());
            result = rebuilt.toString();
            matcher = pattern.matcher(result);
        }
        return result;
    }

    /**
     * Returns the normalized form of {@code url}.
     *
     * <p>The input is trimmed, given an {@code http://} prefix if it contains no
     * {@code "://"}, and stripped of session ids and ignored query parameters. If the result
     * parses as an http/https URL it is rebuilt from its normalized scheme, host, port, path
     * and query (or fragment, see the constructor); otherwise the stripped text is returned.
     *
     * @param url URL text to normalize
     * @return the normalized URL
     */
    @Override // bixo.urls.BaseUrlNormalizer
    public String normalize(String url) {
        String cleaned = url.trim();
        if (cleaned.indexOf("://") == -1) {
            cleaned = DEFAULT_SCHEME_PREFIX + cleaned;
        }

        cleaned = stripAllMatches(JSESSION_ID_PATTERN, cleaned);
        cleaned = stripAllMatches(SESSION_ID_PATTERN, cleaned);
        cleaned = stripAllMatches(OTHER_IGNORED_QUERY_PARAMETERS_PATTERN, cleaned);
        if (this._isAggressive) {
            cleaned = stripAllMatches(AGGRESSIVE_IGNORED_QUERY_PARAMETERS_PATTERN, cleaned);
        }

        URL parsed;
        try {
            // '+' is rewritten to "%20" before parsing; decodeUrl later turns both into a space.
            parsed = new URL(cleaned.replace("+", "%20"));
        } catch (MalformedURLException e) {
            LOGGER.debug("Malformed URL being returned without further processing: " + cleaned);
            return cleaned;
        }

        String protocol = parsed.getProtocol().toLowerCase(Locale.ROOT);
        if (!protocol.equals("http") && !protocol.equals("https")) {
            return cleaned;
        }

        String host = normalizeHostname(parsed.getHost());
        int port = parsed.getPort();
        if (port == parsed.getDefaultPort()) {
            port = -1;
        }

        String path = normalizePath(parsed.getPath());
        String query = parsed.getQuery();
        String ref = parsed.getRef();

        String querySuffix;
        String fragmentSuffix;
        if (refDirectlyFollowsSlash(parsed.toExternalForm(), ref, query)) {
            fragmentSuffix = "#" + normalizeQuery(ref);
            querySuffix = "";
        } else {
            fragmentSuffix = "";
            querySuffix = normalizeQuery(query);
            if (querySuffix.length() > 0) {
                querySuffix = "?" + querySuffix;
            }
        }

        try {
            return new URL(protocol, host, port, path + querySuffix + fragmentSuffix).toExternalForm();
        } catch (MalformedURLException e) {
            LOGGER.error("Unexpected exception during normalization: " + e);
            return cleaned;
        }
    }

    /**
     * Tells whether the fragment should be kept and normalized like a query: the option is
     * enabled, there is a fragment but no query, and the {@code '#'} comes right after a
     * {@code '/'} in the URL's external form.
     */
    private boolean refDirectlyFollowsSlash(String externalForm, String ref, String query) {
        if (!this._treatRefAsQuery || ref == null || query != null) {
            return false;
        }
        int hashAt = externalForm.indexOf("#" + ref);
        return hashAt > 0 && externalForm.charAt(hashAt - 1) == '/';
    }
}
