package gr.ilsp.fmc.parser;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.tika.metadata.Metadata;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;

/* loaded from: input_file:gr/ilsp/fmc/parser/ExtendedLinksExtractor.class */
/**
 * Extracts the hyperlinks of an HTML document together with their anchor text
 * and a short window of text taken from the sibling nodes around each link.
 */
public class ExtendedLinksExtractor {
    /** Upper bound on the number of context words gathered/kept around a link. */
    private static final int CONTEXT_THRESHOLD = 40;

    /** Charset assumed when the metadata carries no "Content-Encoding" value. */
    private static final String DEFAULT_CHARSET = "UTF-8";

    /** Separator placed between text fragments and between the emitted context words. */
    private static final String SEPARATOR = ShingleFilter.TOKEN_SEPARATOR;

    /**
     * Parses the HTML held by {@code inputStream} and returns one
     * {@code ExtendedOutlink} per {@code a[href]} element, in document order.
     *
     * <p>The stream is {@code reset()} before reading, decoded with the charset
     * named by the "Content-Encoding" metadata entry (UTF-8 when that entry is
     * missing or blank) and closed afterwards. When a "Content-Location" entry
     * is present it is used as the base URI for resolving {@code abs:href}.
     *
     * @param inputStream stream positioned so that {@code reset()} rewinds it to
     *                    the start of the document
     * @param metadata    metadata providing "Content-Encoding" and, optionally,
     *                    "Content-Location"
     * @return the extracted links (possibly an empty array), or {@code null}
     *         when the content could not be read
     */
    public static ExtendedOutlink[] getLinks(InputStream inputStream, Metadata metadata) {
        try {
            String html = readContent(inputStream, metadata.get("Content-Encoding"));
            String baseUri = metadata.get("Content-Location");
            Elements anchors = (baseUri != null ? Jsoup.parse(html, baseUri) : Jsoup.parse(html)).select("a[href]");
            ExtendedOutlink[] outlinks = new ExtendedOutlink[anchors.size()];
            int slot = 0;
            for (Element anchor : anchors) {
                outlinks[slot++] = new ExtendedOutlink(anchor.attr("abs:href"), anchor.text().trim(), surroundingText(anchor));
            }
            return outlinks;
        } catch (IOException e) {
            // Best effort: callers receive null when the document cannot be read
            // (failed reset, unsupported charset name, read error).
            e.printStackTrace();
            return null;
        }
    }

    /** Rewinds and fully reads the stream as text, always closing the reader. */
    private static String readContent(InputStream inputStream, String charsetName) throws IOException {
        inputStream.reset();
        // InputStreamReader rejects a null charset name with a NullPointerException.
        String charset = (charsetName == null || charsetName.trim().length() == 0) ? DEFAULT_CHARSET : charsetName;
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, charset));
        try {
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the terminator; put a line break back so that
                // words and tag attributes on adjacent lines do not run together.
                content.append(line).append('\n');
            }
            return content.toString();
        } finally {
            reader.close();
        }
    }

    /**
     * Builds the context string of a link: words taken from the siblings that
     * precede the anchor followed by words taken from those that follow it,
     * joined by a single separator.
     */
    private static String surroundingText(Element anchor) {
        Element parent = anchor.parent();
        if (parent == null) {
            return "";
        }
        List<Node> siblings = parent.childNodes();
        int anchorPos = indexOfSame(siblings, anchor);

        // Walk outwards from the anchor, one sibling on each side per step,
        // until enough words have been gathered or the siblings run out.
        String before = "";
        String after = "";
        int gathered = 0;
        for (int dist = 1; dist < siblings.size() && gathered < CONTEXT_THRESHOLD; dist++) {
            if (anchorPos - dist >= 0) {
                String piece = textOf(siblings.get(anchorPos - dist));
                before = piece + SEPARATOR + before;
                gathered += tokenize(piece).length;
            }
            if (anchorPos + dist < siblings.size()) {
                String piece = textOf(siblings.get(anchorPos + dist));
                after = after + SEPARATOR + piece;
                gathered += tokenize(piece).length;
            }
        }

        String[] beforeWords = tokenize(before);
        String[] afterWords = tokenize(after);
        int total = Math.max(1, beforeWords.length + afterWords.length);

        int keepBefore;
        int keepAfter;
        if (beforeWords.length >= CONTEXT_THRESHOLD && afterWords.length >= CONTEXT_THRESHOLD) {
            // Plenty of text on both sides: split the window evenly.
            keepBefore = CONTEXT_THRESHOLD / 2;
            keepAfter = CONTEXT_THRESHOLD / 2;
        } else {
            // Otherwise give each side a share proportional to its size (floored),
            // never more than the words it actually has.
            keepBefore = Math.min(beforeWords.length, (beforeWords.length * CONTEXT_THRESHOLD) / total);
            keepAfter = Math.min(afterWords.length, (afterWords.length * CONTEXT_THRESHOLD) / total);
        }

        StringBuilder context = new StringBuilder();
        // The words closest to the link are the LAST ones before it ...
        for (int k = beforeWords.length - keepBefore; k < beforeWords.length; k++) {
            appendWord(context, beforeWords[k]);
        }
        // ... and the FIRST ones after it.
        for (int k = 0; k < keepAfter; k++) {
            appendWord(context, afterWords[k]);
        }
        return context.toString();
    }

    /** Returns the position of {@code target} itself (reference equality) in {@code nodes}, or -1. */
    private static int indexOfSame(List<Node> nodes, Node target) {
        for (int k = 0; k < nodes.size(); k++) {
            if (nodes.get(k) == target) {
                return k;
            }
        }
        return -1;
    }

    /** Returns the text of a text node or element; other node kinds contribute nothing. */
    private static String textOf(Node node) {
        if (node instanceof TextNode) {
            return ((TextNode) node).text();
        }
        if (node instanceof Element) {
            return ((Element) node).text();
        }
        return "";
    }

    /** Splits text into words on runs of whitespace; blank text yields an empty array. */
    private static String[] tokenize(String text) {
        String trimmed = text.trim();
        return trimmed.length() == 0 ? new String[0] : trimmed.split("\\s+");
    }

    /** Appends a word to the buffer, preceded by the separator unless it is the first word. */
    private static void appendWord(StringBuilder buffer, String word) {
        if (buffer.length() > 0) {
            buffer.append(SEPARATOR);
        }
        buffer.append(word);
    }
}
