package gr.ilsp.fmc.utils;

import gr.ilsp.fmc.main.SimpleCrawlHFS;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

/* loaded from: input_file:gr/ilsp/fmc/utils/TopicTools.class */
public class TopicTools {
    /** Log4j logger shared by the static helpers of this class. */
    private static final Logger LOGGER = Logger.getLogger(TopicTools.class);
    /** Starts out as null; holds an analyzer only once a method of this class assigns one. */
    private static Analyzer analyzer = null;
    /** Factory that the tokenising helpers ask for an analyzer matching their second argument. */
    private static AnalyzerFactory analyzerFactory = new AnalyzerFactory();
    /**
     * Reusable matcher for topic-file lines that must be ignored. Used with
     * {@code matches()}, it accepts a line that is empty, consists only of
     * whitespace, or begins with {@code #}; a {@code #} preceded by whitespace
     * is not accepted.
     *
     * <p>NOTE(review): this single instance is reset with each line that is read,
     * and {@link Matcher} instances are not safe for concurrent use, so parallel
     * callers would interfere with each other.
     */
    protected static Matcher skipLineM = Pattern.compile("^(\\s*)||(#.*)$").matcher("");

    /**
     * Reads a topic definition from the file system configured in {@code jobConf}
     * and turns every term line into a five-slot entry.
     *
     * <p>Lines accepted by {@code skipLineM} (blank or starting with {@code #}) are
     * ignored. Every other line is cut at the first type separator, the first
     * {@code =} and, when present, the first {@code >}:
     * {@code weight<sep>term=subclasses[>language]}. The term is lower-cased and
     * tokenised; the tokens are re-joined with {@code ShingleFilter.TOKEN_SEPARATOR}.
     *
     * <p>Slots of each returned array:
     * <ol start="0">
     *   <li>weight (text before the separator, trimmed)</li>
     *   <li>analysed term (joined tokens)</li>
     *   <li>subclasses text (between {@code =} and {@code >} or end of line)</li>
     *   <li>language of the term</li>
     *   <li>lower-cased, trimmed term as written in the file</li>
     * </ol>
     *
     * <p>When a later line yields the same analysed term and language as an existing
     * entry, the two are merged: the weight becomes the rounded mean of both
     * weights, slot 4 is kept from the earlier entry, the subclasses come from the
     * later line, and the merged entry moves to the end of the list.
     *
     * <p>If {@code str2} lists more than one language and a term line carries no
     * {@code >language} part, the problem is logged and the JVM is terminated with
     * {@code System.exit(0)}.
     *
     * @param str     path of the topic definition file
     * @param str2    target languages separated by {@code ;}
     * @param jobConf Hadoop configuration used to obtain the file system; its job
     *                jar is set from {@code SimpleCrawlHFS}
     * @return the parsed entries; empty when the file does not exist, and possibly
     *         partial when reading fails with an {@link IOException} (which is logged)
     * @throws StringIndexOutOfBoundsException if a term line lacks the separator or {@code =}
     * @throws NumberFormatException if two entries have to be merged and a weight is not numeric
     */
    public static ArrayList<String[]> analyzeTopic(String str, String str2, JobConf jobConf) {
        ArrayList<String[]> topic = new ArrayList<>();
        Path topicPath = new Path(str);
        jobConf.setJarByClass(SimpleCrawlHFS.class);
        try {
            FileSystem fileSystem = FileSystem.get(jobConf);
            if (!fileSystem.exists(topicPath)) {
                LOGGER.info("The file for topic definition does not exist.");
                return topic;
            }
            String[] languages = str2.split(";");
            // try-with-resources: the reader is released even when parsing a line throws.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(fileSystem.open(topicPath), "UTF-8"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    if (skipLineM.reset(line).matches()) {
                        continue;
                    }
                    int weightEnd = line.indexOf(ValueAggregatorDescriptor.TYPE_SEPARATOR);
                    int termEnd = line.indexOf("=");
                    int languageMark = line.indexOf(">");

                    String weight = line.substring(0, weightEnd).trim();
                    String term = line.substring(weightEnd + 1, termEnd).toLowerCase().trim();

                    String language;
                    if (languages.length > 1) {
                        if (languageMark < 0) {
                            LOGGER.info("Even though the target languages are more than 1, the language of term '" + line + "' is not defined. Modify the topic definition properly.");
                            System.exit(0);
                        }
                        language = line.substring(languageMark + 1).toLowerCase().trim();
                    } else {
                        language = languages[0].trim();
                    }

                    // With no usable language the whole language list is handed to analyze();
                    // only the tokeniser whose result is actually used gets called.
                    ArrayList<String> tokens = language.isEmpty()
                            ? analyze(term, str2)
                            : analyze_vp(term, language);
                    if (tokens == null) {
                        // Both tokenisers signal failure with null; skip the term instead of
                        // failing with a NullPointerException.
                        LOGGER.warn("Skipping term '" + line + "': it could not be analysed.");
                        continue;
                    }
                    StringBuilder joined = new StringBuilder();
                    for (String token : tokens) {
                        joined.append(ShingleFilter.TOKEN_SEPARATOR).append(token);
                    }
                    String analysedTerm = joined.toString().trim();

                    String subclasses = (languageMark >= 0
                            ? line.substring(termEnd + 1, languageMark)
                            : line.substring(termEnd + 1)).trim();

                    String writtenTerm = term;
                    // At most one entry per (analysed term, language) can exist, because every
                    // earlier duplicate was merged the same way; stop at the first hit.
                    for (int i = 0; i < topic.size(); i++) {
                        String[] existing = topic.get(i);
                        if (existing[1].equals(analysedTerm) && existing[3].equals(language)) {
                            double mean = (Double.parseDouble(weight) + Double.parseDouble(existing[0])) / 2.0d;
                            weight = Integer.toString((int) Math.round(mean));
                            writtenTerm = existing[4].trim();
                            topic.remove(i);
                            break;
                        }
                    }
                    topic.add(new String[]{weight, analysedTerm, subclasses, language, writtenTerm});
                }
            }
        } catch (IOException e) {
            LOGGER.error("Failed to read the topic definition " + str, e);
        }
        return topic;
    }

    /**
     * Lists the terms of a topic definition stored on the local file system.
     *
     * <p>Lines accepted by {@code skipLineM} (blank or starting with {@code #}) are
     * ignored. For every other line the text between the first type separator and
     * the first {@code =} is trimmed and added to the result, in file order. The
     * file is decoded as UTF-8, matching how {@code analyzeTopic} reads topic
     * definitions.
     *
     * @param str path of the topic definition file
     * @return the terms as written in the file; empty when the file does not exist,
     *         and possibly partial when reading fails with an {@link IOException}
     *         (which is logged)
     * @throws StringIndexOutOfBoundsException if a term line lacks the {@code =}
     *         or has it before the separator
     */
    public static ArrayList<String> analyzeTopicALL(String str) {
        ArrayList<String> terms = new ArrayList<>();
        File topicFile = new File(str);
        if (!topicFile.exists()) {
            LOGGER.info("The file for topic definition does not exist.");
            return terms;
        }
        // Explicit charset instead of FileReader's platform default; try-with-resources
        // releases the file even when a malformed line aborts the loop.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(topicFile), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (!skipLineM.reset(line).matches()) {
                    int termStart = line.indexOf(ValueAggregatorDescriptor.TYPE_SEPARATOR) + 1;
                    terms.add(line.substring(termStart, line.indexOf("=")).trim());
                }
            }
        } catch (IOException e) {
            // Keep returning what was read so far, but no longer hide the failure.
            LOGGER.error("Failed to read the topic definition " + str, e);
        }
        return terms;
    }

    /**
     * Collects slot 4 of every topic entry, trimmed, in list order.
     *
     * @param arrayList topic entries; each array must have at least five slots
     * @return a new list with one trimmed string per entry
     */
    public static ArrayList<String> analyzeTopicALL(ArrayList<String[]> arrayList) {
        ArrayList<String> terms = new ArrayList<>();
        for (String[] entry : arrayList) {
            terms.add(entry[4].trim());
        }
        return terms;
    }

    /**
     * Tokenises {@code str} with the analyzer registered for {@code str2}.
     *
     * <p>When {@code str2} equals {@code "lt"} the work is handed to
     * {@code LithuanianAnalyzer.analyze}. Otherwise an analyzer is requested from
     * the factory, the text is run through its token stream for the field name
     * {@code "contents"}, and every emitted term is collected in order. The
     * analyzer is held in a local variable, so concurrent calls do not overwrite
     * each other's analyzer, and both the token stream and the analyzer are closed
     * even when tokenising fails.
     *
     * @param str  text to tokenise
     * @param str2 language key used to select the analyzer
     * @return the tokens in stream order, or {@code null} when obtaining or running
     *         the analyzer throws (the failure is logged)
     * @throws IOException declared for callers; in the non-{@code "lt"} branch every
     *         exception is caught and turned into a {@code null} result
     */
    public static ArrayList<String> analyze(String str, String str2) throws IOException {
        if (str2.equals("lt")) {
            return LithuanianAnalyzer.analyze(str);
        }
        ArrayList<String> tokens = new ArrayList<>();
        try {
            Analyzer termAnalyzer = analyzerFactory.getAnalyzer(str2);
            try {
                TokenStream tokenStream = termAnalyzer.tokenStream("contents", new StringReader(str));
                try {
                    CharTermAttribute termText = (CharTermAttribute) tokenStream.addAttribute(CharTermAttribute.class);
                    while (tokenStream.incrementToken()) {
                        tokens.add(termText.toString());
                    }
                } finally {
                    tokenStream.close();
                }
            } finally {
                termAnalyzer.close();
            }
        } catch (Exception e) {
            LOGGER.error("Could not analyse '" + str + "' for language '" + str2 + "'", e);
            return null;
        }
        return tokens;
    }

    /**
     * Tokenises {@code str} for the language {@code str2}.
     *
     * <p>This entry point performs exactly the same steps as
     * {@link #analyze(String, String)}, so it forwards to that method instead of
     * carrying a second copy of the same code.
     *
     * @param str  text to tokenise
     * @param str2 language key used to select the analyzer
     * @return whatever {@link #analyze(String, String)} returns for the same
     *         arguments, including {@code null} on analyzer failure
     * @throws IOException as declared by {@link #analyze(String, String)}
     */
    public static ArrayList<String> analyze_vp(String str, String str2) throws IOException {
        return analyze(str, str2);
    }

    /**
     * Returns the distinct subclass labels used by the topic, sorted in natural
     * string order.
     *
     * <p>Slot 2 of every entry is split on {@code ;} and each piece is trimmed
     * before duplicates are removed.
     *
     * @param arrayList topic entries; each array must have at least three slots
     * @return sorted array without duplicates; empty when there are no entries
     */
    public static String[] findSubclasses(ArrayList<String[]> arrayList) {
        ArrayList<String> labels = new ArrayList<>();
        for (String[] entry : arrayList) {
            for (String label : entry[2].split(";")) {
                labels.add(label.trim());
            }
        }
        String[] sorted = labels.toArray(new String[labels.size()]);
        Arrays.sort(sorted);
        // Compact in place: equal labels are adjacent after sorting, so comparing
        // against the last kept label is enough.
        int kept = 0;
        for (String label : sorted) {
            if (kept == 0 || !label.equals(sorted[kept - 1])) {
                sorted[kept++] = label;
            }
        }
        return Arrays.copyOf(sorted, kept);
    }

    /**
     * Computes {@code i} times the median of the strictly positive weights.
     *
     * <p>The weight is parsed from slot 0 of every entry; zero and negative weights
     * are left out. For an even number of weights the median is the mean of the two
     * middle values.
     *
     * @param arrayList topic entries whose slot 0 holds a numeric weight
     * @param i         factor applied to the median
     * @return the scaled median, or {@code 0.0} when no weight is positive
     * @throws NumberFormatException if a weight cannot be parsed as a double
     */
    public static double calculateThreshold(ArrayList<String[]> arrayList, int i) {
        double[] positives = new double[arrayList.size()];
        int count = 0;
        for (String[] entry : arrayList) {
            double weight = Double.parseDouble(entry[0]);
            if (weight > 0.0d) {
                positives[count++] = weight;
            }
        }
        if (count == 0) {
            return 0.0d;
        }
        Arrays.sort(positives, 0, count);
        int middle = count / 2;
        double median;
        if (count % 2 == 1) {
            median = positives[middle];
        } else {
            median = (positives[middle - 1] + positives[middle]) / 2.0d;
        }
        return i * median;
    }

    /**
     * Reads the whole stream as UTF-8 text and closes it.
     *
     * @param inputStream stream to drain; may be {@code null}
     * @return the decoded content, or the empty string for a {@code null} stream
     * @throws IOException if reading or closing the stream fails; the stream is
     *         closed in that case as well
     */
    public static String convertStreamToString(InputStream inputStream) throws IOException {
        if (inputStream == null) {
            return "";
        }
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
            StringWriter collected = new StringWriter();
            char[] chunk = new char[1024];
            for (int count = reader.read(chunk); count != -1; count = reader.read(chunk)) {
                collected.write(chunk, 0, count);
            }
            return collected.toString();
        } finally {
            inputStream.close();
        }
    }
}
