package gr.ilsp.fmc.extractors;

import gr.ilsp.fmc.extractors.PrintTextLocations;
import gr.ilsp.fmc.utils.ContentNormalizer;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.cli.HelpFormatter;
import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.pdfbox.exceptions.CryptographyException;
import org.apache.pdfbox.exceptions.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDStream;

/* loaded from: input_file:gr/ilsp/fmc/extractors/Pdf2text.class */
public class Pdf2text {
    /** Log4j logger used by the static methods of this class. */
    private static final Logger LOGGER = Logger.getLogger(Pdf2text.class);
    // Character attributes of the page currently being processed. run1 clears this list for every page and
    // then replaces it with the list returned by PrintTextLocations.getchardata().
    private static ArrayList<PrintTextLocations.CharAttr> chardata = new ArrayList<>();
    // Text lines of the page currently being processed. Cleared by run1 for every page; layout_analysis and
    // put_sections_in_order replace it with filtered / re-ordered lists.
    private static ArrayList<LineAttr> linedata = new ArrayList<>();
    // Sections (groups of lines) of the page currently being processed; cleared by run1 for every page.
    private static ArrayList<SectionAttr> sectiondata = new ArrayList<>();
    // Lines per page, keyed "page" + zero-based page index. Filled by run1 and read by getAllText.
    // NOTE(review): all of this state is static and mutable, so concurrent calls would share it.
    private static HashMap<String, ArrayList<LineAttr>> docprops = new HashMap<>();
    // layout_analysis keeps a line only when both its fs and its h are greater than this value.
    private static double fontsize_thr = 0.6d;
    // segmentSectionsByAlignement1 compares this with the share of a section's lines that have the most
    // common left position / extent; shares at or above it count as "aligned".
    private static double align_thr_fully = 0.6d;
    // segmentSectionsByAlignement1 compares this with the ratio of lines that start with an upper-case
    // letter (and are not entirely upper-case) to the number of lines in the section.
    private static double caps_thr = 0.75d;

    /* loaded from: input_file:gr/ilsp/fmc/extractors/Pdf2text$LineAttr.class */
    /**
     * Mutable holder for the attributes of one text line of a page.
     *
     * <p>All fields are public and are read and written directly by the layout code of
     * {@code Pdf2text}.
     */
    public static class LineAttr {
        /** Text of the line. */
        public String chars;
        /** Horizontal position of the line. */
        public float x;
        /** Vertical position of the line. */
        public float y;
        /** Font-size value of the line; compared with {@code fontsize_thr} when lines are filtered. */
        public float fs;
        /** NOTE(review): not read by the code visible in this file section; meaning to be confirmed. */
        public float xs;
        /** Height of the line; added to {@link #y} to obtain the bottom edge. */
        public float h;
        /** NOTE(review): not read by the code visible in this file section; meaning to be confirmed. */
        public float s;
        /**
         * Horizontal extent of the line. NOTE(review): some callers add it to {@link #x}, others
         * subtract {@link #x} from it, so confirm whether it is a width or a right edge.
         */
        public float w;
        /** Grouping label; the layout code writes values of the form {@code "<section>_<part>"}. */
        public String p;
        /** Type code of the line; negative values are copied from header/footer sections. */
        public int t;

        /**
         * Creates a line record; every argument is stored unchanged in the field of the same role.
         */
        public LineAttr(String text, float xPos, float yPos, float fontSize, float xsValue,
                float height, float sValue, float extent, String label, int type) {
            this.chars = text;
            this.x = xPos;
            this.y = yPos;
            this.fs = fontSize;
            this.xs = xsValue;
            this.h = height;
            this.s = sValue;
            this.w = extent;
            this.p = label;
            this.t = type;
        }
    }

    /* loaded from: input_file:gr/ilsp/fmc/extractors/Pdf2text$ParagraphAttr.class */
    /**
     * Mutable holder for the attributes of one paragraph.
     *
     * <p>NOTE(review): no code visible in this file section creates or reads this type; the field
     * descriptions mirror the identically named fields of {@code LineAttr} and {@code SectionAttr}
     * and should be confirmed.
     */
    public static class ParagraphAttr {
        /** Text of the paragraph. */
        public String chars;
        /** Horizontal position. */
        public float x;
        /** Vertical position. */
        public float y;
        /** Font-size value. */
        public float fs;
        /** Height. */
        public float h;
        /** Horizontal extent. */
        public float w;
        /** Grouping label. */
        public String p;
        /** Type code. */
        public int t;
        /** Count stored with the paragraph (presumably its number of lines; confirm). */
        public int num;

        /**
         * Creates a paragraph record; every argument is stored unchanged in the matching field.
         */
        public ParagraphAttr(String text, float xPos, float yPos, float fontSize, float height,
                float extent, String label, int type, int count) {
            this.chars = text;
            this.x = xPos;
            this.y = yPos;
            this.fs = fontSize;
            this.h = height;
            this.w = extent;
            this.p = label;
            this.t = type;
            this.num = count;
        }
    }

    /* loaded from: input_file:gr/ilsp/fmc/extractors/Pdf2text$SectionAttr.class */
    /**
     * Mutable holder for a section, i.e. a run of consecutive entries of {@code linedata} that the
     * layout code treats as one block.
     */
    public static class SectionAttr {
        /** Text of the section's lines, joined with line breaks by the layout code. */
        public String chars;
        /** Horizontal position of the section. */
        public float x;
        /** Vertical position of the section. */
        public float y;
        /** Font-size value taken from the section's lines. */
        public float fs;
        /** Height of the section. */
        public float h;
        /** Horizontal extent of the section (see the review note on {@code LineAttr.w}). */
        public float w;
        /** Section label; {@code createSections} writes values of the form {@code "sec_<n>"}. */
        public String p;
        /** Type code; {@code put_sections_in_order} sets -1 for headers and -2 for footers. */
        public int t;
        /** Number of lines in the section. */
        public int num;
        /** Index in {@code linedata} of the first line of the section. */
        public int sl;
        /** Index in {@code linedata} of the last line of the section (inclusive). */
        public int el;

        /**
         * Creates a section record; every argument is stored unchanged in the matching field.
         */
        public SectionAttr(String text, float xPos, float yPos, float fontSize, float height,
                float extent, String label, int type, int lineCount, int startLine, int endLine) {
            this.chars = text;
            this.x = xPos;
            this.y = yPos;
            this.fs = fontSize;
            this.h = height;
            this.w = extent;
            this.p = label;
            this.t = type;
            this.num = lineCount;
            this.sl = startLine;
            this.el = endLine;
        }
    }

    /**
     * Command-line entry point. Lists the entries of the directory named by the first argument
     * and, for every regular file whose absolute path ends in ".pdf" or ".PDF", logs the file
     * name and clears {@code docprops}.
     *
     * <p>A missing argument or a path that cannot be listed is reported through the logger
     * instead of failing with an exception.
     *
     * @param strArr command-line arguments; {@code strArr[0]} is the directory to scan
     * @throws IOException declared for compatibility; not thrown by the current body
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr == null || strArr.length == 0) {
            LOGGER.error("Usage: Pdf2text <directory containing PDF files>");
            return;
        }
        // File.listFiles() returns null when the path is not a directory or cannot be read.
        File[] listFiles = new File(strArr[0]).listFiles();
        if (listFiles == null) {
            LOGGER.error("Cannot list the files of " + strArr[0] + " (not a directory or not readable)");
            return;
        }
        for (File candidate : listFiles) {
            if (!candidate.isFile()) {
                continue;
            }
            String absolutePath = candidate.getAbsolutePath();
            if (absolutePath.endsWith(".pdf") || absolutePath.endsWith(".PDF")) {
                LOGGER.info("---------------------------------------FILE:" + candidate.getName());
                docprops.clear();
            }
        }
    }

    /**
     * Extracts the text of a PDF file page by page.
     *
     * <p>For every page the shared {@code chardata}, {@code linedata} and {@code sectiondata}
     * lists are reset, the page's content stream is fed to {@code PrintTextLocations}, the
     * layout analysis is run when characters were found, the resulting lines are normalized and
     * stored in {@code docprops} under {@code "page" + index}, and the markup returned by
     * {@link #getAllText(HashMap)} is appended to the result.
     *
     * <p>If the document is encrypted an empty password is tried; a failure is logged and
     * processing continues. An {@link IOException} is logged and ends processing; the text
     * collected so far is returned. The document is always closed.
     *
     * @param file PDF file to read
     * @param z    forwarded to {@code layout_analysis}; when true the sections of each page are
     *             re-ordered by {@code put_sections_in_order}
     * @return the concatenated per-page markup, possibly empty
     */
    public static String run1(File file, boolean z) {
        StringBuilder text = new StringBuilder();
        PDDocument document = null;
        docprops.clear();
        try {
            document = PDDocument.load(file);
            if (document.isEncrypted()) {
                try {
                    document.decrypt("");
                } catch (CryptographyException e) {
                    LOGGER.error("Error: CryptographyException.", e);
                } catch (InvalidPasswordException e) {
                    LOGGER.error("Error: Document is encrypted with a password.", e);
                }
            }
            PrintTextLocations locator = new PrintTextLocations();
            List<?> pages = document.getDocumentCatalog().getAllPages();
            // The height deliberately survives from the previous page when a page has no media box.
            float pageHeight = 0.0f;
            for (int pageIndex = 0; pageIndex < pages.size(); pageIndex++) {
                PDPage page = (PDPage) pages.get(pageIndex);
                if (page.getMediaBox() != null) {
                    pageHeight = page.getMediaBox().getHeight();
                    LOGGER.debug("Processing page: " + pageIndex + " with height " + pageHeight + " and width " + page.getMediaBox().getWidth());
                } else {
                    LOGGER.error("PROBLEM in getMediaBox");
                }
                // Reset the shared per-page state before the page is parsed.
                chardata.clear();
                linedata.clear();
                sectiondata.clear();
                PDStream contents = page.getContents();
                if (contents != null) {
                    locator.processStream(page, page.findResources(), contents.getStream());
                    chardata = locator.getchardata();
                    if (chardata.size() >= 1) {
                        layout_analysis(pageHeight, z);
                    }
                }
                ArrayList<LineAttr> pageLines = new ArrayList<>();
                for (LineAttr line : linedata) {
                    line.chars = Utils.normalizeContent(line.chars);
                    LOGGER.debug(line.p + "\t" + line.t + "\t" + line.chars);
                    pageLines.add(line);
                }
                docprops.put("page" + pageIndex, pageLines);
                // A page without lines contributes no markup, so there is nothing to render for it.
                if (!pageLines.isEmpty()) {
                    text.append(getAllText(docprops));
                }
            }
        } catch (IOException e) {
            LOGGER.error("Failed to extract text from " + file, e);
        } finally {
            if (document != null) {
                try {
                    document.close();
                } catch (IOException e) {
                    LOGGER.error("Failed to close " + file, e);
                }
            }
        }
        return text.toString();
    }

    /* JADX WARN: Removed duplicated region for block: B:57:0x035d  */
    /* JADX WARN: Removed duplicated region for block: B:60:0x037c  */
    /* JADX WARN: Removed duplicated region for block: B:63:0x039c  */
    /* JADX WARN: Removed duplicated region for block: B:66:0x03a5  */
    /* JADX WARN: Removed duplicated region for block: B:80:0x0426 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:81:0x03a0  */
    /* JADX WARN: Removed duplicated region for block: B:82:0x0380  */
    /* JADX WARN: Removed duplicated region for block: B:83:0x0361  */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Stub left by the decompiler: the body of this method could not be reconstructed, so every
     * call fails.
     *
     * @param r5 map of the same type as {@code docprops}; not used
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    public static String getText(HashMap<String, ArrayList<LineAttr>> r5) {
        final String message = "Method not decompiled: gr.ilsp.fmc.extractors.Pdf2text.getText(java.util.HashMap):java.lang.String";
        throw new UnsupportedOperationException(message);
    }

    /**
     * Renders the lines of a page as {@code <text>...</text>} markup.
     *
     * <p>Pages are looked up as {@code "page0"}, {@code "page1"}, ... up to the size of the map.
     * The result is overwritten for every page found, so the returned string is the markup of the
     * highest-numbered page present; {@code run1} relies on this when it appends the result after
     * each page.
     *
     * <p>Rules for one page:
     * <ul>
     *   <li>no lines: empty string;</li>
     *   <li>one line: {@code <text>} + the untrimmed line + {@code </text>};</li>
     *   <li>otherwise consecutive lines with equal {@code p} labels are joined into one
     *       {@code <text>} element, separated by a space, or by nothing when the earlier line ended
     *       in a hyphen (the hyphen is removed); a change of label closes the element and opens a
     *       new one on the next output line.</li>
     * </ul>
     *
     * @param hashMap page map to render; when {@code null} the shared {@code docprops} map is used
     * @return markup of the last page found, or an empty string when there is none
     */
    public static String getAllText(HashMap<String, ArrayList<LineAttr>> hashMap) {
        final HashMap<String, ArrayList<LineAttr>> pages = hashMap != null ? hashMap : docprops;
        // Library constants kept from the decompiled code: "-" (Commons CLI) and " " (Lucene).
        final String hyphen = HelpFormatter.DEFAULT_OPT_PREFIX;
        final String space = ShingleFilter.TOKEN_SEPARATOR;
        String result = "";
        for (int pageIndex = 0; pageIndex < pages.size(); pageIndex++) {
            ArrayList<LineAttr> lines = pages.get("page" + pageIndex);
            if (lines == null) {
                continue;
            }
            if (lines.isEmpty()) {
                // Nothing to render; indexing the first line here would throw.
                result = "";
                continue;
            }
            if (lines.size() == 1) {
                result = "<text>" + lines.get(0).chars + "</text>";
                continue;
            }
            StringBuilder page = new StringBuilder("<text>");
            String pending = lines.get(0).chars.trim();
            // Computed afresh for every page so a hyphen at the end of the previous page cannot leak in.
            boolean hyphenated = pending.endsWith(hyphen);
            if (hyphenated) {
                pending = pending.substring(0, pending.lastIndexOf(hyphen)).trim();
            }
            final int last = lines.size() - 1;
            for (int k = 1; k <= last; k++) {
                if (lines.get(k).p.equals(lines.get(k - 1).p)) {
                    page.append(pending);
                    if (!hyphenated) {
                        page.append(space);
                    }
                } else {
                    page.append(pending).append("</text>\n<text>");
                }
                String current = lines.get(k).chars.trim();
                hyphenated = current.endsWith(hyphen);
                if (k == last) {
                    // The final line loses a trailing hyphen but is not trimmed again.
                    String tail = hyphenated ? current.substring(0, current.lastIndexOf(hyphen)) : current;
                    page.append(tail).append("</text>\n");
                } else {
                    pending = hyphenated ? current.substring(0, current.lastIndexOf(hyphen)).trim() : current;
                }
            }
            result = page.toString();
        }
        return result;
    }

    /**
     * Runs the layout analysis of the current page on the shared {@code chardata},
     * {@code linedata} and {@code sectiondata} lists.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>group characters into lines ({@code put_chars_in_textlines});</li>
     *   <li>decide whether to work with the provided font sizes (at least two distinct values
     *       reported by {@code findFontSizes}) or with rounded character heights;</li>
     *   <li>drop lines whose box lies inside the box of another line that has at least as many
     *       characters, lines whose {@code fs} or {@code h} is not above {@code fontsize_thr},
     *       and lines that {@code LineTypeGuesser.isDigitsOnlyLine} reports once spaces are removed;</li>
     *   <li>categorize the remaining lines, build sections, split them by distance and by
     *       alignment, and recompute each section's bounding values;</li>
     *   <li>optionally re-order the sections.</li>
     * </ol>
     *
     * @param f page height, forwarded to {@code put_chars_in_textlines} and
     *          {@code put_sections_in_order}
     * @param z when true, {@code put_sections_in_order} is called at the end
     * @return 0 when no lines were built, when no line survives filtering, or when
     *         {@code findFontHeights} reports exactly one value; 1 otherwise
     */
    private static int layout_analysis(float f, boolean z) {
        LOGGER.debug("PUT CHARACTERS INTO TEXT-LINES. Just checks the y-coordinates of successive characters.");
        int lineCount = put_chars_in_textlines(f);
        if (lineCount == 0) {
            return 0;
        }

        ArrayList<Double> fontSizes = findFontSizes();
        boolean useFontSizes = fontSizes.size() >= 2;
        if (useFontSizes) {
            LOGGER.debug("Analysis will be based on provided font sizes");
        } else {
            // Fewer than two font sizes: use the rounded character height as font size instead.
            for (PrintTextLocations.CharAttr ch : chardata) {
                ch.fs = Math.round(ch.h);
                ch.h = Math.round(ch.h);
            }
            LOGGER.debug("Analysis will be based on estimated font sizes (i.e. categorization based on text-line heights)");
        }
        estimate_space_thr_per_line(lineCount, useFontSizes);
        if (useFontSizes) {
            represent_textline_fonts();
        } else {
            represent_textline_heights();
        }

        // Indexes of lines whose box is contained in the box of another, not shorter, line.
        HashSet<Integer> nested = new HashSet<>();
        for (int a = 0; a < linedata.size() - 1; a++) {
            LineAttr lineA = linedata.get(a);
            float aLeft = lineA.x;
            float aTop = lineA.y;
            float aRight = aLeft + lineA.w;
            float aBottom = aTop + lineA.h;
            for (int b = a + 1; b < linedata.size(); b++) {
                LineAttr lineB = linedata.get(b);
                float bLeft = lineB.x;
                float bTop = lineB.y;
                float bRight = bLeft + lineB.w;
                float bBottom = bTop + lineB.h;
                boolean bInsideA = bLeft >= aLeft && bTop >= aTop && bRight <= aRight && bBottom <= aBottom;
                boolean aInsideB = aLeft >= bLeft && aTop >= bTop && aRight <= bRight && aBottom <= bBottom;
                if (!nested.contains(Integer.valueOf(a)) && !nested.contains(Integer.valueOf(b))
                        && bInsideA && lineB.chars.length() <= lineA.chars.length()) {
                    nested.add(Integer.valueOf(b));
                }
                if (aInsideB && lineA.chars.length() <= lineB.chars.length()) {
                    nested.add(Integer.valueOf(a));
                }
            }
        }

        for (LineAttr line : linedata) {
            line.chars = ContentNormalizer.normalizeText(line.chars);
        }

        ArrayList<LineAttr> kept = new ArrayList<>();
        for (int k = 0; k < linedata.size(); k++) {
            LineAttr line = linedata.get(k);
            boolean bigEnough = line.fs > fontsize_thr && line.h > fontsize_thr;
            if (bigEnough && !nested.contains(Integer.valueOf(k))
                    && !LineTypeGuesser.isDigitsOnlyLine(line.chars.replace(ShingleFilter.TOKEN_SEPARATOR, ""))) {
                kept.add(line);
            }
        }
        linedata = kept;
        LOGGER.debug("LINES: " + linedata.size());

        ArrayList<Double> fontHeights = findFontHeights();
        if (fontHeights.size() == 1) {
            return 0;
        }
        if (linedata.isEmpty()) {
            // Every line was filtered out (e.g. a page holding only a page number);
            // createSections() reads linedata.get(0) and would fail.
            return 0;
        }
        categorizePerFontsize(useFontSizes ? fontSizes : fontHeights);
        createSections();

        // Split multi-line sections by vertical distance; every resulting part becomes its own section.
        ArrayList<SectionAttr> refined = new ArrayList<>();
        for (int s = 0; s < sectiondata.size(); s++) {
            if (sectiondata.get(s).num > 1) {
                segmentSectionsByDistance(sectiondata.get(s), s, useFontSizes);
                // Re-read after the call in case the callee replaced the list entry.
                SectionAttr section = sectiondata.get(s);
                // The label of the section's last line is "<section>_<part>"; its part number is
                // used as the highest part index of the section.
                int lastPart = Integer.parseInt(linedata.get(section.el).p.split("_")[1]);
                if (lastPart == 0) {
                    refined.add(section);
                } else {
                    for (int part = 0; part < lastPart + 1; part++) {
                        SectionAttr piece = new SectionAttr(section.chars, section.x, section.y, section.fs,
                                section.h, section.w, section.p, section.t, section.num, section.sl, section.el);
                        StringBuilder pieceText = new StringBuilder();
                        int pieceLines = 0;
                        int firstLine = -1;
                        int lastLine = -1;
                        for (int li = piece.sl; li < piece.el + 1; li++) {
                            if (Integer.parseInt(linedata.get(li).p.split("_")[1]) == part) {
                                pieceText.append("\n").append(linedata.get(li).chars);
                                if (firstLine == -1) {
                                    firstLine = li;
                                }
                                lastLine = li;
                                pieceLines++;
                            }
                        }
                        piece.chars = pieceText.toString();
                        piece.sl = firstLine;
                        piece.el = lastLine;
                        piece.num = pieceLines;
                        refined.add(piece);
                    }
                }
            } else {
                SectionAttr section = sectiondata.get(s);
                for (int offset = 0; offset < section.num; offset++) {
                    linedata.get(section.sl + offset).p = s + "_0";
                }
                refined.add(section);
            }
        }
        sectiondata.clear();
        sectiondata = refined;

        // Split sections of more than two lines by alignment; shorter ones become a single part.
        for (int s = 0; s < sectiondata.size(); s++) {
            SectionAttr section = sectiondata.get(s);
            if (section.num > 2) {
                segmentSectionsByAlignement1(section, s, useFontSizes);
            } else {
                for (int offset = 0; offset < section.num; offset++) {
                    linedata.get(section.sl + offset).p = s + "_0";
                }
            }
        }

        // Recompute position and size of every section from its lines, with a one-unit margin.
        for (int s = 0; s < sectiondata.size(); s++) {
            SectionAttr section = sectiondata.get(s);
            int n = section.num;
            float[] tops = new float[n];
            float[] lefts = new float[n];
            float[] bottoms = new float[n];
            float[] extents = new float[n];
            for (int offset = 0; offset < n; offset++) {
                LineAttr line = linedata.get(section.sl + offset);
                tops[offset] = line.y;
                lefts[offset] = line.x;
                bottoms[offset] = tops[offset] + line.h;
                extents[offset] = line.w;
            }
            Arrays.sort(tops);
            Arrays.sort(lefts);
            Arrays.sort(bottoms);
            Arrays.sort(extents);
            section.y = Math.round(tops[0] - 1.0f);
            section.x = Math.round(lefts[0] - 1.0f);
            section.h = Math.round(bottoms[bottoms.length - 1] + 1.0f) - Math.round(tops[0] - 1.0f);
            section.w = Math.round(extents[extents.length - 1] + 1.0f);
        }

        for (SectionAttr section : sectiondata) {
            LOGGER.debug("TYPE:\t" + section.t);
            LOGGER.debug(section.chars + "\n--------");
        }
        if (z) {
            put_sections_in_order(f);
        }
        return 1;
    }

    /**
     * Re-orders {@code sectiondata} and rebuilds {@code linedata} in the new section order.
     *
     * <p>Sections whose {@code y} is above 90% of {@code f} are marked as footers (type -2) and
     * sections ending above 10% of {@code f} as headers (type -1); both are placed first. The
     * remaining sections are appended in the batches returned by {@code analysis}, for at most
     * 30 rounds; the gap factor passed to {@code analysis} starts at 10 and doubles after every
     * round that returns nothing. Sections still unplaced when the loop ends are not kept.
     *
     * @param f page height used for the header/footer limits
     */
    private static void put_sections_in_order(float f) {
        ArrayList<Integer> usedIndexes = new ArrayList<>();
        ArrayList<SectionAttr> ordered = new ArrayList<>();
        for (int idx = 0; idx < sectiondata.size(); idx++) {
            SectionAttr section = sectiondata.get(idx);
            if (section.y > 0.9d * f) {
                usedIndexes.add(Integer.valueOf(idx));
                section.t = -2;
                ordered.add(section);
                LOGGER.debug("FOOTER: " + section.chars);
            } else if (section.y + section.h < 0.1d * f) {
                usedIndexes.add(Integer.valueOf(idx));
                section.t = -1;
                ordered.add(section);
                LOGGER.debug("HEADER: " + section.chars);
            } else {
                LOGGER.debug("TEXT: " + section.chars);
            }
        }

        int gapFactor = 10;
        // First pass over all sections; header/footer indexes are excluded through usedIndexes.
        ArrayList<SectionAttr> batch = analysis(sectiondata, usedIndexes, null, 10);
        ordered.addAll(batch);
        batch.clear();
        for (SectionAttr section : sectiondata) {
            if (!ordered.contains(section)) {
                batch.add(section);
            }
        }
        usedIndexes.clear();

        for (int round = 1; round < 31; round++) {
            if (ordered.size() < 1) {
                LOGGER.debug("No text extracted from this page");
                break;
            }
            if (batch.size() <= 0) {
                break;
            }
            // Continue from the section placed last; batch now holds the sections still unplaced.
            batch = analysis(batch, usedIndexes, ordered.get(ordered.size() - 1), gapFactor);
            if (batch.isEmpty()) {
                gapFactor *= 2;
            }
            ordered.addAll(batch);
            batch.clear();
            for (SectionAttr section : sectiondata) {
                if (!ordered.contains(section)) {
                    batch.add(section);
                }
            }
            usedIndexes.clear();
        }

        sectiondata.clear();
        sectiondata.addAll(ordered);

        // Rebuild the line list in section order; header/footer lines inherit the section type.
        ArrayList<LineAttr> reordered = new ArrayList<>();
        for (SectionAttr section : sectiondata) {
            for (int li = section.sl; li <= section.el; li++) {
                if (section.t < 0) {
                    linedata.get(li).t = section.t;
                }
                reordered.add(linedata.get(li));
            }
        }
        linedata = reordered;
    }

    /**
     * Selects, in ascending {@code y} order, the sections of {@code arrayList} that can be placed
     * next in reading order.
     *
     * <p>For every {@code y} value (ascending) the candidates with that {@code y} whose index is
     * not in {@code arrayList2} are scanned in list order. A candidate is rejected ("blocked")
     * when
     * <ul>
     *   <li>another section starts to its left, has a {@code w} smaller than the candidate's
     *       {@code x}, and overlaps the candidate's vertical range widened by five font sizes on
     *       both sides; or</li>
     *   <li>its {@code x}..{@code w} range overlaps that of a section that is already blocked.</li>
     * </ul>
     * The first non-blocked candidate whose {@code x} is at most {@code i * fs} beyond the
     * {@code w} of the previously accepted section (or of {@code sectionAttr}, or its own
     * {@code x} when neither exists) is accepted, and the scan moves on to the next {@code y}.
     *
     * <p>NOTE(review): the scan is written as an indexed {@code for} loop. The decompiled
     * {@code while (true)} form had no exit once the index passed the end of the list and would
     * not have terminated; confirm the reconstruction against the original source if available.
     *
     * @param arrayList   sections to choose from; not modified
     * @param arrayList2  indexes (into {@code arrayList}) to skip; the index of every accepted
     *                    section is added
     * @param sectionAttr section placed last by the caller, or {@code null}
     * @param i           gap factor, multiplied by the candidate's font size
     * @return the accepted sections, in acceptance order
     */
    private static ArrayList<SectionAttr> analysis(ArrayList<SectionAttr> arrayList, ArrayList<Integer> arrayList2, SectionAttr sectionAttr, int i) {
        ArrayList<SectionAttr> accepted = new ArrayList<>();
        ArrayList<Integer> blocked = new ArrayList<>();
        float[] sortedTops = new float[arrayList.size()];
        for (int k = 0; k < arrayList.size(); k++) {
            sortedTops[k] = arrayList.get(k).y;
        }
        Arrays.sort(sortedTops);

        for (float top : sortedTops) {
            for (int c = 0; c < arrayList.size(); c++) {
                SectionAttr candidate = arrayList.get(c);
                if (candidate.y != top || arrayList2.contains(Integer.valueOf(c))) {
                    continue;
                }
                // Vertical range of the candidate, widened by five font sizes above and below.
                int bandTop = (int) (candidate.y - (5.0f * candidate.fs));
                int bandBottom = (int) (candidate.y + candidate.h + (5.0f * candidate.fs));
                int candidateLeft = (int) candidate.x;

                boolean hasLeftNeighbour = false;
                for (int o = 0; o < arrayList.size(); o++) {
                    SectionAttr other = arrayList.get(o);
                    if (o != c
                            && other.w - ((float) candidateLeft) < 0.0f
                            && Math.min(other.y + other.h, (float) bandBottom) - Math.max(other.y, (float) bandTop) >= 0.0f
                            && other.x < candidate.x) {
                        hasLeftNeighbour = true;
                        blocked.add(Integer.valueOf(c));
                        break;
                    }
                }
                if (hasLeftNeighbour) {
                    continue;
                }

                int candidateRight = (int) candidate.w;
                boolean overlapsBlocked = false;
                for (int o = 0; o < arrayList.size(); o++) {
                    SectionAttr other = arrayList.get(o);
                    if (o != c
                            && Math.max((float) candidateLeft, other.x) - Math.min((float) candidateRight, other.w) < 0.0f
                            && blocked.contains(Integer.valueOf(o))) {
                        overlapsBlocked = true;
                        blocked.add(Integer.valueOf(c));
                        break;
                    }
                }
                // The candidate may also have been blocked during an earlier pass over the same y value.
                if (overlapsBlocked || blocked.contains(Integer.valueOf(c))) {
                    continue;
                }

                float reference;
                if (!accepted.isEmpty()) {
                    reference = accepted.get(accepted.size() - 1).w;
                } else if (sectionAttr != null) {
                    reference = sectionAttr.w;
                } else {
                    reference = candidate.x;
                }
                if (candidate.x - reference <= i * candidate.fs) {
                    accepted.add(candidate);
                    arrayList2.add(Integer.valueOf(c));
                    break; // one section per y value; continue with the next y
                }
            }
        }
        return accepted;
    }

    /**
     * Splits a section into parts by looking at how its lines are aligned, and writes the label
     * {@code "<i>_<part>"} into the {@code p} field of every line of the section.
     *
     * <p>For each line the floored {@code x} ("left") and the floored {@code w} minus that left
     * ("extent") are collected. The section is classified by the share of lines that have the
     * most common left / extent compared with {@code align_thr_fully}:
     * <ul>
     *   <li>mode 1, both shares reach the threshold: a part starts at a line indented beyond
     *       the common left plus one standard deviation, and after a line that is more than two
     *       font sizes shorter than the reference extent;</li>
     *   <li>mode 2, only the left share: a part starts at indented lines, and at lines starting
     *       with an upper-case letter or digit when the previous line is short or ends like a
     *       sentence;</li>
     *   <li>mode 3, only the extent share: a part starts after short lines and after lines
     *       ending in the separator constant or ";";</li>
     *   <li>mode 4, neither: no alignment-based parts.</li>
     * </ul>
     * In every mode a part also starts at lines beginning with a bullet character, and, when the
     * ratio of lines that start with an upper-case letter exceeds {@code caps_thr}, at each such
     * line.
     *
     * <p>NOTE(review): three points were reconstructed from decompiler output and should be
     * confirmed against the original source: the mode variable was declared {@code boolean}
     * although it takes the values 1 to 4; the labelling loop was a {@code while (true)} without
     * an exit; the upper-case ratio used integer division, which made the threshold effective
     * only at 100%.
     *
     * @param sectionAttr section to split; its {@code sl}, {@code el}, {@code num} and {@code fs}
     *                    are read
     * @param i           index of the section, used as label prefix
     * @param z           not used by this method
     */
    private static void segmentSectionsByAlignement1(SectionAttr sectionAttr, int i, boolean z) {
        final int first = sectionAttr.sl;
        // Offsets (relative to the first line) at which a new part starts. Kept as a raw list because
        // the parameter type of Utils.sortArrayList is not visible here.
        ArrayList breaks = new ArrayList();
        double[] lefts = new double[sectionAttr.num];
        double[] extents = new double[sectionAttr.num];
        int[] startsCapitalised = new int[sectionAttr.num];
        int capitalised = 0;
        for (int li = first; li < sectionAttr.el + 1; li++) {
            int k = li - first;
            LineAttr line = linedata.get(li);
            lefts[k] = Math.floor(line.x);
            extents[k] = Math.floor(line.w) - lefts[k];
            String text = line.chars.trim();
            // Starts with an upper-case letter but is not written entirely in upper case.
            if (!text.isEmpty() && Character.isUpperCase(text.charAt(0)) && !text.toUpperCase().equals(text)) {
                capitalised++;
                startsCapitalised[k] = 1;
            }
        }

        double leftStdDev = Utils.getStdDev(lefts);
        double extentStdDev = Utils.getStdDev(extents);
        double maxExtent = Utils.getMax(extents);
        double[] commonLeft = Utils.find_most_commonValue(lefts);
        double[] commonExtent = Utils.find_most_commonValue(extents);
        double maxExtentAgain = Utils.getMax(extents);
        // NOTE(review): the decompiled code had an empty "if (commonExtent[1] == -1)" in front of this
        // assignment; the assignment itself is unconditional there and is kept unconditional here.
        commonExtent[0] = maxExtent;

        double leftShare = commonLeft[1] / ((double) lefts.length);
        double extentShare = commonExtent[1] / ((double) extents.length);
        final int mode;
        if (leftShare >= align_thr_fully && extentShare >= align_thr_fully) {
            mode = 1;
        } else if (leftShare >= align_thr_fully) {
            mode = 2;
        } else if (extentShare >= align_thr_fully) {
            mode = 3;
        } else {
            mode = 4;
        }

        // TYPE_SEPARATOR is presumably ":" (Hadoop constant kept from the decompiled code); confirm.
        final String separator = ValueAggregatorDescriptor.TYPE_SEPARATOR;

        if (mode == 1) {
            for (int k = 0; k < sectionAttr.el + 1 - first; k++) {
                if (lefts[k] > commonLeft[0] + leftStdDev && !breaks.contains(Integer.valueOf(k))) {
                    breaks.add(Integer.valueOf(k));
                }
                if (extents[k] < commonExtent[0] - ((double) (2.0f * sectionAttr.fs)) && !breaks.contains(Integer.valueOf(k + 1))) {
                    breaks.add(Integer.valueOf(k + 1));
                }
            }
        }
        if (mode == 2) {
            for (int k = 0; k < sectionAttr.el + 1 - first; k++) {
                if (lefts[k] > commonLeft[0] + leftStdDev) {
                    breaks.add(Integer.valueOf(k));
                }
            }
            for (int k = 1; k < extents.length; k++) {
                String current = linedata.get(first + k).chars.trim();
                String previous = linedata.get((first + k) - 1).chars.trim();
                // Blank lines yield empty head/tail strings, which match none of the patterns below.
                String head = current.isEmpty() ? "" : current.substring(0, 1);
                String tail = previous.isEmpty() ? "" : previous.substring(previous.length() - 1);
                boolean startsLikeSentence = (!current.isEmpty() && Character.isUpperCase(current.charAt(0)))
                        || head.matches("\\d.*");
                boolean previousEnded = (tail.matches("\\d.*") || tail.matches(separator) || tail.matches(";") || tail.matches("\\."))
                        && extents[k - 1] < maxExtentAgain - extentStdDev;
                boolean previousShort = extents[k - 1] < maxExtentAgain / 2.0d;
                if (startsLikeSentence && (previousEnded || previousShort) && !breaks.contains(Integer.valueOf(k))) {
                    breaks.add(Integer.valueOf(k));
                }
            }
        }
        if (mode == 3) {
            for (int k = 0; k < sectionAttr.el + 1 - first; k++) {
                if (extents[k] < commonExtent[0] - extentStdDev) {
                    breaks.add(Integer.valueOf(k + 1));
                }
            }
            for (int k = 1; k < extents.length; k++) {
                String previous = linedata.get((first + k) - 1).chars.trim();
                String tail = previous.isEmpty() ? "" : previous.substring(previous.length() - 1);
                if ((tail.matches(separator) || tail.matches(";")) && !breaks.contains(Integer.valueOf(k))) {
                    breaks.add(Integer.valueOf(k));
                }
            }
        }

        // Lines that begin with a bullet always start a new part.
        for (int k = 0; k < extents.length; k++) {
            String current = linedata.get(first + k).chars.trim();
            String head = current.isEmpty() ? "" : current.substring(0, 1);
            if ((head.matches("●") || head.matches("•")) && !breaks.contains(Integer.valueOf(k))) {
                breaks.add(Integer.valueOf(k));
            }
        }

        // Floating-point ratio, so that caps_thr acts as a fraction of the section's lines.
        if (((double) capitalised) / sectionAttr.num > caps_thr) {
            for (int k = 1; k < extents.length; k++) {
                if (startsCapitalised[k] == 1 && !breaks.contains(Integer.valueOf(k))) {
                    breaks.add(Integer.valueOf(k));
                }
            }
        }

        if (breaks.isEmpty()) {
            for (int k = 0; k < extents.length; k++) {
                linedata.get(k + first).p = i + "_0";
            }
            return;
        }

        // A line's part number is the count of sorted break offsets that are not greater than its offset.
        int[] sortedBreaks = Utils.sortArrayList(breaks);
        for (int k = 0; k < extents.length; k++) {
            for (int b = 0; b < sortedBreaks.length; b++) {
                if (k + first < linedata.size()) {
                    if (k < sortedBreaks[b]) {
                        linedata.get(k + first).p = i + "_" + b;
                        break;
                    }
                    linedata.get(k + first).p = i + "_" + (b + 1);
                }
            }
        }
    }

    /**
     * Groups the text lines in {@code linedata} into sections and stores them in {@code sectiondata}.
     *
     * <p>Step 1: a line is merged into the currently open section when it has the same category
     * {@code t} as the previous line, its vertical distance to the previous line is small relative
     * to the previous line's height, and {@link #vert_overlap} reports an overlap; otherwise a new
     * section {@code "sec_<n>"} is opened.
     *
     * <p>Step 2: inside every multi-line section the lines are re-ordered by ascending {@code y},
     * then ascending {@code x} (ties keep their original order), and the section text is rebuilt
     * in that order. {@code linedata} is rewritten in the new order.
     *
     * <p>Step 3: section {@code x} is rounded and section {@code y} is rounded after adding 1.
     *
     * <p>Does nothing when {@code linedata} is empty.
     * NOTE(review): the section index starts at 0, so this presumably expects {@code sectiondata}
     * to be empty on entry - confirm against the caller.
     */
    private static void createSections() {
        if (linedata.isEmpty()) {
            // Previously linedata.get(0) threw IndexOutOfBoundsException here.
            return;
        }

        int sectionIndex = 0;
        int lineCount = 1;
        int startLine = 0;
        LineAttr first = linedata.get(0);
        sectiondata.add(new SectionAttr(first.chars, first.x, first.y, first.fs, first.h, first.w, "sec_0", first.t, 1, 0, 0));

        for (int idx = 1; idx < linedata.size(); idx++) {
            LineAttr prev = linedata.get(idx - 1);
            LineAttr cur = linedata.get(idx);
            boolean sameCategory = cur.t == prev.t;
            boolean closeEnough = Math.abs((cur.y - prev.y) - prev.h) < 4.0f * prev.h;
            if (sameCategory && closeEnough && vert_overlap(prev, cur)) {
                lineCount++;
                SectionAttr open = sectiondata.get(sectionIndex);
                sectiondata.set(sectionIndex, new SectionAttr(open.chars + "\n" + cur.chars, Math.min(open.x, cur.x), Math.min(open.y, cur.y), cur.fs, open.h + cur.h, Math.max(prev.w, cur.w), open.p, cur.t, lineCount, startLine, idx));
            } else {
                lineCount = 1;
                startLine = idx;
                sectionIndex++;
                sectiondata.add(new SectionAttr(cur.chars, cur.x, cur.y, cur.fs, cur.h, cur.w, "sec_" + Integer.toString(sectionIndex), cur.t, 1, startLine, startLine));
            }
        }

        List<SectionAttr> orderedSections = new ArrayList<>();
        List<LineAttr> orderedLines = new ArrayList<>();
        for (SectionAttr section : sectiondata) {
            if (section.num == 1) {
                orderedSections.add(section);
                orderedLines.add(linedata.get(section.sl));
                continue;
            }

            float[] ys = new float[section.num];
            float[] xs = new float[section.num];
            for (int k = 0; k < section.num; k++) {
                LineAttr line = linedata.get(section.sl + k);
                ys[k] = line.y;
                xs[k] = line.x;
            }
            Arrays.sort(ys);
            Arrays.sort(xs);

            boolean[] placed = new boolean[section.num];
            StringBuilder text = new StringBuilder();
            for (float y : ys) {
                for (float x : xs) {
                    // Bounded scan: most (y, x) combinations match no line at all, and the
                    // former while(true) form never terminated in that case.
                    for (int k = 0; k < section.num; k++) {
                        if (placed[k]) {
                            continue;
                        }
                        LineAttr candidate = linedata.get(section.sl + k);
                        if (candidate.y == y && candidate.x == x) {
                            placed[k] = true;
                            text.append(candidate.chars).append("\n");
                            orderedLines.add(candidate);
                            break;
                        }
                    }
                }
            }
            orderedSections.add(section);
            // Drop the trailing newline.
            section.chars = text.substring(0, text.length() - 1);
        }

        sectiondata.clear();
        for (SectionAttr section : orderedSections) {
            sectiondata.add(section);
            LOGGER.debug(section.chars);
            LOGGER.debug("-------------");
        }

        linedata.clear();
        linedata.addAll(orderedLines);

        for (SectionAttr section : sectiondata) {
            section.x = Math.round(section.x);
            section.y = Math.round(section.y + 1.0f);
        }
    }

    /**
     * Tells whether two lines overlap, judged from their {@code x} and {@code w} fields: the
     * smaller {@code w} must lie strictly beyond the larger {@code x}.
     * NOTE(review): {@code w} looks like a right-edge coordinate rather than a width - confirm
     * against where the lines are built.
     *
     * @param lineAttr  first line
     * @param lineAttr2 second line
     * @return {@code true} when {@code min(w) - max(x)} is positive
     */
    private static boolean vert_overlap(LineAttr lineAttr, LineAttr lineAttr2) {
        final float sharedEnd = Math.min(lineAttr.w, lineAttr2.w);
        final float sharedStart = Math.max(lineAttr.x, lineAttr2.x);
        return sharedEnd - sharedStart > 0.0f;
    }

    /**
     * Assigns a paragraph label {@code "<i>_<k>"} to every line of a section, starting a new
     * paragraph after each unusually large vertical gap between consecutive lines.
     *
     * <p>The gap after a line is {@code |y + fs - nextY|}, rounded to one decimal. With more than
     * three gaps, a gap is a break when it is at least 2.715 times the value returned first by
     * {@code Utils.find_most_commonValue}. Otherwise a gap (other than the first) is a break when
     * it exceeds both 1.5 times the section's {@code fs} and 1.5 times the preceding gap.
     * A section with fewer than two gaps is never split.
     *
     * @param sectionAttr section whose lines {@code sl..el} are labelled
     * @param i           section number used as the label prefix
     * @param z           currently has no effect on the result
     */
    private static void segmentSectionsByDistance(SectionAttr sectionAttr, int i, boolean z) {
        final int firstLine = sectionAttr.sl;
        final int lastLine = sectionAttr.el;

        List<Double> gaps = new ArrayList<>();
        for (int line = firstLine; line < lastLine; line++) {
            LineAttr current = linedata.get(line);
            float distance = Math.abs((current.y + current.fs) - linedata.get(line + 1).y);
            gaps.add(Double.valueOf(Math.round(distance * 10.0f) / 10.0d));
        }

        int breaks = 0;
        int[] breakAfter = new int[sectionAttr.num];
        final int gapCount = gaps.size();
        if (gapCount > 1) {
            double typicalGap = -1.0d;
            if (gapCount > 3) {
                double[] raw = new double[gapCount];
                for (int k = 0; k < raw.length; k++) {
                    raw[k] = gaps.get(k).doubleValue();
                }
                typicalGap = Utils.find_most_commonValue(raw)[0];
            }
            for (int k = 0; k < gapCount; k++) {
                double gap = gaps.get(k).doubleValue();
                boolean isBreak;
                if (typicalGap > 0.0d) {
                    isBreak = gap >= 2.715d * typicalGap;
                } else {
                    isBreak = k > 0
                            && gap > 1.5d * ((double) sectionAttr.fs)
                            && gap > 1.5d * gaps.get(k - 1).doubleValue();
                }
                if (isBreak) {
                    breaks++;
                    breakAfter[k] = 1;
                }
            }
        }

        final String prefix = String.valueOf(i) + "_";
        if (breaks == 0) {
            // No break found: the whole section is a single paragraph.
            for (int line = firstLine; line < lastLine + 1; line++) {
                linedata.get(line).p = prefix + "0";
            }
            return;
        }

        int paragraph = 0;
        for (int line = firstLine; line < lastLine; line++) {
            linedata.get(line).p = prefix + paragraph;
            if (breakAfter[line - firstLine] == 1 && breaks > 0) {
                paragraph++;
                breaks--;
            }
        }
        // Any line still unlabelled (in practice the last one) joins the current paragraph.
        for (int line = firstLine; line < lastLine + 1; line++) {
            LineAttr entry = linedata.get(line);
            if (entry.p.isEmpty()) {
                entry.p = prefix + paragraph;
            }
        }
    }

    /**
     * Collects the distinct {@code fs} values of all lines in ascending order.
     *
     * <p>A value is kept when it differs from the previously kept one (initially 0) by more than
     * {@code fontsize_thr}. A sentinel of 100000 is appended to the candidates before sorting and
     * can therefore appear as the last element of the result.
     *
     * @return the kept values, ascending
     */
    private static ArrayList<Double> findFontHeights() {
        final int lineCount = linedata.size();
        double[] candidates = new double[lineCount + 1];
        int slot = 0;
        for (LineAttr line : linedata) {
            candidates[slot++] = line.fs;
        }
        candidates[lineCount] = 100000.0d;
        Arrays.sort(candidates);

        ArrayList<Double> distinct = new ArrayList<>();
        double lastKept = 0.0d;
        for (double candidate : candidates) {
            if (Math.abs(candidate - lastKept) > fontsize_thr) {
                distinct.add(Double.valueOf(candidate));
                lastKept = candidate;
            }
        }
        return distinct;
    }

    /**
     * Builds {@code linedata} entries from {@code chardata}, one per run of characters that share
     * the same line id {@code p}, using the median character height as the line's {@code fs}.
     *
     * <p>The loop looks at pairs (previous, current): the previous character is appended to the
     * running line, and the line is emitted when the current character has a different line id or
     * is the last list entry. Lines whose text is blank after trimming are dropped. Multi-character
     * glyph strings that do not contain "f" are replaced by the token separator first.
     */
    private static void represent_textline_heights() {
        StringBuilder text = new StringBuilder();
        float startX = -1.0f;
        float top = 1.0E9f;
        float xsSum = 0.0f;
        float sSum = 0.0f;
        float bottom = 0.0f;
        List<Float> heights = new ArrayList<>();

        final int lastIndex = chardata.size() - 1;
        for (int idx = 1; idx <= lastIndex; idx++) {
            PrintTextLocations.CharAttr prev = chardata.get(idx - 1);
            PrintTextLocations.CharAttr cur = chardata.get(idx);
            if (cur.character.length() > 1 && !cur.character.contains("f")) {
                cur.character = ShingleFilter.TOKEN_SEPARATOR;
            }

            boolean lineEnds = cur.p != prev.p || idx == lastIndex;
            if (!lineEnds) {
                text.append(prev.character);
                // Only characters whose height matches their successor move the vertical extent.
                if (Math.abs(prev.h - cur.h) <= fontsize_thr) {
                    top = Math.min(top, prev.y);
                    bottom = Math.max(bottom, top + prev.h);
                }
                heights.add(Float.valueOf(prev.h));
                xsSum += prev.xs;
                sSum += prev.s;
                if (startX < 0.0f) {
                    startX = prev.x;
                }
                continue;
            }

            String line = text.append(prev.character).toString();
            heights.add(Float.valueOf(prev.h));
            if (line.trim().length() > 0) {
                double[] sorted = new double[heights.size()];
                for (int k = 0; k < sorted.length; k++) {
                    sorted[k] = heights.get(k).floatValue();
                }
                Arrays.sort(sorted);
                linedata.add(new LineAttr(line, startX, Math.min(top, prev.y), (float) Utils.getMedian(sorted), (xsSum + prev.xs) / line.length(), bottom - top, (sSum + prev.s) / line.length(), prev.x + prev.w, "", -3));
            }

            // Start accumulating the next line from scratch.
            text.setLength(0);
            startX = -1.0f;
            top = 1.0E9f;
            xsSum = 0.0f;
            sSum = 0.0f;
            bottom = 0.0f;
            heights.clear();
        }
    }

    /**
     * Builds {@code linedata} entries from {@code chardata}, one per run of characters that share
     * the same line id {@code p}, using the largest font size seen before the line's final
     * character as the line's {@code fs}.
     *
     * <p>The loop looks at pairs (previous, current): the previous character is appended to the
     * running line, and the line is emitted when the current character has a different line id or
     * is the last list entry. Lines whose text is blank after trimming are dropped.
     *
     * <p>The line text is now held in a named local; the earlier code divided by
     * {@code r0.length()}, where {@code r0} was an undeclared decompiler register name.
     * NOTE(review): for a line consisting of a single character the start x stays at -1 and the
     * height is computed from the initial accumulator values - confirm whether that is intended.
     */
    private static void represent_textline_fonts() {
        String pending = "";
        float startX = -1.0f;
        float top = 1.0E9f;
        float maxFontSize = 0.0f;
        float xsSum = 0.0f;
        float sSum = 0.0f;
        float bottom = 0.0f;

        for (int i = 1; i < chardata.size(); i++) {
            PrintTextLocations.CharAttr prev = chardata.get(i - 1);
            PrintTextLocations.CharAttr cur = chardata.get(i);

            if (cur.p != prev.p || i == chardata.size() - 1) {
                String text = pending + prev.character;
                if (text.trim().length() > 0) {
                    linedata.add(new LineAttr(text, startX, Math.min(top, prev.y), maxFontSize, (xsSum + prev.xs) / text.length(), bottom - top, (sSum + prev.s) / text.length(), prev.x + prev.w, "", -3));
                }
                // Start accumulating the next line from scratch.
                pending = "";
                startX = -1.0f;
                top = 1.0E9f;
                maxFontSize = 0.0f;
                xsSum = 0.0f;
                sSum = 0.0f;
                bottom = 0.0f;
            } else {
                pending = pending + prev.character;
                // Only characters whose font size matches their successor move the vertical extent.
                if (Math.abs(prev.fs - cur.fs) <= fontsize_thr) {
                    top = Math.min(top, prev.y);
                    bottom = Math.max(bottom, prev.y + prev.h);
                }
                maxFontSize = Math.max(maxFontSize, prev.fs);
                xsSum += prev.xs;
                sSum += prev.s;
                if (startX < 0.0f) {
                    startX = prev.x;
                }
            }
        }
    }

    /**
     * Filters {@code chardata} and assigns every remaining character a text-line id in its
     * {@code p} field.
     *
     * <p>Characters with neither positive {@code h} nor positive {@code s} are discarded. The
     * first kept character gets line id 0; each following character either inherits the id of its
     * predecessor or opens a new line with the next id.
     *
     * NOTE(review): if no character survives the filter, {@code fArr} is empty and
     * {@code fArr[fArr.length - 1]} throws ArrayIndexOutOfBoundsException.
     *
     * @param f compared with the largest {@code y} only to choose a debug message
     *          (presumably a page dimension - confirm against the caller)
     * @return the highest line id that was assigned
     */
    private static int put_chars_in_textlines(float f) {
        float f2;
        float f3;
        int i = 0;
        ArrayList<PrintTextLocations.CharAttr> arrayList = new ArrayList<>();
        ArrayList arrayList2 = new ArrayList();
        // Keep only characters that have a positive height or a positive s value.
        for (int i2 = 0; i2 < chardata.size(); i2++) {
            if ((chardata.get(i2).h > 0.0f) || (chardata.get(i2).s > 0.0f)) {
                arrayList.add(chardata.get(i2));
                arrayList2.add(Float.valueOf(chardata.get(i2).y));
            } else {
                LOGGER.debug("Discard:" + chardata.get(i2).character);
            }
        }
        // The sorted y values are used only to read the maximum for the orientation log message.
        float[] fArr = new float[arrayList2.size()];
        for (int i3 = 0; i3 < fArr.length; i3++) {
            fArr[i3] = ((Float) arrayList2.get(i3)).floatValue();
        }
        Arrays.sort(fArr);
        if (fArr[fArr.length - 1] > f) {
            LOGGER.debug("probably the orientation of text in page is vertical.");
        } else {
            LOGGER.debug("probably the orientation of text in page is horizontal.");
        }
        // The old list is emptied before the static field is pointed at the filtered list.
        chardata.clear();
        chardata = arrayList;
        // Append a second reference to the last character (the same object, not a copy), so the
        // list ends with two entries that alias each other.
        chardata.add(chardata.get(chardata.size() - 1));
        chardata.get(0).p = 0;
        for (int i4 = 1; i4 < chardata.size(); i4++) {
            String normalizeText = ContentNormalizer.normalizeText(chardata.get(i4).character);
            // z: the character normalizes to the token separator and directly follows an
            // "fi"/"ff"/"fl" ligature glyph.
            boolean z = normalizeText.equals(ShingleFilter.TOKEN_SEPARATOR) && ((chardata.get(i4 - 1).character.equals("fi") | chardata.get(i4 - 1).character.equals("ff")) | chardata.get(i4 - 1).character.equals("fl"));
            // Empty or separator characters (outside the ligature case) take the normalized text
            // and inherit line id and geometry from their predecessor.
            if ((!z) & (normalizeText.equals("") | normalizeText.equals(ShingleFilter.TOKEN_SEPARATOR))) {
                chardata.get(i4).character = normalizeText;
                chardata.get(i4).p = chardata.get(i4 - 1).p;
                chardata.get(i4).x = chardata.get(i4 - 1).x;
                chardata.get(i4).y = chardata.get(i4 - 1).y;
                chardata.get(i4).h = chardata.get(i4 - 1).h;
                chardata.get(i4).w = chardata.get(i4 - 1).w;
                chardata.get(i4).fs = chardata.get(i4 - 1).fs;
                chardata.get(i4).xs = chardata.get(i4 - 1).xs;
                chardata.get(i4).s = chardata.get(i4 - 1).s;
            }
            // Zero-width characters and the separator after a ligature are blanked and inherit
            // line id and geometry from their predecessor. Note that w is re-read here, i.e. after
            // the block above may already have overwritten it.
            if ((chardata.get(i4).w == 0.0f) || z) {
                chardata.get(i4).character = "";
                chardata.get(i4).p = chardata.get(i4 - 1).p;
                chardata.get(i4).x = chardata.get(i4 - 1).x;
                chardata.get(i4).y = chardata.get(i4 - 1).y;
                chardata.get(i4).h = chardata.get(i4 - 1).h;
                chardata.get(i4).w = chardata.get(i4 - 1).w;
                chardata.get(i4).fs = chardata.get(i4 - 1).fs;
                chardata.get(i4).xs = chardata.get(i4 - 1).xs;
                chardata.get(i4).s = chardata.get(i4 - 1).s;
            } else {
                // f4/f6/f8/f10: y, x, h, w of the previous character; f5/f7/f9: y, x, h of the
                // current one; f2/f3: y and h of the character two positions back.
                float f4 = chardata.get(i4 - 1).y;
                float f5 = chardata.get(i4).y;
                float f6 = chardata.get(i4 - 1).x;
                float f7 = chardata.get(i4).x;
                float f8 = chardata.get(i4 - 1).h;
                float f9 = chardata.get(i4).h;
                float f10 = chardata.get(i4 - 1).w;
                if (i4 > 1) {
                    f2 = chardata.get(i4 - 2).y;
                    f3 = chardata.get(i4 - 2).h;
                } else {
                    // No character two positions back: use large placeholder values.
                    f2 = 1000000.0f;
                    f3 = 1.0E7f;
                }
                // Same line when the current character overlaps vertically with one of the two
                // preceding characters AND its x lies between (previous x - fontsize_thr) and
                // (previous x + 7 * previous w + fontsize_thr); "|" and "©" always stay on the
                // current line.
                if ((((((f5 >= f4) & (f5 <= f4 + f8)) | ((f5 + f9 >= f4) & (Math.round(f5 + f9) <= Math.round(f4 + f8))) | (Math.min(f4 + f8, f5 + f9) - Math.max(f4, f5) >= 0.0f) | (Math.min(f2 + f3, f5 + f9) - Math.max(f5, f2) >= 0.0f)) & (((double) f7) > ((double) f6) - fontsize_thr) & (((double) f7) <= ((double) (f6 + (7.0f * f10))) + fontsize_thr)) || normalizeText.equals("|")) || normalizeText.equals("©")) {
                    // The current character starts left of the previous one: blank the previous
                    // character and give it the current character's geometry.
                    if (f7 < f6) {
                        chardata.get(i4 - 1).character = "";
                        chardata.get(i4 - 1).x = chardata.get(i4).x;
                        chardata.get(i4 - 1).y = chardata.get(i4).y;
                        chardata.get(i4 - 1).h = chardata.get(i4).h;
                        chardata.get(i4 - 1).w = chardata.get(i4).w;
                        chardata.get(i4 - 1).fs = chardata.get(i4).fs;
                        chardata.get(i4 - 1).xs = chardata.get(i4).xs;
                        chardata.get(i4 - 1).s = chardata.get(i4).s;
                    }
                    chardata.get(i4).p = chardata.get(i4 - 1).p;
                } else {
                    // NOTE(review): f11 is computed but never used; this if has no effect.
                    if (f5 >= f4) {
                        float f11 = f4 + f8;
                    }
                    // Open a new text line.
                    i++;
                    chardata.get(i4).p = i;
                }
            }
        }
        return i;
    }

    /**
     * Sets the category {@code t} of every line in {@code linedata} to the index of the first
     * entry in {@code arrayList} whose value is within {@code fontsize_thr} of the line's
     * {@code fs}.
     *
     * <p>A line that matches no entry keeps its current {@code t}. The search is bounded by the
     * list size; the earlier {@code while (true)} form never terminated for such a line or for an
     * empty list.
     *
     * @param arrayList reference font sizes; the list index becomes the category number
     */
    private static void categorizePerFontsize(ArrayList<Double> arrayList) {
        for (LineAttr line : linedata) {
            for (int category = 0; category < arrayList.size(); category++) {
                if (Math.abs(line.fs - arrayList.get(category).doubleValue()) < fontsize_thr) {
                    line.t = category;
                    break;
                }
            }
        }
    }

    /**
     * Returns the distinct font sizes of all characters in {@code chardata}, rounded to whole
     * numbers and sorted ascending.
     *
     * <p>A sentinel of 100000 is appended before sorting; each value is reported when its
     * successor in the sorted array lies more than {@code fontsize_thr} above it, so the sentinel
     * itself is never part of the result.
     *
     * @return the rounded font sizes, ascending
     */
    private static ArrayList<Double> findFontSizes() {
        HashSet<Double> rounded = new HashSet<>();
        for (PrintTextLocations.CharAttr glyph : chardata) {
            // Adding to a set is idempotent, so no membership pre-check is needed.
            rounded.add(Double.valueOf(Math.round(glyph.fs)));
        }

        Double[] sorted = new Double[rounded.size() + 1];
        int slot = 0;
        for (Double size : rounded) {
            sorted[slot++] = size;
        }
        sorted[rounded.size()] = Double.valueOf(100000.0d);
        Arrays.sort(sorted);

        ArrayList<Double> sizes = new ArrayList<>();
        for (int k = 0; k + 1 < sorted.length; k++) {
            double step = Math.abs(sorted[k + 1].doubleValue() - sorted[k].doubleValue());
            if (step > fontsize_thr) {
                sizes.add(sorted[k]);
            }
        }
        return sizes;
    }

    /**
     * Estimates a word-gap threshold for each text line and appends the token separator to every
     * character that is followed, on the same line, by a horizontal gap of at least that
     * threshold.
     *
     * <p>Pass 1 collects the positive gaps {@code next.x - x - w} between consecutive characters
     * of a line (characters whose text is empty are ignored) and, when the line id changes, stores
     * {@code Utils.otsu(gaps)} for the finished line. With {@code z} set, the threshold is capped
     * at one fifth of the font size of the line's final character.
     *
     * <p>Pass 2 inserts separators on lines with a positive threshold.
     *
     * <p>Thresholds are stored under the line id {@code p}, the same key pass 2 reads. The earlier
     * code used a running counter that was not advanced when a line ended in an emptied character,
     * which shifted the thresholds of all later lines and mixed that line's gaps into the next.
     *
     * NOTE(review): the final line never sees a change of line id, so it keeps threshold 0 and
     * receives no separators - confirm whether that is intended.
     *
     * @param i highest line id in {@code chardata}; sizes the threshold table
     * @param z whether to cap the threshold at {@code fs / 5}
     */
    private static void estimate_space_thr_per_line(int i, boolean z) {
        float[] thresholds = new float[i + 1];
        ArrayList<Double> gaps = new ArrayList<>();

        for (int idx = 1; idx < chardata.size(); idx++) {
            PrintTextLocations.CharAttr prev = chardata.get(idx - 1);
            PrintTextLocations.CharAttr cur = chardata.get(idx);
            if (cur.p == prev.p) {
                float gap = (cur.x - prev.x) - prev.w;
                if (!prev.character.equals("") && gap > 0.0f) {
                    gaps.add(Double.valueOf(gap));
                }
            } else {
                // Line finished: record its threshold under its own id.
                float threshold = (float) Utils.otsu(gaps);
                if (z) {
                    threshold = Math.min(threshold, prev.fs / 5.0f);
                }
                thresholds[prev.p] = threshold;
                gaps.clear();
            }
        }

        for (int idx = 1; idx < chardata.size(); idx++) {
            PrintTextLocations.CharAttr prev = chardata.get(idx - 1);
            PrintTextLocations.CharAttr cur = chardata.get(idx);
            if (cur.p != prev.p) {
                continue;
            }
            float threshold = thresholds[prev.p];
            if (threshold > 0.0f && (cur.x - prev.x) - prev.w >= threshold) {
                prev.character = prev.character + ShingleFilter.TOKEN_SEPARATOR;
            }
        }
    }
}
