package gr.ilsp.fmc.main;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.http.cookie.ClientCookie;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
import org.apache.pdfbox.pdmodel.common.PDPageLabelRange;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationText;

/* loaded from: input_file:gr/ilsp/fmc/main/SimpleCrawlOptions.class */
/**
 * Command-line options for the SimpleCrawl tool.
 *
 * <p>An instance builds the set of supported options in its constructor;
 * {@link #parseOptions(String[])} then reads an argument vector and stores the
 * values, which are exposed through the getters below.
 *
 * <p>Note that {@link #help()} terminates the JVM, so a call to
 * {@link #parseOptions(String[])} with missing mandatory options does not return.
 */
public class SimpleCrawlOptions {
    /** Value of the crawl duration when no target duration has been requested. */
    public static int NO_CRAWL_DURATION = 0;

    private static final Logger LOGGER = Logger.getLogger(SimpleCrawlOptions.class);

    /** Program name shown in the usage message. */
    private static final String APPNAME = "SimpleCrawl";

    /** File mapping language codes to keys, one {@code code>keys} entry per line. */
    private static final String LANG_KEYS_FILE = "conf/langKeys.txt";

    /** Exit status used when the command line cannot be parsed. */
    private static final int EXIT_BAD_USAGE = 64;

    // Short option names. Kept as local constants so that option definition and
    // option lookup cannot drift apart.
    private static final String OPT_DOMAIN = "d";
    private static final String OPT_URLS = "u";
    private static final String OPT_DEBUG = "dbg";
    private static final String OPT_APPENDER = "l";
    private static final String OPT_OUTPUT_DIR = "o";
    private static final String OPT_AGENT_NAME = "a";
    private static final String OPT_THREADS = "t";
    private static final String OPT_NUM_LOOPS = "n";
    private static final String OPT_CRAWL_DURATION = "c";
    private static final String OPT_DB_HOST = "db";
    private static final String OPT_DB_NAME = "dn";
    private static final String OPT_TOPIC = "tc";
    private static final String OPT_LANGUAGE = "lang";
    private static final String OPT_HELP = "h";

    private Options options;
    private String _outputDir;
    private String _agentName;
    private String _dbHost;
    private String _topic;
    private String _language;
    private String[] _langKeys;
    private String _urls;
    private String _dbName;
    private String _domain = null;
    private boolean _debug = false;
    private String _loggingAppender = null;
    private int _threads = 10;
    private int _numLoops = 1;
    private int _crawlDuration = 0;
    private int _minTokensNumber = 200;

    /** Creates the option holder and registers every supported option. */
    public SimpleCrawlOptions() {
        createOptions();
    }

    /**
     * Builds the option definitions and stores them in {@link #options}.
     *
     * @return the freshly built option set
     */
    private Options createOptions() {
        this.options = new Options();
        addOption(OPT_DOMAIN, "domain",
                "domain to crawl (e.g. cnn.com). Use for crawling ONLY inside one domain", true);
        addOption(OPT_URLS, "urls", "file with list of urls to crawl", true);
        addOption(OPT_DEBUG, "debug", "debug logging", false);
        addOption(OPT_APPENDER, "loggingAppender", "set logging appender (console, DRFA)", true);
        addOption(OPT_OUTPUT_DIR, "outputdir", "output directory", true);
        addOption(OPT_AGENT_NAME, "agentname", "user agent name", true);
        addOption(OPT_THREADS, "threads", "maximum number of fetcher threads to use", true);
        addOption(OPT_NUM_LOOPS, "numloops", "number of fetch/update loops", true);
        addOption(OPT_CRAWL_DURATION, "crawlduration", "target crawl duration in minutes", true);
        addOption(OPT_DB_HOST, "dbhost", "Database host", true);
        addOption(OPT_DB_NAME, "dbname", "Database name", true);
        addOption(OPT_TOPIC, "topic", "Topic definition", true);
        addOption(OPT_LANGUAGE, "language", "Target language", true);
        // Help is a plain flag: requiring an argument would make a bare "-h" a parse error.
        addOption(OPT_HELP, "help", "Help", false);
        return this.options;
    }

    /**
     * Registers a single option.
     *
     * @param shortName     short option name (used without the leading dash)
     * @param longName      long option name
     * @param description   text shown in the usage message
     * @param takesArgument whether the option is followed by a value
     */
    private void addOption(String shortName, String longName, String description, boolean takesArgument) {
        // OptionBuilder accumulates its settings statically and resets them on create().
        OptionBuilder.withLongOpt(longName);
        OptionBuilder.withDescription(description);
        if (takesArgument) {
            OptionBuilder.hasArg();
        }
        this.options.addOption(OptionBuilder.create(shortName));
    }

    /**
     * Parses the given arguments and stores the resulting values.
     *
     * <p>Either a domain ({@code -d}) or a URL list file ({@code -u}) must be given, as well
     * as the output directory, agent name, database host, database name and language.
     * When one of them is missing, or when help is requested, the usage message is printed
     * through {@link #help()}. When the arguments cannot be parsed, or a numeric option
     * does not hold an integer, an error is written to standard error and the JVM exits
     * with status 64.
     *
     * @param strArr the command-line arguments
     */
    public void parseOptions(String[] strArr) {
        try {
            CommandLine parse = new GnuParser().parse(this.options, strArr);
            if (parse.hasOption(OPT_HELP)) {
                help();
            }
            if (parse.hasOption(OPT_DOMAIN)) {
                this._domain = parse.getOptionValue(OPT_DOMAIN);
                if (this._domain.startsWith("http")) {
                    LOGGER.error("The target domain should be specified as just the host, without the http protocol: " + this._domain);
                    help();
                }
            } else if (parse.hasOption(OPT_URLS)) {
                this._urls = parse.getOptionValue(OPT_URLS);
                if (!new File(this._urls).exists()) {
                    LOGGER.error("The urls file does not exist: " + this._urls);
                    help();
                }
            } else {
                help();
            }
            if (parse.hasOption(OPT_DEBUG)) {
                this._debug = true;
            }
            if (parse.hasOption(OPT_APPENDER)) {
                this._loggingAppender = parse.getOptionValue(OPT_APPENDER);
            }
            if (parse.hasOption(OPT_OUTPUT_DIR)) {
                this._outputDir = parse.getOptionValue(OPT_OUTPUT_DIR);
            } else {
                help();
            }
            if (parse.hasOption(OPT_AGENT_NAME)) {
                this._agentName = parse.getOptionValue(OPT_AGENT_NAME);
            } else {
                help();
            }
            if (parse.hasOption(OPT_THREADS)) {
                this._threads = Integer.parseInt(parse.getOptionValue(OPT_THREADS));
            }
            if (parse.hasOption(OPT_NUM_LOOPS)) {
                this._numLoops = Integer.parseInt(parse.getOptionValue(OPT_NUM_LOOPS));
            }
            if (parse.hasOption(OPT_CRAWL_DURATION)) {
                this._crawlDuration = Integer.parseInt(parse.getOptionValue(OPT_CRAWL_DURATION));
            }
            if (parse.hasOption(OPT_DB_HOST)) {
                this._dbHost = parse.getOptionValue(OPT_DB_HOST);
            } else {
                help();
            }
            if (parse.hasOption(OPT_DB_NAME)) {
                this._dbName = parse.getOptionValue(OPT_DB_NAME);
            } else {
                help();
            }
            if (parse.hasOption(OPT_TOPIC)) {
                this._topic = parse.getOptionValue(OPT_TOPIC);
            }
            if (!parse.hasOption(OPT_LANGUAGE)) {
                help();
            } else {
                this._language = parse.getOptionValue(OPT_LANGUAGE);
                this._langKeys = findKeys4lang(this._language);
            }
        } catch (ParseException e) {
            System.err.println("Parsing options failed.  Reason: " + e.getMessage());
            System.exit(EXIT_BAD_USAGE);
        } catch (NumberFormatException e) {
            // Raised by the integer options (threads, numloops, crawlduration).
            System.err.println("Parsing options failed.  Reason: expected an integer value. " + e.getMessage());
            System.exit(EXIT_BAD_USAGE);
        }
    }

    /**
     * Looks up the keys registered for the requested languages in {@code conf/langKeys.txt}.
     *
     * <p>Each line of the file is expected to look like {@code code>keys}; lines without a
     * {@code >} are skipped. Reading stops as soon as the number of collected entries equals
     * the number of requested languages.
     *
     * @param str the requested language codes, separated by {@code ;}
     * @return the text following {@code >} for every matching line, in file order; empty when
     *         the file is missing or nothing matches
     */
    private String[] findKeys4lang(String str) {
        ArrayList<String> keys = new ArrayList<String>();
        String[] languages = str.split(";");
        File file = new File(LANG_KEYS_FILE);
        if (!file.exists()) {
            System.out.println("The file for langKeys does not exist.");
            return new String[0];
        }
        BufferedReader reader = null;
        try {
            // NOTE(review): FileReader decodes with the platform default charset.
            reader = new BufferedReader(new FileReader(file));
            String line;
            while ((line = reader.readLine()) != null) {
                int separator = line.indexOf('>');
                if (separator >= 0) {
                    String code = line.substring(0, separator);
                    for (String language : languages) {
                        if (code.equals(language)) {
                            keys.add(line.substring(separator + 1));
                        }
                    }
                }
                if (keys.size() == languages.length) {
                    break;
                }
            }
        } catch (IOException e) {
            // Best effort: keep whatever was collected, but do not hide the failure.
            LOGGER.warn("Could not read " + file.getPath(), e);
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only stream fails.
                }
            }
        }
        return keys.toArray(new String[keys.size()]);
    }

    /** Prints the usage message and terminates the JVM with status 0. */
    public void help() {
        printHelp(APPNAME, this.options);
        System.exit(0);
    }

    /**
     * Prints a usage message for the given options.
     *
     * @param str     the command-line syntax / program name shown in the message
     * @param options the options to describe
     */
    public void printHelp(String str, Options options) {
        new HelpFormatter().printHelp(str, options);
    }

    /** @return the raw value of the language option, or {@code null} if not parsed */
    public String getLanguage() {
        return this._language;
    }

    /** @return the value of the topic option, or {@code null} if not given */
    public String getTopic() {
        return this._topic;
    }

    /** @return the value of the domain option, or {@code null} if not given */
    public String getDomain() {
        return this._domain;
    }

    /** @return {@code true} when the debug flag was given */
    public boolean isDebug() {
        return this._debug;
    }

    /** @return the value of the logging appender option, or {@code null} if not given */
    public String getLoggingAppender() {
        return this._loggingAppender;
    }

    /** @return the value of the output directory option */
    public String getOutputDir() {
        return this._outputDir;
    }

    /** @return the value of the agent name option */
    public String getAgentName() {
        return this._agentName;
    }

    /** @return the value of the threads option; 10 when not given */
    public int getThreads() {
        return this._threads;
    }

    /** @return the value of the numloops option; 1 when not given */
    public int getNumLoops() {
        return this._numLoops;
    }

    /** @return the value of the crawlduration option; 0 when not given */
    public int getCrawlDuration() {
        return this._crawlDuration;
    }

    /** @return the value of the database host option */
    public String getDbHost() {
        return this._dbHost;
    }

    /** @return the value of the database name option */
    public String getDbName() {
        return this._dbName;
    }

    /** @return the value of the urls option, or {@code null} if not given */
    public String getUrls() {
        return this._urls;
    }

    /** @return the keys found for the requested languages, or {@code null} if not parsed */
    public String[] getLangKeys() {
        return this._langKeys;
    }

    /** @return the minimum tokens number; always 200, as no option sets it */
    public int getTokensNumber() {
        return this._minTokensNumber;
    }
}
