/*
 * Decompiled with CFR 0.152.
 */
package au.org.ala.names.search;

import au.com.bytecode.opencsv.CSVReader;
import au.org.ala.names.lucene.analyzer.LowerCaseKeywordAnalyzer;
import au.org.ala.names.model.LinnaeanRankClassification;
import au.org.ala.names.model.NameIndexField;
import au.org.ala.names.model.NameSearchResult;
import au.org.ala.names.model.RankType;
import au.org.ala.names.search.ALANameIndexer;
import au.org.ala.names.search.ALANameSearcher;
import java.io.File;
import java.io.FileReader;
import java.io.InputStream;
import java.io.Reader;
import java.util.Date;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.DateFormatUtils;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.gbif.dwc.record.DarwinCoreRecord;
import org.gbif.dwc.record.Record;
import org.gbif.dwc.terms.DwcTerm;
import org.gbif.dwc.terms.GbifTerm;
import org.gbif.dwc.text.Archive;
import org.gbif.dwc.text.ArchiveFactory;
import org.gbif.dwc.text.ArchiveFile;
import org.gbif.ecat.model.ParsedName;
import org.gbif.utils.file.ClosableIterator;

public class DwcaNameIndexer
extends ALANameIndexer {
    /** log4j logger shared by all methods of this indexer. */
    protected static Logger log = Logger.getLogger(DwcaNameIndexer.class);
    /** Searcher over the temporary load index; set by createLoadingIndex or opened lazily by getLoadIdxResults. */
    private IndexSearcher lsearcher;
    /** Writer for the scientific name ("cb") index; assigned in create and written to by addIndex and addSynonymsToIndex. */
    private IndexWriter writer;
    /** Path of the temporary load index directory; assigned at the start of create. */
    private String dirTmpIndex;

    /**
     * Builds the requested indexes from a Darwin Core Archive.
     * <ul>
     *   <li>When {@code loadingIndex} is set, the temporary load index is (re)built from {@code namesDwc}.</li>
     *   <li>When {@code sciIndex} is set, the "cb" index is written under {@code indexDirectory} from the
     *       load index, followed by the synonyms found in {@code namesDwc}.</li>
     *   <li>When {@code irmngDwc} points at an existing path, the "irmng" index is written.</li>
     *   <li>The "vernacular" index is always written: from {@code commonNameFile} when that file exists,
     *       otherwise from the vernacular extension of {@code namesDwc}.</li>
     * </ul>
     * If writing an index fails, the corresponding writer is rolled back so that its write lock is
     * released, and the exception is rethrown. The reader over the load index is closed when this
     * method returns.
     *
     * @param loadingIndex   whether to build the temporary load index
     * @param sciIndex       whether to build the scientific name ("cb") index
     * @param indexDirectory parent directory of the generated indexes
     * @param tmpLoadIndex   directory of the temporary load index
     * @param namesDwc       directory of the unzipped names DwCA
     * @param irmngDwc       directory of the unzipped IRMNG DwCA; may be null
     * @param commonNameFile tab separated common name file; may be null
     * @throws Exception if any of the indexes can not be created
     */
    public void create(boolean loadingIndex, boolean sciIndex, String indexDirectory, String tmpLoadIndex, String namesDwc, String irmngDwc, String commonNameFile) throws Exception {
        this.dirTmpIndex = tmpLoadIndex;
        LowerCaseKeywordAnalyzer analyzer = new LowerCaseKeywordAnalyzer();
        try {
            if (loadingIndex) {
                this.createLoadingIndex(tmpLoadIndex, namesDwc);
            }
            if (sciIndex) {
                this.writer = this.createIndexWriter(new File(indexDirectory + File.separator + "cb"), analyzer, true);
                try {
                    this.generateIndex();
                    this.addSynonymsToIndex(namesDwc);
                    this.writer.commit();
                    this.writer.forceMerge(1);
                    this.writer.close();
                } catch (Exception e) {
                    this.rollbackQuietly(this.writer, "cb");
                    throw e;
                }
            }
            if (irmngDwc != null && new File(irmngDwc).exists()) {
                IndexWriter irmngWriter = this.createIndexWriter(new File(indexDirectory + File.separator + "irmng"), analyzer, true);
                try {
                    this.indexIrmngDwcA(irmngWriter, irmngDwc);
                    irmngWriter.forceMerge(1);
                    irmngWriter.close();
                } catch (Exception e) {
                    this.rollbackQuietly(irmngWriter, "irmng");
                    throw e;
                }
            }
            // Both vernacular loaders commit and close the writer they are handed.
            File vernacularDir = new File(indexDirectory + File.separator + "vernacular");
            if (commonNameFile != null && new File(commonNameFile).exists()) {
                this.indexCommonNames(this.createIndexWriter(vernacularDir, (Analyzer)new KeywordAnalyzer(), true), commonNameFile);
            } else {
                this.indexCommonNameExtension(this.createIndexWriter(vernacularDir, (Analyzer)new KeywordAnalyzer(), true), namesDwc);
            }
        } finally {
            this.closeLoadSearcher();
        }
    }

    /** Rolls back (and thereby closes) a writer after a failure, logging instead of masking the original error. */
    private void rollbackQuietly(IndexWriter failedWriter, String indexName) {
        try {
            failedWriter.rollback();
        } catch (Exception rollbackFailure) {
            log.warn("Unable to roll back the " + indexName + " index writer", rollbackFailure);
        }
    }

    /** Closes the reader behind the load index searcher, if one is open, so its file handles are released. */
    private void closeLoadSearcher() {
        if (this.lsearcher == null) {
            return;
        }
        try {
            this.lsearcher.getIndexReader().close();
        } catch (Exception closeFailure) {
            log.warn("Unable to close the load index reader", closeFailure);
        }
        // getLoadIdxResults reopens the load index lazily when the searcher is null.
        this.lsearcher = null;
    }

    /**
     * Loads common names from a tab separated file into the supplied index.
     * <p>
     * Only rows with exactly 6 columns are used. The LSID is taken from column 1 when it is not empty,
     * otherwise from column 0; the row is indexed only when that LSID is present in the load index.
     * Column 3 and column 2 are passed, in that order, to {@code createCommonNameDocument}.
     * <p>
     * The writer is committed, merged and closed on success and rolled back on failure. The CSV reader
     * is closed in both cases.
     *
     * @param iw   the writer of the vernacular index; it is closed by this method
     * @param file path of the tab separated common name file
     * @throws Exception if the file can not be read or the index can not be written
     */
    private void indexCommonNames(IndexWriter iw, String file) throws Exception {
        log.info("Starting to load the common names");
        int i = 0;
        int count = 0;
        CSVReader cbreader = null;
        try {
            cbreader = new CSVReader(new FileReader(file), '\t', '\"', '\\', 0);
            for (String[] values = cbreader.readNext(); values != null; values = cbreader.readNext()) {
                ++i;
                if (values.length == 6) {
                    String lsid = StringUtils.isNotEmpty(values[1]) ? values[1] : values[0];
                    TopDocs result = this.getLoadIdxResults("lsid", lsid, 1);
                    if (result.totalHits > 0) {
                        Document doc = this.createCommonNameDocument(values[3], values[2], lsid, 1.0f, false);
                        iw.addDocument(doc);
                        ++count;
                    }
                } else {
                    log.info("Issue on line " + i + "  " + values[0]);
                }
                if (i % 1000 == 0) {
                    log.info("Finished processing " + i + " common names with " + count + " added to index ");
                }
            }
            log.info("Finished processing " + i + " common names with " + count + " added to index ");
            iw.commit();
            iw.forceMerge(1);
            iw.close();
        } catch (Exception e) {
            // Release the write lock without publishing a half written index.
            try {
                iw.rollback();
            } catch (Exception rollbackFailure) {
                log.warn("Unable to roll back the common name index writer", rollbackFailure);
            }
            throw e;
        } finally {
            if (cbreader != null) {
                cbreader.close();
            }
        }
    }

    /**
     * Loads common names from the vernacular name extension of a Darwin Core Archive.
     * <p>
     * Each extension record is indexed only when its id is found as an LSID in the load index; the
     * scientific name stored for that load index entry is passed to {@code createCommonNameDocument}.
     * Records with a blank id or a blank vernacular name are skipped. When the archive has no
     * vernacular extension, nothing is added and the (empty) index is still committed.
     * <p>
     * The writer is committed, merged and closed on success and rolled back on failure.
     *
     * @param iw               the writer of the vernacular index; it is closed by this method
     * @param archiveDirectory directory of the unzipped DwCA
     * @throws Exception if the archive can not be read or the index can not be written
     */
    private void indexCommonNameExtension(IndexWriter iw, String archiveDirectory) throws Exception {
        int count = 0;
        try {
            Archive archive = ArchiveFactory.openArchive((File)new File(archiveDirectory));
            ArchiveFile vernacularArchiveFile = archive.getExtension((org.gbif.dwc.terms.Term)GbifTerm.VernacularName);
            if (vernacularArchiveFile == null) {
                log.warn("No vernacular name extension found in " + archiveDirectory + ". No common names will be indexed.");
            } else {
                Iterator iter = vernacularArchiveFile.iterator();
                while (iter.hasNext()) {
                    Record record = (Record)iter.next();
                    String taxonID = record.id();
                    String vernacularName = record.value((org.gbif.dwc.terms.Term)DwcTerm.vernacularName);
                    // Without an id there is nothing to look up; without a name there is nothing to index.
                    if (StringUtils.isBlank(taxonID) || StringUtils.isBlank(vernacularName)) {
                        continue;
                    }
                    TopDocs result = this.getLoadIdxResults("lsid", taxonID, 1);
                    if (result.totalHits <= 0) {
                        continue;
                    }
                    Document sciNameDoc = this.lsearcher.doc(result.scoreDocs[0].doc);
                    Document doc = this.createCommonNameDocument(vernacularName, sciNameDoc.get(NameIndexField.NAME.toString()), taxonID, 1.0f, false);
                    iw.addDocument(doc);
                    ++count;
                }
            }
            log.info("Finished processing the vernacular name extension with " + count + " common names added to index");
            iw.commit();
            iw.forceMerge(1);
            iw.close();
        } catch (Exception e) {
            // Release the write lock without publishing a half written index.
            try {
                iw.rollback();
            } catch (Exception rollbackFailure) {
                log.warn("Unable to roll back the common name index writer", rollbackFailure);
            }
            throw e;
        }
    }

    /**
     * Creates the temporary load index from the core records of a Darwin Core Archive and opens a
     * searcher over it.
     * <p>
     * One document is written per record. The id, LSID (taxonID, falling back to the record id),
     * parent id, accepted id, rank, rank id, synonym flag and root flag are added as
     * {@code StringField}s; the scientific name, authorship, genus and epithets are added as
     * {@code StoredField}s.
     * A record is flagged as a synonym ("T") unless its accepted id is blank or equal to its own
     * id/LSID; non-synonyms without a parent id are additionally flagged as "root".
     * <p>
     * The writer is committed, merged and closed on success and rolled back on failure. The archive
     * iterator is closed in both cases.
     *
     * @param tmpIndexDir      directory in which the load index is created
     * @param archiveDirectory directory of the unzipped DwCA
     * @throws Exception if the archive can not be read or the index can not be written
     */
    private void createLoadingIndex(String tmpIndexDir, String archiveDirectory) throws Exception {
        log.info("Starting to create the temporary loading index.");
        File indexDir = new File(tmpIndexDir);
        IndexWriter iw = this.createIndexWriter(indexDir, (Analyzer)new KeywordAnalyzer(), true);
        ClosableIterator it = null;
        try {
            Archive archive = ArchiveFactory.openArchive((File)new File(archiveDirectory));
            it = archive.iteratorDwc();
            int i = 0;
            long start = System.currentTimeMillis();
            while (it.hasNext()) {
                Document doc = new Document();
                DarwinCoreRecord dwcr = (DarwinCoreRecord)it.next();
                String id = dwcr.getId();
                String lsid = dwcr.getTaxonID() == null ? id : dwcr.getTaxonID();
                String parentId = dwcr.getParentNameUsageID();
                String acceptedLsid = dwcr.getAcceptedNameUsageID();
                doc.add(new StringField(NameIndexField.ID.toString(), id, Field.Store.YES));
                if (StringUtils.isNotBlank(lsid)) {
                    doc.add(new StringField(NameIndexField.LSID.toString(), lsid, Field.Store.YES));
                } else {
                    log.warn("LSID is blank for record " + id + " (accepted id: " + acceptedLsid + ")");
                }
                if (StringUtils.isNotBlank(parentId)) {
                    doc.add(new StringField("parent_id", parentId, Field.Store.YES));
                }
                if (StringUtils.isNotBlank(acceptedLsid)) {
                    doc.add(new StringField(NameIndexField.ACCEPTED.toString(), acceptedLsid, Field.Store.YES));
                }
                if (StringUtils.isNotBlank(dwcr.getScientificName())) {
                    doc.add(new StoredField(NameIndexField.NAME.toString(), dwcr.getScientificName()));
                }
                if (StringUtils.isNotBlank(dwcr.getScientificNameAuthorship())) {
                    doc.add(new StoredField(NameIndexField.AUTHOR.toString(), dwcr.getScientificNameAuthorship()));
                }
                if (StringUtils.isNotBlank(dwcr.getGenus())) {
                    doc.add(new StoredField("genus", dwcr.getGenus()));
                }
                if (StringUtils.isNotBlank(dwcr.getSpecificEpithet())) {
                    doc.add(new StoredField(NameIndexField.SPECIFIC.toString(), dwcr.getSpecificEpithet()));
                }
                if (StringUtils.isNotBlank(dwcr.getInfraspecificEpithet())) {
                    doc.add(new StoredField(NameIndexField.INFRA_SPECIFIC.toString(), dwcr.getInfraspecificEpithet()));
                }

                // Rank: use the recognised rank when there is one, otherwise keep the raw text as UNRANKED.
                String rankName;
                String rankId;
                if (StringUtils.isNotBlank(dwcr.getTaxonRank())) {
                    RankType rt = RankType.getForStrRank(dwcr.getTaxonRank());
                    if (rt != null) {
                        rankName = rt.getRank();
                        rankId = rt.getId().toString();
                    } else {
                        rankName = dwcr.getTaxonRank();
                        rankId = RankType.UNRANKED.getId().toString();
                    }
                } else {
                    rankName = "Unknown";
                    rankId = RankType.UNRANKED.getId().toString();
                }
                doc.add(new StringField(NameIndexField.RANK.toString(), rankName, Field.Store.YES));
                doc.add(new StringField(NameIndexField.RANK_ID.toString(), rankId, Field.Store.YES));

                // A blank accepted id (null or empty) means the record is itself accepted; this matches the
                // isNotBlank test used above when deciding whether to store the ACCEPTED field.
                boolean accepted = StringUtils.isBlank(acceptedLsid) || StringUtils.equals(lsid, acceptedLsid) || StringUtils.equals(id, acceptedLsid);
                if (accepted) {
                    doc.add(new StringField(NameIndexField.iS_SYNONYM.toString(), "F", Field.Store.YES));
                    if (StringUtils.isBlank(parentId)) {
                        doc.add(new StringField("root", "T", Field.Store.YES));
                    }
                } else {
                    doc.add(new StringField(NameIndexField.iS_SYNONYM.toString(), "T", Field.Store.YES));
                }
                iw.addDocument(doc);

                if (++i % 1000 == 0) {
                    long finish = System.currentTimeMillis();
                    // 1000 records were processed in (finish - start) milliseconds.
                    log.debug("Loading index: " + i + " records per sec: " + 1000.0f / ((float)(finish - start) / 1000.0f));
                    start = finish;
                }
            }
            log.info("Finished creating the temporary load index with " + i + " concepts");
            iw.commit();
            iw.forceMerge(1);
            iw.close();
        } catch (Exception e) {
            // Release the write lock without publishing a half written index.
            try {
                iw.rollback();
            } catch (Exception rollbackFailure) {
                log.warn("Unable to roll back the temporary load index writer", rollbackFailure);
            }
            throw e;
        } finally {
            if (it != null) {
                it.close();
            }
        }
        this.lsearcher = new IndexSearcher((IndexReader)DirectoryReader.open((Directory)FSDirectory.open((File)indexDir)));
    }

    /**
     * Runs an exact term query against the temporary load index.
     * <p>
     * The searcher is opened on first use from the configured load index directory.
     *
     * @param field the field to query
     * @param value the exact value to match
     * @param max   the maximum number of hits to return
     * @return the matching documents
     * @throws RuntimeException if no searcher is open and the load index directory does not exist
     * @throws Exception        if the index can not be opened or searched
     */
    private TopDocs getLoadIdxResults(String field, String value, int max) throws Exception {
        if (this.lsearcher == null) {
            File loadIndexDir = new File(this.dirTmpIndex);
            if (!loadIndexDir.exists()) {
                throw new RuntimeException("A load index has not been generated. Please run this tool with '-load' before creating the search index.");
            }
            Directory directory = FSDirectory.open(loadIndexDir);
            IndexReader reader = DirectoryReader.open(directory);
            this.lsearcher = new IndexSearcher(reader);
        }
        Query exactMatch = new TermQuery(new Term(field, value));
        return this.lsearcher.search(exactMatch, max);
    }

    /**
     * Writes the accepted concepts to the search index by walking the load index from every document
     * flagged as "root".
     * <p>
     * The left/right values are assigned consecutively across the root concepts: each root starts at
     * the value following the one returned by {@code addIndex} for the previous root.
     *
     * @throws Exception if the load index can not be read or the search index can not be written
     */
    private void generateIndex() throws Exception {
        TopDocs rootConcepts = this.getLoadIdxResults("root", "T", 25000);
        if (rootConcepts.totalHits > rootConcepts.scoreDocs.length) {
            // More roots than the initial page size: fetch all of them rather than silently dropping the rest.
            rootConcepts = this.getLoadIdxResults("root", "T", rootConcepts.totalHits);
        }
        int right = 0;
        for (ScoreDoc sd : rootConcepts.scoreDocs) {
            int left = right + 1;
            Document doc = this.lsearcher.doc(sd.doc);
            right = this.addIndex(doc, 1, left, new LinnaeanRankClassification());
            log.info("Finished loading " + doc.get(NameIndexField.LSID.toString()) + " " + doc.get(NameIndexField.NAME.toString()) + " " + left + " " + right);
        }
    }

    /**
     * Recursively adds a concept and all of its children to the search index.
     * <p>
     * Children are the load index documents whose "parent_id" equals this concept's id or, when there
     * are none, its LSID. The concept's own document is written after its children so that the right
     * value is known.
     *
     * @param doc          the load index document of the concept
     * @param currentDepth depth of the concept below its root (roots are passed 1)
     * @param currentLeft  the left value to assign to the concept
     * @param higherClass  the classification accumulated from the concept's ancestors
     * @return the value following this concept's right value
     * @throws Exception if the load index can not be read or the search index can not be written
     */
    private int addIndex(Document doc, int currentDepth, int currentLeft, LinnaeanRankClassification higherClass) throws Exception {
        String id = doc.get(NameIndexField.ID.toString());
        String lsid = doc.get(NameIndexField.LSID.toString());

        String parentKey = id;
        TopDocs children = this.getLoadIdxResults("parent_id", parentKey, 25000);
        // Fall back to the LSID only when it exists and would give a different query.
        if (children.totalHits == 0 && lsid != null && !lsid.equals(id)) {
            parentKey = lsid;
            children = this.getLoadIdxResults("parent_id", parentKey, 25000);
        }
        if (children.totalHits > children.scoreDocs.length) {
            // More children than the initial page size: fetch all of them rather than silently dropping the rest.
            children = this.getLoadIdxResults("parent_id", parentKey, children.totalHits);
        }

        int left = currentLeft;
        int right = currentLeft;
        int rankId = Integer.parseInt(doc.get(NameIndexField.RANK_ID.toString()));
        String name = doc.get(NameIndexField.NAME.toString());
        String cname = this.getCanonical(name);

        // Extend a copy of the inherited classification with this concept when it sits at a major rank.
        LinnaeanRankClassification newcl = new LinnaeanRankClassification(higherClass);
        switch (rankId) {
            case 1000: {
                newcl.setKingdom(cname);
                newcl.setKid(lsid);
                break;
            }
            case 2000: {
                newcl.setPhylum(cname);
                newcl.setPid(lsid);
                break;
            }
            case 3000: {
                newcl.setKlass(cname);
                newcl.setCid(lsid);
                break;
            }
            case 4000: {
                newcl.setOrder(cname);
                newcl.setOid(lsid);
                break;
            }
            case 5000: {
                newcl.setFamily(cname);
                newcl.setFid(lsid);
                break;
            }
            case 6000: {
                newcl.setGenus(cname);
                newcl.setGid(lsid);
                break;
            }
            case 7000: {
                newcl.setSpecies(cname);
                newcl.setSid(lsid);
                break;
            }
            default: {
                // Other ranks do not contribute to the classification.
                break;
            }
        }

        for (ScoreDoc child : children.scoreDocs) {
            Document cdoc = this.lsearcher.doc(child.doc);
            right = this.addIndex(cdoc, currentDepth + 1, right + 1, newcl);
        }
        if (left % 2000 == 0) {
            log.debug("Last processed lft:" + left + " rgt:" + right + " depth:" + currentDepth + " classification " + newcl);
        }
        Document indexDoc = this.createALAIndexDocument(cname, id, lsid, doc.get(NameIndexField.AUTHOR.toString()), doc.get(NameIndexField.RANK.toString()), doc.get(NameIndexField.RANK_ID.toString()), Integer.toString(left), Integer.toString(right), newcl);
        this.writer.addDocument(indexDoc);
        return right + 1;
    }

    /**
     * Returns the canonical form of a scientific name.
     * <p>
     * This is a best effort conversion: when the name can not be parsed, or is not of a parsable
     * type, the supplied name is returned unchanged.
     *
     * @param name the scientific name as supplied
     * @return the parser's canonical name, or {@code name} itself when no canonical form is available
     */
    private String getCanonical(String name) {
        try {
            ParsedName pn = this.parser.parse(name);
            if (pn != null && pn.isParsableType()) {
                return pn.canonicalName();
            }
        }
        catch (Exception e) {
            // Deliberately non-fatal: fall back to the supplied name, but leave a trace for debugging.
            if (log.isDebugEnabled()) {
                log.debug("Unable to parse name \"" + name + "\"; using it as supplied", e);
            }
        }
        return name;
    }

    /**
     * Adds the synonyms found in a Darwin Core Archive to the search index.
     * <p>
     * A record is treated as a synonym when its accepted id is not empty and differs from both its own
     * id and its LSID (taxonID, falling back to the record id). A failure on a single record is logged
     * and does not stop the load. The archive iterator is closed when the method returns.
     *
     * @param dwcaDir directory of the unzipped DwCA
     * @throws Exception if the archive can not be opened or read
     */
    private void addSynonymsToIndex(String dwcaDir) throws Exception {
        Archive archive = ArchiveFactory.openArchive((File)new File(dwcaDir));
        ClosableIterator it = archive.iteratorDwc();
        int i = 0;
        int count = 0;
        try {
            while (it.hasNext()) {
                DarwinCoreRecord dwcr = (DarwinCoreRecord)it.next();
                ++i;
                String id = dwcr.getId();
                String lsid = dwcr.getTaxonID() != null ? dwcr.getTaxonID() : id;
                String acceptedId = dwcr.getAcceptedNameUsageID();
                boolean synonym = StringUtils.isNotEmpty(acceptedId) && !StringUtils.equals(acceptedId, id) && !StringUtils.equals(acceptedId, lsid);
                if (synonym) {
                    ++count;
                    try {
                        if (log.isDebugEnabled()) {
                            log.debug("Scientific name:  " + dwcr.getScientificName() + ", ID:  " + id + ", LSID:  " + lsid);
                        }
                        Document doc = this.createALASynonymDocument(dwcr.getScientificName(), dwcr.getScientificNameAuthorship(), id, lsid, lsid, acceptedId, acceptedId, 1.0f, dwcr.getTaxonomicStatus());
                        if (doc != null) {
                            this.writer.addDocument(doc);
                        } else {
                            log.warn("Problem processing scientificName:  " + dwcr.getScientificName() + ", ID:  " + id + ", LSID:  " + lsid);
                        }
                    }
                    catch (Exception e) {
                        // One bad record must not abort the whole synonym load.
                        log.error("Exception thrown processing Scientific name:  " + dwcr.getScientificName() + ", ID:  " + id + ", LSID:  " + lsid);
                        log.error(e.getMessage(), e);
                    }
                }
                if (i % 1000 == 0) {
                    log.debug("Processed " + i + " records " + count + " synonyms");
                }
            }
            log.info("Finished processing " + i + " records " + count + " synonyms");
        } finally {
            it.close();
        }
    }

    /**
     * Command line entry point of the name indexer.
     * <p>
     * Supported options: {@code -v}/{@code --version}, {@code -h}/{@code --help}, {@code -all},
     * {@code -load}, {@code -search}, {@code -irmng}, {@code -dwca}, {@code -target}, {@code -tmp},
     * {@code -common} and {@code -testSearch}. When none of {@code -load}, {@code -search} and
     * {@code -all} is given, both the load index and the search index are generated.
     * <p>
     * An existing target directory is moved to a timestamped backup before the new indexes are
     * written. Any exception is logged; it is not rethrown.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        final String DEFAULT_DWCA = "/data/lucene/sources/dwca-col";
        final String DEFAULT_IRMNG = "/data/lucene/sources/IRMNG_DWC_HOMONYMS";
        final String DEFAULT_COMMON_NAME = "/data/lucene/sources/col_vernacular.txt";
        final String DEFAULT_TARGET_DIR = "/data/lucene/namematching";
        final String DEFAULT_TMP_DIR = "/data/lucene/nmload-tmp";
        Options options = new Options();
        options.addOption("v", "version", false, "Retrieve version information");
        options.addOption("h", "help", false, "Retrieve options");
        options.addOption("all", false, "Generates the load index and search index");
        options.addOption("load", false, "Generate the load index only. The load index is a temporary index generated from the raw data files used to load the main search index");
        options.addOption("search", false, "Generates the search index. A load index must already be created for this to run.");
        options.addOption("irmng", true, "The absolute path to the unzipped irmng DwCA. IRMNG is used to detect homonyms. Defaults to " + DEFAULT_IRMNG);
        options.addOption("dwca", true, "The absolute path to the unzipped DwCA for the scientific names. Defaults to " + DEFAULT_DWCA);
        options.addOption("target", true, "The target directory to write the new name index to. Defaults to " + DEFAULT_TARGET_DIR);
        options.addOption("tmp", true, "The tmp directory for the load index. Defaults to " + DEFAULT_TMP_DIR);
        options.addOption("common", true, "The common (vernacular) name file. Defaults to " + DEFAULT_COMMON_NAME);
        options.addOption("testSearch", true, "Debug a name search. This uses the target directory to search against.");
        BasicParser parser = new BasicParser();
        try {
            CommandLine line = parser.parse(options, args);
            if (line.hasOption("v")) {
                InputStream stream = DwcaNameIndexer.class.getResourceAsStream("/git.properties");
                if (stream != null) {
                    try {
                        Properties properties = new Properties();
                        properties.load(stream);
                        properties.list(System.out);
                    } finally {
                        stream.close();
                    }
                } else {
                    System.err.println("Unable to retrieve versioning information");
                }
                new HelpFormatter().printHelp("nameindexer", options);
                System.exit(-1);
            }
            if (line.hasOption("help")) {
                new HelpFormatter().printHelp("nameindexer", options);
                System.exit(-1);
            }
            if (line.hasOption("testSearch")) {
                // Check the directory that is actually searched, which may have been overridden with -target.
                String searchIndexPath = line.getOptionValue("target", DEFAULT_TARGET_DIR);
                if (new File(searchIndexPath).exists()) {
                    System.out.println("Search for name");
                    String searchTerm = line.getOptionValue("testSearch");
                    ALANameSearcher searcher = new ALANameSearcher(searchIndexPath);
                    // Try a scientific name match first and fall back to a common name match.
                    NameSearchResult nsr = searcher.searchForRecord(searchTerm);
                    if (nsr == null) {
                        nsr = searcher.searchForCommonName(searchTerm);
                    }
                    if (nsr != null) {
                        Map<String, String> props = nsr.toMap();
                        for (Map.Entry<String, String> entry : props.entrySet()) {
                            System.out.println(entry.getKey() + ": " + entry.getValue());
                        }
                    } else {
                        System.err.println("No match for " + searchTerm);
                    }
                    // NOTE(review): exits with status 1 even after a successful lookup - confirm whether callers rely on this.
                    System.exit(1);
                } else {
                    System.err.println("Index unreadable. Check " + searchIndexPath);
                }
                new HelpFormatter().printHelp("nameindexer", options);
                System.exit(-1);
            }
            boolean load = line.hasOption("load") || line.hasOption("all");
            boolean search = line.hasOption("search") || line.hasOption("all");
            if (!load && !search) {
                // Nothing was requested explicitly: build everything.
                load = true;
                search = true;
            }
            log.info("Generating loading index: " + load);
            log.info("Generating searching index: " + search);
            boolean defaultIrmngReadable = new File(DEFAULT_IRMNG).exists();
            boolean defaultCommonReadable = new File(DEFAULT_COMMON_NAME).exists();
            boolean defaultDwcaReadable = new File(DEFAULT_DWCA).exists();
            if (line.getOptionValue("dwca") != null) {
                log.info("Using the  DwCA name file: " + line.getOptionValue("dwca"));
            } else if (defaultDwcaReadable) {
                log.info("Using the default DwCA name file: " + DEFAULT_DWCA);
            } else {
                log.error("No DwC Archive specified and the default file path does not exist or is inaccessible. Default path: " + DEFAULT_DWCA);
                new HelpFormatter().printHelp("nameindexer", options);
                System.exit(-1);
            }
            if (line.getOptionValue("irmng") == null && !defaultIrmngReadable) {
                log.warn("No IRMNG export specified and the default file path does not exist or is inaccessible. Default path: " + DEFAULT_IRMNG);
            } else if (line.getOptionValue("irmng") == null) {
                log.info("Using the default IRMNG name file: " + DEFAULT_IRMNG);
            } else {
                log.info("Using the  IRMNG name file: " + line.getOptionValue("irmng"));
            }
            if (line.getOptionValue("common") == null && !defaultCommonReadable) {
                log.warn("No common name export specified and the default file path does not exist or is inaccessible. Default path: " + DEFAULT_COMMON_NAME);
            } else if (line.getOptionValue("common") == null) {
                log.info("Using the default common name file: " + DEFAULT_COMMON_NAME);
            } else {
                log.info("Using the common name file: " + line.getOptionValue("common"));
            }
            File targetDirectory = new File(line.getOptionValue("target", DEFAULT_TARGET_DIR));
            if (targetDirectory.exists()) {
                // 24-hour clock ("HH"), so that backups taken in the morning and the afternoon can not collide.
                String newPath = targetDirectory.getAbsolutePath() + "_" + DateFormatUtils.format(new Date(), "yyyy-MM-dd_HH-mm-ss");
                log.info("Target directory already exists. Backing up to : " + newPath);
                File newTargetDirectory = new File(newPath);
                FileUtils.moveDirectory(targetDirectory, newTargetDirectory);
                FileUtils.forceMkdir(targetDirectory);
            }
            DwcaNameIndexer indexer = new DwcaNameIndexer();
            indexer.create(load, search, line.getOptionValue("target", DEFAULT_TARGET_DIR), line.getOptionValue("tmp", DEFAULT_TMP_DIR), line.getOptionValue("dwca", DEFAULT_DWCA), line.getOptionValue("irmng", DEFAULT_IRMNG), line.getOptionValue("common", DEFAULT_COMMON_NAME));
        }
        catch (Exception e) {
            log.error("Unable to generate the name index: " + e.getMessage(), e);
        }
    }
}

