/*
 * Decompiled with CFR 0.152.
 */
package au.org.ala.names.search;

import au.com.bytecode.opencsv.CSVReader;
import au.org.ala.names.lucene.analyzer.LowerCaseKeywordAnalyzer;
import au.org.ala.names.model.LinnaeanRankClassification;
import au.org.ala.names.model.NameIndexField;
import au.org.ala.names.model.RankType;
import au.org.ala.names.search.ALANameIndexer;
import java.io.File;
import java.io.FileReader;
import java.io.Reader;
import java.util.Date;
import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.gbif.dwc.record.DarwinCoreRecord;
import org.gbif.dwc.text.Archive;
import org.gbif.dwc.text.ArchiveFactory;
import org.gbif.ecat.model.ParsedName;
import org.gbif.utils.file.ClosableIterator;

/**
 * Builds the name matching Lucene indexes from a Darwin Core archive (DwC-A).
 *
 * <p>The work is done in stages, each of which is optional (see {@link #create}):
 * <ol>
 *   <li>a temporary "load" index holding one document per archive record, keyed by id,
 *       LSID and parent id so that the taxonomic tree can be walked;</li>
 *   <li>the scientific name index (sub directory {@code cb}) produced by walking the load
 *       index from every root concept and then appending the synonyms;</li>
 *   <li>the IRMNG index (sub directory {@code irmng});</li>
 *   <li>the common name index (sub directory {@code vernacular}).</li>
 * </ol>
 *
 * <p>Instances keep mutable state (the current writer and the load index searcher) and no
 * synchronisation is performed in this class.
 */
public class DwcaNameIndexer extends ALANameIndexer {
    protected static Log log = LogFactory.getLog(DwcaNameIndexer.class);

    /** Maximum number of hits requested from the load index for root and child lookups. */
    private static final int MAX_LOAD_RESULTS = 25000;

    /** Number of processed records between two progress messages. */
    private static final int PROGRESS_INTERVAL = 1000;

    private IndexSearcher lsearcher;
    private IndexWriter writer;
    private String dirTmpIndex;

    /**
     * Creates the requested indexes.
     *
     * @param loadingIndex   when true the temporary load index is (re)built from {@code namesDwc}
     * @param sciIndex       when true the scientific name index is built in {@code indexDirectory/cb}
     * @param indexDirectory directory under which the {@code cb}, {@code irmng} and
     *                       {@code vernacular} index directories are created
     * @param tmpLoadIndex   directory of the temporary load index
     * @param namesDwc       path of the scientific names DwC-A
     * @param irmngDwc       path of the IRMNG DwC-A; skipped when null or when the path does not exist
     * @param commonNameFile path of the common names file; skipped when null or when the path does not exist
     * @throws Exception when reading an input or writing an index fails
     */
    public void create(boolean loadingIndex, boolean sciIndex, String indexDirectory, String tmpLoadIndex, String namesDwc, String irmngDwc, String commonNameFile) throws Exception {
        this.dirTmpIndex = tmpLoadIndex;
        LowerCaseKeywordAnalyzer analyzer = new LowerCaseKeywordAnalyzer();
        try {
            if (loadingIndex) {
                this.createLoadingIndex(tmpLoadIndex, namesDwc);
            }
            if (sciIndex) {
                this.writer = this.createIndexWriter(new File(indexDirectory + File.separator + "cb"), analyzer, true);
                try {
                    this.generateIndex();
                    this.addSynonymsToIndex(namesDwc);
                    this.writer.commit();
                    this.writer.forceMerge(1);
                } finally {
                    // Always release the writer, even when indexing failed part way through.
                    this.writer.close();
                    this.writer = null;
                }
            }
            if (irmngDwc != null && new File(irmngDwc).exists()) {
                IndexWriter irmngWriter = this.createIndexWriter(new File(indexDirectory + File.separator + "irmng"), analyzer, true);
                try {
                    this.indexIrmngDwcA(irmngWriter, irmngDwc);
                    irmngWriter.forceMerge(1);
                } finally {
                    irmngWriter.close();
                }
            }
            if (commonNameFile != null && new File(commonNameFile).exists()) {
                // indexCommonNames takes ownership of the writer and closes it.
                this.indexCommonNames(this.createIndexWriter(new File(indexDirectory + File.separator + "vernacular"), new KeywordAnalyzer(), true), commonNameFile);
            }
        } finally {
            // The load index reader is only needed while the indexes are being generated.
            this.closeLoadSearcher();
        }
    }

    /**
     * Closes the reader behind the load index searcher, if one is open. The searcher is
     * reopened lazily by {@link #getLoadIdxResults} when it is needed again.
     */
    private void closeLoadSearcher() throws Exception {
        if (this.lsearcher != null) {
            try {
                this.lsearcher.getIndexReader().close();
            } finally {
                this.lsearcher = null;
            }
        }
    }

    /**
     * Loads the common names from a tab separated file into the supplied writer.
     *
     * <p>Only rows with exactly six columns are considered. The LSID is taken from column 1
     * or, when that is empty, from column 0; the row is indexed only when that LSID exists in
     * the load index. Columns 3 and 2 are handed to {@code getCommonNameDocument} as its first
     * two arguments. The writer is committed, merged and closed by this method.
     *
     * @param iw   writer for the common name index; closed on return
     * @param file path of the common names file
     * @throws Exception when the file cannot be read or the index cannot be written
     */
    private void indexCommonNames(IndexWriter iw, String file) throws Exception {
        log.info("Starting to load the common names");
        int i = 0;
        int count = 0;
        try {
            // NOTE(review): FileReader decodes with the platform default charset - confirm the
            // file encoding matches the environment the indexer runs in.
            CSVReader cbreader = new CSVReader(new FileReader(file), '\t', '"', '\\', 0);
            try {
                for (String[] values = cbreader.readNext(); values != null; values = cbreader.readNext()) {
                    ++i;
                    if (values.length == 6) {
                        String lsid = StringUtils.isNotEmpty(values[1]) ? values[1] : values[0];
                        TopDocs result = this.getLoadIdxResults("lsid", lsid, 1);
                        if (result.totalHits > 0) {
                            Document doc = this.getCommonNameDocument(values[3], values[2], lsid, 1.0f, false);
                            iw.addDocument(doc);
                            ++count;
                        }
                    } else {
                        System.out.println("Issue on line " + i + "  " + (values.length > 0 ? values[0] : ""));
                    }
                    if (i % PROGRESS_INTERVAL == 0) {
                        log.debug("Finished processing " + i + " common names with " + count + " added to index ");
                        System.out.println(new Date() + "Finished processing " + i + " common names with " + count + " added to index ");
                    }
                }
            } finally {
                cbreader.close();
            }
            System.out.println(new Date() + "Finished processing " + i + " common names with " + count + " added to index ");
            iw.commit();
            iw.forceMerge(1);
        } finally {
            iw.close();
        }
    }

    /**
     * Creates the temporary load index: one document per record of the archive. After the
     * index has been written a searcher is opened on it for the later stages.
     *
     * @param tmpIndexDir      directory in which the load index is created
     * @param archiveDirectory location of the DwC-A to read
     * @throws Exception when the archive cannot be read or the index cannot be written
     */
    private void createLoadingIndex(String tmpIndexDir, String archiveDirectory) throws Exception {
        log.info("Starting to create the temporary loading index.");
        File indexDir = new File(tmpIndexDir);
        IndexWriter iw = this.createIndexWriter(indexDir, new KeywordAnalyzer(), true);
        try {
            Archive archive = ArchiveFactory.openArchive(new File(archiveDirectory));
            ClosableIterator<DarwinCoreRecord> it = archive.iteratorDwc();
            int i = 0;
            try {
                long start = System.currentTimeMillis();
                while (it.hasNext()) {
                    iw.addDocument(this.buildLoadDocument(it.next()));
                    if (++i % PROGRESS_INTERVAL != 0) {
                        continue;
                    }
                    long finish = System.currentTimeMillis();
                    // Rate is based on the time elapsed for the last batch of records.
                    log.debug("Loading index: " + i + " records per sec: " + 1000.0f / ((float)(finish - start) / 1000.0f));
                    start = finish;
                }
            } finally {
                it.close();
            }
            log.info("Finished creating the temporary load index with " + i + " concepts");
            iw.commit();
            iw.forceMerge(1);
        } finally {
            iw.close();
        }
        // Drop any searcher opened on a previous version of the load index.
        this.closeLoadSearcher();
        this.lsearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(indexDir)));
    }

    /**
     * Converts one archive record into its load index document.
     *
     * <p>A record is treated as accepted when its accepted name usage id is blank or equal to
     * its own id or LSID; an accepted record without a parent is additionally flagged as a
     * root. Every other record is flagged as a synonym.
     */
    private Document buildLoadDocument(DarwinCoreRecord dwcr) {
        Document doc = new Document();
        String id = dwcr.getId();
        String taxonId = dwcr.getTaxonID();
        String lsid = taxonId == null ? id : taxonId;
        String acceptedLsid = dwcr.getAcceptedNameUsageID();
        String parentId = dwcr.getParentNameUsageID();

        doc.add(new StringField(NameIndexField.ID.toString(), id, Field.Store.YES));
        if (StringUtils.isNotBlank(lsid)) {
            doc.add(new StringField(NameIndexField.LSID.toString(), lsid, Field.Store.YES));
        } else {
            System.out.println("LSID is null for " + id + " " + lsid + " " + lsid + " " + acceptedLsid);
        }
        if (StringUtils.isNotBlank(parentId)) {
            doc.add(new StringField("parent_id", parentId, Field.Store.YES));
        }
        if (StringUtils.isNotBlank(acceptedLsid)) {
            doc.add(new StringField(NameIndexField.ACCEPTED.toString(), acceptedLsid, Field.Store.YES));
        }
        // The remaining name parts are only stored, they are never searched in the load index.
        this.addStoredIfNotBlank(doc, NameIndexField.NAME.toString(), dwcr.getScientificName());
        this.addStoredIfNotBlank(doc, NameIndexField.AUTHOR.toString(), dwcr.getScientificNameAuthorship());
        this.addStoredIfNotBlank(doc, "genus", dwcr.getGenus());
        this.addStoredIfNotBlank(doc, NameIndexField.SPECIFIC.toString(), dwcr.getSpecificEpithet());
        this.addStoredIfNotBlank(doc, NameIndexField.INFRA_SPECIFIC.toString(), dwcr.getInfraspecificEpithet());

        String rawRank = dwcr.getTaxonRank();
        String rank;
        String rankId;
        if (StringUtils.isNotBlank(rawRank)) {
            RankType rt = RankType.getForStrRank(rawRank);
            if (rt != null) {
                rank = rt.getRank();
                rankId = rt.getId().toString();
            } else {
                // Unrecognised rank: keep the supplied text but file it under the unranked id.
                rank = rawRank;
                rankId = RankType.UNRANKED.getId().toString();
            }
        } else {
            rank = "Unknown";
            rankId = RankType.UNRANKED.getId().toString();
        }
        doc.add(new StringField(NameIndexField.RANK.toString(), rank, Field.Store.YES));
        doc.add(new StringField(NameIndexField.RANK_ID.toString(), rankId, Field.Store.YES));

        // Blank (not just null) accepted ids mean "accepted", matching addSynonymsToIndex.
        boolean accepted = StringUtils.isBlank(acceptedLsid) || StringUtils.equals(lsid, acceptedLsid) || StringUtils.equals(id, acceptedLsid);
        if (accepted) {
            doc.add(new StringField(NameIndexField.iS_SYNONYM.toString(), "F", Field.Store.YES));
            if (StringUtils.isBlank(parentId)) {
                doc.add(new StringField("root", "T", Field.Store.YES));
            }
        } else {
            doc.add(new StringField(NameIndexField.iS_SYNONYM.toString(), "T", Field.Store.YES));
        }
        return doc;
    }

    /** Adds a stored-only field to the document unless the value is blank. */
    private void addStoredIfNotBlank(Document doc, String field, String value) {
        if (StringUtils.isNotBlank(value)) {
            doc.add(new StoredField(field, value));
        }
    }

    /**
     * Runs an exact term query against the load index, opening the searcher on first use.
     *
     * @param field field to search
     * @param value exact value to match
     * @param max   maximum number of hits to return
     * @return the matching documents
     * @throws IllegalStateException when the load index has not been created and its directory does not exist
     * @throws Exception             when the load index cannot be opened or searched
     */
    private TopDocs getLoadIdxResults(String field, String value, int max) throws Exception {
        if (this.lsearcher == null && this.dirTmpIndex != null && new File(this.dirTmpIndex).exists()) {
            this.lsearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(new File(this.dirTmpIndex))));
        }
        if (this.lsearcher == null) {
            throw new IllegalStateException("The load index is not available at " + this.dirTmpIndex + "; generate it before searching");
        }
        TermQuery tq = new TermQuery(new Term(field, value));
        return this.lsearcher.search(tq, max);
    }

    /** Logs a warning when a load index lookup matched more documents than were returned. */
    private void warnIfTruncated(TopDocs docs, String field, String value) {
        if (docs.totalHits > docs.scoreDocs.length) {
            log.warn("Only " + docs.scoreDocs.length + " of " + docs.totalHits + " load index matches for " + field + "=" + value + " will be processed");
        }
    }

    /**
     * Walks the load index from every document flagged as a root and writes the resulting
     * documents to the scientific name index. The left value of each root starts one past
     * the value returned for the previous root.
     */
    private void generateIndex() throws Exception {
        TopDocs rootConcepts = this.getLoadIdxResults("root", "T", MAX_LOAD_RESULTS);
        this.warnIfTruncated(rootConcepts, "root", "T");
        int right = 0;
        for (ScoreDoc sd : rootConcepts.scoreDocs) {
            int left = right + 1;
            Document doc = this.lsearcher.doc(sd.doc);
            right = this.addIndex(doc, 1, left, new LinnaeanRankClassification());
            log.info("Finished loading " + doc.get(NameIndexField.LSID.toString()) + " " + doc.get(NameIndexField.NAME.toString()) + " " + left + " " + right);
        }
    }

    /**
     * Recursively indexes a load document and all of its children.
     *
     * <p>Children are found through their {@code parent_id}, first by this document's id and,
     * when that yields nothing, by its LSID. All children are written before the document
     * itself so that its right value is known; the left/right values of every descendant lie
     * inside the [left, right] interval of this document.
     *
     * @param doc          load index document to add
     * @param currentDepth depth of the document in the tree, 1 for a root (only used for logging)
     * @param currentLeft  left value to assign to the document
     * @param higherClass  classification accumulated from the ancestors; it is copied, not modified
     * @return the right value of the document plus one
     * @throws Exception when the load index cannot be read or the target index cannot be written
     */
    private int addIndex(Document doc, int currentDepth, int currentLeft, LinnaeanRankClassification higherClass) throws Exception {
        String id = doc.get(NameIndexField.ID.toString());
        String lsid = doc.get(NameIndexField.LSID.toString());
        String lookupValue = id;
        TopDocs children = this.getLoadIdxResults("parent_id", id, MAX_LOAD_RESULTS);
        // The LSID field is absent for blank LSIDs; a null term value cannot be searched.
        if (children.totalHits == 0 && lsid != null) {
            lookupValue = lsid;
            children = this.getLoadIdxResults("parent_id", lsid, MAX_LOAD_RESULTS);
        }
        this.warnIfTruncated(children, "parent_id", lookupValue);

        int left = currentLeft;
        int right = currentLeft;
        int rankId = Integer.parseInt(doc.get(NameIndexField.RANK_ID.toString()));
        String name = doc.get(NameIndexField.NAME.toString());
        String cname = this.getCanonical(name);
        LinnaeanRankClassification newcl = new LinnaeanRankClassification(higherClass);
        // Record this name in the classification handed down to the children.
        switch (rankId) {
            case 1000: {
                newcl.setKingdom(cname);
                newcl.setKid(lsid);
                break;
            }
            case 2000: {
                newcl.setPhylum(cname);
                newcl.setPid(lsid);
                break;
            }
            case 3000: {
                newcl.setKlass(cname);
                newcl.setCid(lsid);
                break;
            }
            case 4000: {
                newcl.setOrder(cname);
                newcl.setOid(lsid);
                break;
            }
            case 5000: {
                newcl.setFamily(cname);
                newcl.setFid(lsid);
                break;
            }
            case 6000: {
                newcl.setGenus(cname);
                newcl.setGid(lsid);
                break;
            }
            case 7000: {
                newcl.setSpecies(cname);
                newcl.setSid(lsid);
                break;
            }
            default: {
                // Other ranks do not contribute to the classification.
                break;
            }
        }
        for (ScoreDoc child : children.scoreDocs) {
            Document cdoc = this.lsearcher.doc(child.doc);
            right = this.addIndex(cdoc, currentDepth + 1, right + 1, newcl);
        }
        if (left % 2000 == 0) {
            log.debug("Last processed lft:" + left + " rgt:" + right + " depth:" + currentDepth + " classification " + newcl);
        }
        Document indexDoc = this.createALAIndexDocument(cname, id, lsid, doc.get(NameIndexField.AUTHOR.toString()), doc.get(NameIndexField.RANK.toString()), doc.get(NameIndexField.RANK_ID.toString()), Integer.toString(left), Integer.toString(right), newcl);
        this.writer.addDocument(indexDoc);
        return right + 1;
    }

    /**
     * Returns the canonical form of a scientific name.
     *
     * @param name name to parse
     * @return the parser's canonical name when the name is of a parsable type, otherwise
     *         {@code name} unchanged (including when parsing fails)
     */
    private String getCanonical(String name) {
        try {
            ParsedName pn = this.parser.parse(name);
            if (pn != null && pn.isParsableType()) {
                return pn.canonicalName();
            }
        }
        catch (Exception e) {
            // Best effort: an unparsable name is indexed as supplied.
            log.debug("Unable to parse \"" + name + "\", using the name as supplied", e);
        }
        return name;
    }

    /**
     * Adds a synonym document to the scientific name index for every archive record whose
     * accepted name usage id is not blank and differs from both its own id and its LSID.
     *
     * @param dwcaDir location of the scientific names DwC-A
     * @throws Exception when the archive cannot be read or the index cannot be written
     */
    private void addSynonymsToIndex(String dwcaDir) throws Exception {
        Archive archive = ArchiveFactory.openArchive(new File(dwcaDir));
        ClosableIterator<DarwinCoreRecord> it = archive.iteratorDwc();
        int i = 0;
        int count = 0;
        try {
            while (it.hasNext()) {
                DarwinCoreRecord dwcr = it.next();
                ++i;
                String id = dwcr.getId();
                String lsid = dwcr.getTaxonID() != null ? dwcr.getTaxonID() : id;
                String acceptedId = dwcr.getAcceptedNameUsageID();
                boolean synonym = StringUtils.isNotBlank(acceptedId) && !StringUtils.equals(acceptedId, id) && !StringUtils.equals(acceptedId, lsid);
                if (synonym) {
                    ++count;
                    this.writer.addDocument(this.createALASynonymDocument(dwcr.getScientificName(), dwcr.getScientificNameAuthorship(), id, lsid, lsid, acceptedId, acceptedId, 1.0f, dwcr.getTaxonomicStatus()));
                }
                if (i % PROGRESS_INTERVAL == 0) {
                    log.debug("Processed " + i + " records " + count + " synonyms");
                }
            }
        } finally {
            it.close();
        }
    }

    /**
     * Command line entry point.
     *
     * <p>Options: {@code -load}, {@code -search} and {@code -all} select the stages to run;
     * {@code -dwca} (required), {@code -irmng}, {@code -common}, {@code -target} and
     * {@code -tmp} supply the input and output locations. The process exits with -1 when
     * {@code -dwca} is missing.
     *
     * @param args command line arguments
     */
    public static void main(String[] args) {
        Options options = new Options();
        options.addOption("load", false, "Generate the load index");
        options.addOption("all", false, "Generates the load index and search index");
        options.addOption("search", false, "Generates the search index");
        options.addOption("irmng", true, "The absolute path to the irmng DWCA. irmng is used to detect homonyms");
        options.addOption("dwca", true, "The absolute path to the dwca for the scientific names");
        options.addOption("target", true, "The target directory for the name matching index");
        options.addOption("tmp", true, "The tmp directory for the load index");
        options.addOption("common", true, "The common name file");
        BasicParser parser = new BasicParser();
        try {
            CommandLine line = parser.parse(options, args);
            boolean all = line.hasOption("all");
            boolean load = all || line.hasOption("load");
            boolean search = all || line.hasOption("search");
            if (line.getOptionValue("dwca") == null) {
                System.out.println("Unable to index without scientific name DWCA");
                System.exit(-1);
            }
            DwcaNameIndexer indexer = new DwcaNameIndexer();
            indexer.create(load, search, line.getOptionValue("target", "/data/lucene/namematching"), line.getOptionValue("tmp", "/data/tmp/lucene/nmload"), line.getOptionValue("dwca"), line.getOptionValue("irmng"), line.getOptionValue("common"));
        }
        catch (Exception e) {
            log.error("Unable to generate the name matching indexes", e);
            // Progress is reported on the console, so the failure is echoed there as well.
            e.printStackTrace();
        }
    }
}

