/*
 * Decompiled with CFR 0.152.
 */
package au.org.ala.names.search;

import au.org.ala.names.lucene.analyzer.LowerCaseKeywordAnalyzer;
import au.org.ala.names.model.LinnaeanRankClassification;
import au.org.ala.names.model.NameIndexField;
import au.org.ala.names.model.NameSearchResult;
import au.org.ala.names.model.RankType;
import au.org.ala.names.search.ALANameIndexer;
import au.org.ala.names.search.ALANameSearcher;
import com.opencsv.CSVReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.time.DateFormatUtils;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.gbif.dwc.terms.DcTerm;
import org.gbif.dwc.terms.DwcTerm;
import org.gbif.dwc.terms.GbifTerm;
import org.gbif.dwca.io.Archive;
import org.gbif.dwca.io.ArchiveFactory;
import org.gbif.dwca.io.ArchiveField;
import org.gbif.dwca.io.ArchiveFile;
import org.gbif.dwca.record.DarwinCoreRecord;
import org.gbif.dwca.record.Record;
import org.gbif.dwca.record.StarRecord;
import org.gbif.ecat.model.ParsedName;
import org.gbif.utils.file.ClosableIterator;

public class DwcaNameIndexer
extends ALANameIndexer {
    /** Class logger. */
    protected static Logger log = Logger.getLogger(DwcaNameIndexer.class);
    /** Maximum number of hits requested per query when paging through the loading index. */
    private static int PAGE_SIZE = 25000;
    /** When true, {@code begin()} opens a writer on the temporary loading index in {@code tmpDir}. */
    private boolean loadingIndex;
    /** When true, {@code begin()} opens the "cb", "id" and "vernacular" index writers under {@code targetDir}. */
    private boolean sciIndex;
    /** Parent directory of the generated search indexes ("cb", "id", "vernacular", "irmng"). */
    private File targetDir;
    /** Directory holding the temporary loading index. */
    private File tmpDir;
    /** Searcher over the loading index; opened lazily by {@code getLoadIdxResults} and reset by {@code commitLoadingIndexes}. */
    private IndexSearcher lsearcher;
    /** Writer for the "cb" index (accepted names and synonyms). */
    private IndexWriter writer = null;
    /** Writer for the temporary loading index in {@code tmpDir}. */
    private IndexWriter loadingIndexWriter = null;
    /** Writer for the "vernacular" (common name) index. */
    private IndexWriter vernacularIndexWriter = null;
    /** Writer for the "id" index. */
    private IndexWriter idWriter = null;
    /** Analyzer handed to the "cb", "id" and "irmng" index writers. */
    private LowerCaseKeywordAnalyzer analyzer;
    /** Boost multipliers keyed by datasetID, as parsed by {@code buildPriorities}. */
    private Map<String, Float> priorities;

    public DwcaNameIndexer(File targetDir, File tmpDir, Properties priorities, boolean loadingIndex, boolean sciIndex) {
        this.targetDir = targetDir;
        this.tmpDir = tmpDir;
        this.loadingIndex = loadingIndex;
        this.sciIndex = sciIndex;
        this.analyzer = new LowerCaseKeywordAnalyzer();
        this.priorities = this.buildPriorities(priorities);
    }

    /**
     * Opens (in create mode) the index writers selected by the constructor flags:
     * the loading index in {@code tmpDir} and/or the "cb", "id" and "vernacular"
     * indexes under {@code targetDir}.
     *
     * @throws Exception if a writer cannot be created
     */
    public void begin() throws Exception {
        if (this.loadingIndex) {
            Analyzer loadingAnalyzer = new KeywordAnalyzer();
            this.loadingIndexWriter = this.createIndexWriter(this.tmpDir, loadingAnalyzer, true);
        }
        if (!this.sciIndex) {
            return;
        }
        this.writer = this.createIndexWriter(new File(this.targetDir, "cb"), this.analyzer, true);
        this.idWriter = this.createIndexWriter(new File(this.targetDir, "id"), this.analyzer, true);
        Analyzer vernacularAnalyzer = new KeywordAnalyzer();
        this.vernacularIndexWriter = this.createIndexWriter(new File(this.targetDir, "vernacular"), vernacularAnalyzer, true);
    }

    /**
     * Closes every open index writer and clears the corresponding field.
     * A failure to close one writer is logged and does not stop the others
     * from being closed.
     */
    @Override
    public void commit() {
        IndexWriter pending = this.loadingIndexWriter;
        this.loadingIndexWriter = null;
        this.closeWriterLoggingFailure(pending, "Unable to close loading index");

        pending = this.writer;
        this.writer = null;
        this.closeWriterLoggingFailure(pending, "Unable to close index");

        pending = this.vernacularIndexWriter;
        this.vernacularIndexWriter = null;
        this.closeWriterLoggingFailure(pending, "Unable to close vernacular index");

        pending = this.idWriter;
        this.idWriter = null;
        this.closeWriterLoggingFailure(pending, "Unable to close index");
    }

    /**
     * Closes a single writer, logging (rather than propagating) an I/O failure.
     *
     * @param indexWriter    the writer to close; ignored when null
     * @param failureMessage the message logged if closing fails
     */
    private void closeWriterLoggingFailure(IndexWriter indexWriter, String failureMessage) {
        if (indexWriter == null) {
            return;
        }
        try {
            indexWriter.close();
        }
        catch (IOException ex) {
            log.error(failureMessage, ex);
        }
    }

    /**
     * Converts the priority properties into a datasetID to multiplier map.
     * Values that cannot be parsed as a float are reported and replaced by 1.0.
     *
     * @param properties the datasetID to priority properties; may be null
     * @return a mutable map of priorities, empty when {@code properties} is null
     */
    protected Map<String, Float> buildPriorities(Properties properties) {
        if (properties == null) {
            // No priorities supplied: every dataset falls back to the default boost.
            return new HashMap<String, Float>();
        }
        Map<String, Float> map = new HashMap<String, Float>(properties.size());
        for (String ds : properties.stringPropertyNames()) {
            String p = properties.getProperty(ds);
            float pr = 1.0f;
            try {
                pr = Float.parseFloat(p);
            }
            catch (NumberFormatException ex) {
                log.warn("Unable to parse priority " + p + " for " + ds + " defaulting to 1.0");
            }
            map.put(ds, Float.valueOf(pr));
        }
        return map;
    }

    /**
     * Adds the synonyms and the common name extension of a DwCA to the search
     * indexes. A missing archive is reported and skipped.
     *
     * @param namesDwc the unzipped DwCA directory
     * @throws Exception if the archive cannot be read or an index cannot be written
     */
    public void create(File namesDwc) throws Exception {
        boolean archivePresent = namesDwc != null && namesDwc.exists();
        if (!archivePresent) {
            log.warn("Skipping " + namesDwc + " as it does not exist");
            return;
        }
        log.info("Loading synonyms for " + namesDwc);
        this.addSynonymsToIndex(namesDwc);
        IndexWriter nameWriter = this.writer;
        nameWriter.commit();
        nameWriter.forceMerge(1);
        this.indexCommonNameExtension(namesDwc);
    }

    /**
     * Builds the "irmng" index under the target directory from an IRMNG DwCA.
     * A missing archive is skipped.
     *
     * @param irmngDwc the unzipped IRMNG DwCA directory
     * @throws Exception if the archive cannot be read or the index cannot be written
     */
    public void createIrmng(File irmngDwc) throws Exception {
        if (irmngDwc == null || !irmngDwc.exists()) {
            log.info("Skipping IRMNG archive " + irmngDwc + " as it does not exist");
            return;
        }
        // try-with-resources so the writer (and its write lock) is released even when indexing fails.
        try (IndexWriter irmngWriter = this.createIndexWriter(new File(this.targetDir, "irmng"), this.analyzer, true)) {
            this.indexIrmngDwcA(irmngWriter, irmngDwc.getCanonicalPath());
            irmngWriter.commit();
            irmngWriter.forceMerge(1);
        }
    }

    /**
     * Loads common names from a tab separated file into the vernacular index.
     * Only rows with exactly six columns are used; column 1 (falling back to
     * column 0) is the LSID, and a row is indexed only when that LSID is present
     * in the loading index.
     *
     * @param file the common name file; skipped when null or missing
     * @throws Exception if the file cannot be read or the index cannot be written
     */
    private void indexCommonNames(File file) throws Exception {
        if (file == null || !file.exists()) {
            log.info("Skipping common name file " + file);
            return;
        }
        log.info("Starting to load the common names from " + file);
        int i = 0;
        int count = 0;
        // NOTE(review): FileReader decodes with the platform default charset - confirm the file's encoding.
        // try-with-resources so the underlying file handle is always released.
        try (CSVReader cbreader = new CSVReader(new FileReader(file), '\t', '\"', '\\', 0)) {
            String[] values = cbreader.readNext();
            while (values != null) {
                ++i;
                if (values.length == 6) {
                    String lsid = StringUtils.isNotEmpty(values[1]) ? values[1] : values[0];
                    TopDocs result = this.getLoadIdxResults(null, "lsid", lsid, 1);
                    if (result.totalHits > 0) {
                        Document doc = this.createCommonNameDocument(values[3], values[2], lsid, values[4], 1.0f, false);
                        this.vernacularIndexWriter.addDocument(doc);
                        ++count;
                    }
                } else {
                    // Guard the diagnostic itself against a row with no columns.
                    String firstColumn = values.length > 0 ? values[0] : "";
                    log.info("Issue on line " + i + "  " + firstColumn);
                }
                if (i % 1000 == 0) {
                    log.info("Processed " + i + " common names with " + count + " added to index");
                }
                values = cbreader.readNext();
            }
        }
        log.info("Finished processing " + i + " common names with " + count + " added to index");
        this.vernacularIndexWriter.commit();
        this.vernacularIndexWriter.forceMerge(1);
    }

    /**
     * Loads the vernacular name extension of a DwCA into the vernacular index.
     * A vernacular name is indexed only when its taxon ID is present in the
     * loading index; the scientific name is taken from that loading index entry.
     *
     * @param archiveDirectory the unzipped DwCA directory
     * @throws Exception if the archive cannot be read or the index cannot be written
     */
    private void indexCommonNameExtension(File archiveDirectory) throws Exception {
        Archive archive = ArchiveFactory.openArchive(archiveDirectory);
        ArchiveFile vernacularArchiveFile = archive.getExtension(GbifTerm.VernacularName);
        if (vernacularArchiveFile == null) {
            log.info("No common names extension from found in " + archiveDirectory);
            return;
        }
        log.info("Starting to load the common names extension from " + archiveDirectory);
        int i = 0;
        int count = 0;
        Iterator<?> iter = vernacularArchiveFile.iterator();
        try {
            while (iter.hasNext()) {
                ++i;
                Record record = (Record) iter.next();
                String taxonID = record.id();
                String vernacularName = record.value(DwcTerm.vernacularName);
                String language = record.value(DcTerm.language);
                TopDocs result = this.getLoadIdxResults(null, "lsid", taxonID, 1);
                if (result.totalHits > 0) {
                    Document sciNameDoc = this.lsearcher.doc(result.scoreDocs[0].doc);
                    Document doc = this.createCommonNameDocument(vernacularName, sciNameDoc.get(NameIndexField.NAME.toString()), taxonID, language, 1.0f, false);
                    this.vernacularIndexWriter.addDocument(doc);
                    ++count;
                }
                if (i % 1000 == 0) {
                    log.info("Processed " + i + " common names with " + count + " added to index");
                }
            }
        }
        finally {
            // The archive iterator holds the extension file open; release it when it is closable.
            if (iter instanceof ClosableIterator) {
                ((ClosableIterator<?>) iter).close();
            }
        }
        log.info("Finished processing " + i + " common names with " + count + " added to index");
        this.vernacularIndexWriter.commit();
        this.vernacularIndexWriter.forceMerge(1);
    }

    /**
     * Builds the temporary loading index from the core records of a DwCA.
     * When {@code colformat} is set, a second pass rewrites each parent id so
     * that it holds the parent's LSID rather than the parent's record id.
     *
     * @param archiveDirectory the unzipped DwCA directory
     * @param colformat        true when the taxon identifier is read from dcterms:identifier
     * @throws Exception if the archive cannot be read or the index cannot be written
     */
    private void createLoadingIndex(File archiveDirectory, boolean colformat) throws Exception {
        if (archiveDirectory == null || !archiveDirectory.exists()) {
            log.warn("Unable to created loading index for " + archiveDirectory + " as it does not exisit");
            return;
        }
        if (!this.loadingIndex) {
            log.warn("Skipping loading index for " + archiveDirectory);
            return;
        }
        log.info("Starting to create the temporary loading index for " + archiveDirectory);
        Archive archive = ArchiveFactory.openArchive(archiveDirectory);
        ArchiveField nameCompleteField = archive.getCore().getField("nameComplete");
        org.gbif.dwc.terms.Term nameCompleteTerm = nameCompleteField == null ? null : nameCompleteField.getTerm();
        int i = 0;
        long start = System.currentTimeMillis();
        ClosableIterator<?> it = archive.iterator();
        try {
            while (it.hasNext()) {
                StarRecord sr = (StarRecord) it.next();
                Document doc = this.buildLoadingDocument(sr.core(), nameCompleteTerm, colformat);
                this.loadingIndexWriter.addDocument(doc);
                if (++i % 1000 != 0) {
                    continue;
                }
                long finish = System.currentTimeMillis();
                // Rate over the last 1000 records: elapsed time is a difference, not a ratio.
                float elapsedSeconds = (float) (finish - start) / 1000.0f;
                log.debug("Loading index: " + i + " records per sec: " + 1000.0f / elapsedSeconds);
                start = finish;
            }
        }
        finally {
            // Release the archive's underlying file handles.
            it.close();
        }
        log.info("Finished creating the temporary load index with " + i + " concepts");
        this.loadingIndexWriter.commit();
        this.loadingIndexWriter.forceMerge(1);
        this.loadingIndexWriter.close();
        if (colformat) {
            log.info("Col format specified, organising parent / child IDS");
            this.loadingIndexWriter = this.createIndexWriter(this.tmpDir, (Analyzer) new KeywordAnalyzer(), false);
            this.remapParentIdsToLsids();
            this.loadingIndexWriter.commit();
            this.loadingIndexWriter.forceMerge(1);
            this.loadingIndexWriter.close();
        }
    }

    /**
     * Converts one DwCA core record into a loading index document.
     *
     * @param record           the core record
     * @param nameCompleteTerm the term of the optional "nameComplete" column, or null
     * @param colformat        true when dcterms:identifier, if present, overrides the taxon ID
     * @return the document to add to the loading index
     * @throws Exception if the complete name cannot be built
     */
    private Document buildLoadingDocument(Record record, org.gbif.dwc.terms.Term nameCompleteTerm, boolean colformat) throws Exception {
        String id = record.id();
        String taxonID = record.value(DwcTerm.taxonID);
        if (taxonID == null) {
            taxonID = id;
        }
        if (colformat && record.value(DcTerm.identifier) != null) {
            taxonID = record.value(DcTerm.identifier);
        }
        String acceptedNameUsageID = record.value(DwcTerm.acceptedNameUsageID);
        String parentNameUsageID = record.value(DwcTerm.parentNameUsageID);
        String scientificName = record.value(DwcTerm.scientificName);
        String scientificNameAuthorship = record.value(DwcTerm.scientificNameAuthorship);
        String genus = record.value(DwcTerm.genus);
        String specificEpithet = record.value(DwcTerm.specificEpithet);
        String infraspecificEpithet = record.value(DwcTerm.infraspecificEpithet);
        String taxonRank = record.value(DwcTerm.taxonRank);
        String datasetID = record.value(DwcTerm.datasetID);
        String nameComplete = nameCompleteTerm == null ? null : record.value(nameCompleteTerm);
        nameComplete = this.buildNameComplete(scientificName, scientificNameAuthorship, nameComplete);

        Document doc = new Document();
        doc.add(new StringField(NameIndexField.ID.toString(), id, Field.Store.YES));
        if (StringUtils.isNotBlank(taxonID)) {
            doc.add(new StringField(NameIndexField.LSID.toString(), taxonID, Field.Store.YES));
        } else {
            log.warn("LSID is null for " + id + " " + taxonID + " " + acceptedNameUsageID);
        }
        if (StringUtils.isNotBlank(parentNameUsageID)) {
            doc.add(new StringField(NameIndexField.PARENT_ID.toString(), parentNameUsageID, Field.Store.YES));
        }
        if (StringUtils.isNotBlank(acceptedNameUsageID)) {
            doc.add(new StringField(NameIndexField.ACCEPTED.toString(), acceptedNameUsageID, Field.Store.YES));
        }
        if (StringUtils.isNotBlank(scientificName)) {
            doc.add(new StoredField(NameIndexField.NAME.toString(), scientificName));
        }
        if (StringUtils.isNotBlank(scientificNameAuthorship)) {
            doc.add(new StoredField(NameIndexField.AUTHOR.toString(), scientificNameAuthorship));
        }
        if (StringUtils.isNotBlank(nameComplete)) {
            doc.add(new StoredField(NameIndexField.NAME_COMPLETE.toString(), nameComplete));
        }
        if (StringUtils.isNotBlank(genus)) {
            doc.add(new StoredField(NameIndexField.GENUS.toString(), genus));
        }
        if (StringUtils.isNotBlank(specificEpithet)) {
            doc.add(new StoredField(NameIndexField.SPECIFIC.toString(), specificEpithet));
        }
        if (StringUtils.isNotBlank(infraspecificEpithet)) {
            doc.add(new StoredField(NameIndexField.INFRA_SPECIFIC.toString(), infraspecificEpithet));
        }
        if (StringUtils.isNotBlank(taxonRank)) {
            RankType rt = RankType.getForStrRank(taxonRank);
            if (rt != null) {
                doc.add(new StringField(NameIndexField.RANK.toString(), rt.getRank(), Field.Store.YES));
                doc.add(new StringField(NameIndexField.RANK_ID.toString(), rt.getId().toString(), Field.Store.YES));
            } else {
                // Unrecognised rank: keep the supplied text but file it as unranked.
                doc.add(new StringField(NameIndexField.RANK.toString(), taxonRank, Field.Store.YES));
                doc.add(new StringField(NameIndexField.RANK_ID.toString(), RankType.UNRANKED.getId().toString(), Field.Store.YES));
            }
        } else {
            doc.add(new StringField(NameIndexField.RANK.toString(), "Unknown", Field.Store.YES));
            doc.add(new StringField(NameIndexField.RANK_ID.toString(), RankType.UNRANKED.getId().toString(), Field.Store.YES));
        }
        // A record is accepted when it has no accepted-usage reference or that reference points at itself.
        if (StringUtils.equals(taxonID, acceptedNameUsageID) || StringUtils.equals(id, acceptedNameUsageID) || acceptedNameUsageID == null) {
            doc.add(new StringField(NameIndexField.iS_SYNONYM.toString(), "F", Field.Store.YES));
            if (StringUtils.isBlank(parentNameUsageID)) {
                doc.add(new StringField("root", "T", Field.Store.YES));
            }
        } else {
            doc.add(new StringField(NameIndexField.iS_SYNONYM.toString(), "T", Field.Store.YES));
        }
        if (StringUtils.isNotBlank(datasetID)) {
            doc.add(new StoredField(NameIndexField.DATASET_ID.toString(), datasetID));
        }
        return doc;
    }

    /**
     * Rewrites, through {@code loadingIndexWriter}, the parent id of every
     * loading index document whose parent can be found by record id, replacing
     * it with the parent's LSID. The reader used for the scan is closed before
     * returning; committing the writer is left to the caller.
     *
     * @throws IOException if the loading index cannot be read or updated
     */
    private void remapParentIdsToLsids() throws IOException {
        try (Directory directory = FSDirectory.open(this.tmpDir.toPath());
             DirectoryReader reader = DirectoryReader.open(directory)) {
            IndexSearcher loadingIdxSearch = new IndexSearcher(reader);
            int maxDocId = reader.maxDoc();
            for (int docIdx = 0; docIdx < maxDocId; ++docIdx) {
                Document currentDoc = reader.document(docIdx);
                String id = currentDoc.get("id");
                String parentId = currentDoc.get("parent_id");
                if (parentId == null) {
                    continue;
                }
                TopDocs topDocs = loadingIdxSearch.search(new TermQuery(new Term("id", parentId)), 1);
                if (topDocs.totalHits != 1) {
                    continue;
                }
                Document taxonDoc = loadingIdxSearch.doc(topDocs.scoreDocs[0].doc);
                String parentLsid = taxonDoc.get("lsid");
                if (parentLsid == null) {
                    continue;
                }
                List<IndexableField> updates = currentDoc.getFields();
                IndexableField parentID = currentDoc.getField("parent_id");
                updates.remove(parentID);
                updates.add(new StringField(NameIndexField.PARENT_ID.toString(), parentLsid, Field.Store.YES));
                this.loadingIndexWriter.updateDocument(new Term("id", id), updates);
            }
        }
    }

    /**
     * Closes the loading index writer and the reader behind the loading index
     * searcher, and clears both fields so that the next lookup reopens the
     * index. The fields are cleared even when closing fails.
     *
     * @throws IOException if the writer or the reader cannot be closed
     */
    public void commitLoadingIndexes() throws IOException {
        IndexWriter pendingWriter = this.loadingIndexWriter;
        IndexSearcher pendingSearcher = this.lsearcher;
        this.loadingIndexWriter = null;
        this.lsearcher = null;
        try {
            if (pendingWriter != null) {
                pendingWriter.close();
            }
        }
        finally {
            // The searcher was opened on its own reader in getLoadIdxResults; release it as well.
            if (pendingSearcher != null) {
                pendingSearcher.getIndexReader().close();
            }
        }
    }

    /**
     * Runs an exact term query against the loading index, opening the searcher
     * on first use.
     *
     * @param after the last hit of the previous page, or null for the first page
     * @param field the field to match
     * @param value the exact value to match
     * @param max   the maximum number of hits to return
     * @return the matching documents
     * @throws Exception if the loading index does not exist or cannot be searched
     */
    private TopDocs getLoadIdxResults(ScoreDoc after, String field, String value, int max) throws Exception {
        if (this.lsearcher == null) {
            if (!this.tmpDir.exists()) {
                throw new RuntimeException("A load index has not been generated. Please run this tool with '-load' before creating the search index.");
            }
            Directory loadDirectory = FSDirectory.open(this.tmpDir.toPath());
            this.lsearcher = new IndexSearcher(DirectoryReader.open(loadDirectory));
        }
        Query termQuery = new TermQuery(new Term(field, value));
        if (after != null) {
            return this.lsearcher.searchAfter(after, termQuery, max);
        }
        return this.lsearcher.search(termQuery, max);
    }

    /**
     * Finds the first page of concepts from which the hierarchy is walked:
     * documents flagged as root or, when there are none, the documents of the
     * highest major Linnaean rank (kingdom down to genus) that has any.
     *
     * @return the first page of starting concepts; the result of the last query
     *         tried when nothing matched
     * @throws Exception if the loading index cannot be searched
     */
    private TopDocs getRootConcepts() throws Exception {
        TopDocs hits = this.getLoadIdxResults(null, "root", "T", PAGE_SIZE);
        if (hits != null && hits.totalHits > 0) {
            log.info("Root concepts found : " + hits.totalHits);
            return hits;
        }
        log.info("No root concepts found - looking by major linnean ranks");
        // Each entry: the rank value to query, and the label used in the log message.
        String[][] fallbacks = {
            {"kingdom", "Kingdom"},
            {"phylum", "Phyla"},
            {"class", "Class"},
            {"order", "Order"},
            {"family", "Family"},
            {"genus", "Genera"},
        };
        for (String[] fallback : fallbacks) {
            hits = this.getLoadIdxResults(null, "rank", fallback[0], PAGE_SIZE);
            if (hits != null && hits.totalHits > 0) {
                log.info(fallback[1] + " concepts found : " + hits.totalHits);
                return hits;
            }
        }
        return hits;
    }

    /**
     * Walks the loading index from its starting concepts and writes the search
     * index, assigning nested-set left/right values as it goes.
     * <p>
     * Further pages of starting concepts are fetched with the same query that
     * produced the first page: {@code root=T}, or the rank query when
     * {@code getRootConcepts} fell back to a rank.
     *
     * @throws Exception if the loading index cannot be read or the search index cannot be written
     */
    private void generateIndex() throws Exception {
        log.info("Loading index from temporary index.");
        TopDocs rootConcepts = this.getRootConcepts();
        int left = 0;
        int right = 0;
        int lastRight = 0;
        int count = 0;
        // Query used for paging; corrected from the first document when the rank fallback was used.
        String pageField = "root";
        String pageValue = "T";
        boolean pageQueryResolved = false;
        while (rootConcepts != null && rootConcepts.totalHits > 0) {
            ScoreDoc lastConcept = null;
            for (ScoreDoc sd : rootConcepts.scoreDocs) {
                lastConcept = sd;
                left = right + 1;
                Document doc = this.lsearcher.doc(sd.doc);
                if (!pageQueryResolved) {
                    // A starting concept without the root flag can only have come from a rank query.
                    String rank = doc.get("rank");
                    if (!"T".equals(doc.get("root")) && rank != null) {
                        pageField = "rank";
                        pageValue = rank;
                    }
                    pageQueryResolved = true;
                }
                right = this.addIndex(doc, 1, left, new LinnaeanRankClassification(), 0);
                if (right - lastRight > 1000) {
                    log.info("Finished loading root " + doc.get(NameIndexField.LSID.toString()) + " " + doc.get(NameIndexField.NAME.toString()) + " left:" + left + " right:" + right + " root count:" + count);
                    lastRight = right;
                }
                if (++count % 10000 == 0) {
                    log.info("Loading index:" + count);
                }
            }
            rootConcepts = lastConcept == null ? null : this.getLoadIdxResults(lastConcept, pageField, pageValue, PAGE_SIZE);
            if (rootConcepts != null && rootConcepts.scoreDocs.length > 0) {
                log.info("Loading next page of root concepts");
            }
        }
    }

    /**
     * Recursively indexes a concept and all of its descendants, depth first.
     * Children are indexed before the concept itself, so that the concept's
     * right value is known when its document is written.
     *
     * @param doc          the loading index document of the concept
     * @param currentDepth the depth of the concept below the starting concept (starting at 1)
     * @param currentLeft  the left value to assign to the concept
     * @param higherClass  the classification accumulated from the ancestors
     * @param stackCheck   a counter passed down the recursion
     * @return the next free nested-set value, i.e. the concept's right value plus one
     * @throws Exception if the loading index cannot be read or the search index cannot be written
     */
    private int addIndex(Document doc, int currentDepth, int currentLeft, LinnaeanRankClassification higherClass, int stackCheck) throws Exception {
        // Children normally reference the record id; fall back to the LSID when nothing matches.
        String id = doc.get(NameIndexField.ID.toString());
        TopDocs children = this.getLoadIdxResults(null, "parent_id", id, PAGE_SIZE);
        if (children.totalHits == 0) {
            id = doc.get(NameIndexField.LSID.toString());
            children = this.getLoadIdxResults(null, "parent_id", id, PAGE_SIZE);
        }
        final int left = currentLeft;
        int right = currentLeft;
        int rankId = Integer.parseInt(doc.get(NameIndexField.RANK_ID.toString()));
        String name = doc.get(NameIndexField.NAME.toString());
        String nameComplete = doc.get(NameIndexField.NAME_COMPLETE.toString());
        String lsid = doc.get(NameIndexField.LSID.toString());
        String cname = this.getCanonical(name);

        // Extend the inherited classification with this concept when it sits at a major rank.
        LinnaeanRankClassification newcl = new LinnaeanRankClassification(higherClass);
        if (rankId == 1000) {
            newcl.setKingdom(cname);
            newcl.setKid(lsid);
        } else if (rankId == 2000) {
            newcl.setPhylum(cname);
            newcl.setPid(lsid);
        } else if (rankId == 3000) {
            newcl.setKlass(cname);
            newcl.setCid(lsid);
        } else if (rankId == 4000) {
            newcl.setOrder(cname);
            newcl.setOid(lsid);
        } else if (rankId == 5000) {
            newcl.setFamily(cname);
            newcl.setFid(lsid);
        } else if (rankId == 6000) {
            newcl.setGenus(cname);
            newcl.setGid(lsid);
        } else if (rankId == 7000) {
            newcl.setSpecies(cname);
            newcl.setSid(lsid);
        }

        while (children != null && children.scoreDocs.length > 0) {
            ScoreDoc lastChild = null;
            for (ScoreDoc child : children.scoreDocs) {
                lastChild = child;
                Document cdoc = this.lsearcher.doc(child.doc);
                // Skip a concept that lists itself as its own parent.
                if (cdoc != null && !cdoc.get("id").equals(doc.get("id"))) {
                    right = this.addIndex(cdoc, currentDepth + 1, right + 1, newcl, stackCheck++);
                }
            }
            if (lastChild == null) {
                children = null;
            } else {
                children = this.getLoadIdxResults(lastChild, "parent_id", id, PAGE_SIZE);
            }
            if (children != null && children.scoreDocs.length > 0) {
                log.info("Loading next page of children for " + id);
            }
        }
        if (left % 2000 == 0) {
            log.debug("Last processed lft:" + left + " rgt:" + right + " depth:" + currentDepth + " classification " + newcl);
        }
        float boost = this.getBoost(doc.get(NameIndexField.DATASET_ID.toString()), rankId);
        Document indexDoc = this.createALAIndexDocument(
                name,
                doc.get(NameIndexField.ID.toString()),
                lsid,
                doc.get(NameIndexField.AUTHOR.toString()),
                doc.get(NameIndexField.RANK.toString()),
                doc.get(NameIndexField.RANK_ID.toString()),
                Integer.toString(left),
                Integer.toString(right),
                newcl,
                nameComplete,
                boost);
        this.writer.addDocument(indexDoc);
        return right + 1;
    }

    /**
     * Computes the boost for a name: the priority configured for its dataset
     * (1.0 when none is configured), multiplied by 5 when the rank id is a
     * non-negative multiple of 1000.
     *
     * @param datasetID the dataset the name comes from; may be null
     * @param rankId    the rank id of the name, or a negative value for none
     * @return the boost to apply
     */
    protected float getBoost(String datasetID, int rankId) {
        float boost = 1.0f;
        if (this.priorities.containsKey(datasetID)) {
            boost = this.priorities.get(datasetID).floatValue();
        }
        boolean majorRank = rankId >= 0 && rankId % 1000 == 0;
        return majorRank ? boost * 5.0f : boost;
    }

    /**
     * Returns the canonical form of a scientific name, falling back to the
     * supplied name when it cannot be parsed.
     *
     * @param name the scientific name
     * @return the canonical name when the parser yields a parsable result, otherwise {@code name}
     */
    private String getCanonical(String name) {
        try {
            ParsedName pn = this.parser.parse(name);
            if (pn != null && pn.isParsableType()) {
                return pn.canonicalName();
            }
        }
        catch (Exception ex) {
            // Best effort only: an unparsable name is indexed as supplied, but leave a trace.
            if (log.isDebugEnabled()) {
                log.debug("Unable to parse " + name + ", using the name as supplied: " + ex.getMessage());
            }
        }
        return name;
    }

    /**
     * Adds every synonym in a DwCA to the search index. A record is a synonym
     * when it has an accepted name usage ID that differs from both its record
     * id and its taxon ID. A failure on one record is logged and does not stop
     * the load.
     *
     * @param dwcaDir the unzipped DwCA directory
     * @throws Exception if the archive cannot be opened or iterated
     */
    private void addSynonymsToIndex(File dwcaDir) throws Exception {
        Archive archive = ArchiveFactory.openArchive(dwcaDir);
        ArchiveField nameCompleteField = archive.getCore().getField("nameComplete");
        org.gbif.dwc.terms.Term nameCompleteTerm = nameCompleteField == null ? null : nameCompleteField.getTerm();
        int i = 0;
        int count = 0;
        ClosableIterator<?> it = archive.iteratorDwc();
        try {
            while (it.hasNext()) {
                DarwinCoreRecord dwcr = (DarwinCoreRecord) it.next();
                ++i;
                String lsid = dwcr.getTaxonID() != null ? dwcr.getTaxonID() : dwcr.getId();
                String id = dwcr.getId();
                String acceptedId = dwcr.getAcceptedNameUsageID();
                String nameComplete = nameCompleteTerm == null ? null : dwcr.getProperty(nameCompleteTerm);
                String scientificName = dwcr.getScientificName();
                String scientificNameAuthorship = dwcr.getScientificNameAuthorship();
                nameComplete = this.buildNameComplete(scientificName, scientificNameAuthorship, nameComplete);
                float boost = this.getBoost(dwcr.getDatasetID(), -1);
                if (StringUtils.isNotEmpty(acceptedId) && !StringUtils.equals(acceptedId, id) && !StringUtils.equals(acceptedId, lsid)) {
                    ++count;
                    try {
                        if (log.isDebugEnabled()) {
                            log.debug("Scientific name:  " + dwcr.getScientificName() + ", LSID:  " + dwcr.getId());
                        }
                        Document doc = this.createALASynonymDocument(scientificName, scientificNameAuthorship, nameComplete, dwcr.getId(), lsid, lsid, dwcr.getAcceptedNameUsageID(), dwcr.getAcceptedNameUsageID(), boost, dwcr.getTaxonomicStatus());
                        if (doc != null) {
                            this.writer.addDocument(doc);
                        } else {
                            log.warn("Problem processing scientificName:  " + dwcr.getScientificName() + ", ID:  " + dwcr.getId() + ", LSID:  " + lsid);
                        }
                    }
                    catch (Exception e) {
                        log.error("Exception thrown processing Scientific name:  " + dwcr.getScientificName() + ", LSID:  " + dwcr.getId());
                        log.error(e.getMessage(), e);
                    }
                }
                if (i % 1000 == 0) {
                    log.debug("Processed " + i + " records " + count + " synonyms");
                }
            }
        }
        finally {
            // Release the archive's underlying file handles.
            it.close();
        }
    }

    /**
     * Collects the directories that look like an unzipped DwCA, i.e. that
     * contain a {@code meta.xml} file.
     *
     * @param dir     the directory to inspect; ignored when null
     * @param recurse whether sub-directories are inspected as well
     * @param found   the list that matching directories are appended to
     */
    public static void findDwcas(File dir, boolean recurse, List<File> found) {
        if (dir == null) {
            return;
        }
        File meta = new File(dir, "meta.xml");
        if (meta.exists()) {
            found.add(dir);
        }
        if (!recurse || !dir.exists()) {
            return;
        }
        // listFiles() returns null when dir is not a directory or cannot be read.
        File[] entries = dir.listFiles();
        if (entries == null) {
            return;
        }
        for (File d : entries) {
            if (d.isDirectory()) {
                findDwcas(d, recurse, found);
            }
        }
    }

    /**
     * Command line entry point for building (or test-querying) the name index.
     * <p>
     * Depending on the options supplied this will print version/help
     * information, run a debug search against an existing index, or build the
     * load and/or search indexes from one or more DwCA directories. If none of
     * {@code load}, {@code search} or {@code all} is given, both indexes are
     * generated. An existing target directory is moved aside to a timestamped
     * backup before a new index is written.
     *
     * @param args The command line arguments; see the option descriptions below
     */
    public static void main(String[] args) {
        final String DEFAULT_DWCA = "/data/lucene/sources/dwca-col";
        final String DEFAULT_IRMNG = "/data/lucene/sources/IRMNG_DWC_HOMONYMS";
        final String DEFAULT_COMMON_NAME = "/data/lucene/sources/col_vernacular.txt";
        final String DEFAULT_TARGET_DIR = "/data/lucene/namematching";
        final String DEFAULT_TMP_DIR = "/data/lucene/nmload-tmp";
        final String DEFAULT_PRIORITIES = "/data/lucene/sources/priorities.properties";
        Options options = new Options();
        options.addOption("v", "version", false, "Retrieve version information");
        options.addOption("h", "help", false, "Retrieve options");
        options.addOption("col_format", false, "Pass this flag if the DWCAs are provided by Catalogue of Life (which uses dcterms:identifier to store the LSID/GUID)");
        options.addOption("all", false, "Generates the load index and search index");
        options.addOption("load", false, "Generate the load index only. The load index is a temporary index generated from the raw data files used to load the main search index");
        options.addOption("search", false, "Generates the search index. A load index must already be created for this to run.");
        options.addOption("irmng", true, "The absolute path to the unzipped irmng DwCA. IRMNG is used to detect homonyms. Defaults to " + DEFAULT_IRMNG);
        options.addOption("dwca", true, "The absolute path to the unzipped DwCA (or a directory containing unzipped DWC-A - see recurse) for the scientific names. If  Defaults to " + DEFAULT_DWCA + " See also, the recurse option");
        options.addOption("recurse", false, "Recurse through the sub-directories of the dwca directory, looking for directories with a meta.xml");
        options.addOption("priorities", true, "A properties file containing priority multiplers for the different data sources, keyed by datasetID->float. Defaults to " + DEFAULT_PRIORITIES);
        options.addOption("target", true, "The target directory to write the new name index to. Defaults to " + DEFAULT_TARGET_DIR);
        options.addOption("tmp", true, "The tmp directory for the load index. Defaults to " + DEFAULT_TMP_DIR);
        options.addOption("common", true, "The common (vernacular) name file. Defaults to " + DEFAULT_COMMON_NAME);
        options.addOption("testSearch", true, "Debug a name search. This uses the target directory to search against.");
        options.addOption("testCommonSearch", true, "Debug a common name search. This takes a taxonID for the search.");
        options.addOption("testCommonSearchLang", true, "Debug a common name search, supplying a language.");
        BasicParser parser = new BasicParser();
        try {
            CommandLine line = parser.parse(options, args);

            // Version information, followed by the usage text.
            if (line.hasOption("v")) {
                // try-with-resources tolerates a null resource, so the missing-file case is still handled below.
                try (InputStream stream = DwcaNameIndexer.class.getResourceAsStream("/git.properties")) {
                    if (stream != null) {
                        Properties properties = new Properties();
                        properties.load(stream);
                        properties.list(System.out);
                    } else {
                        System.err.println("Unable to retrieve versioning information");
                    }
                }
                new HelpFormatter().printHelp("nameindexer", options);
                System.exit(-1);
            }
            if (line.hasOption("help")) {
                new HelpFormatter().printHelp("nameindexer", options);
                System.exit(-1);
            }

            // Debug search: scientific name, then LSID, then common name.
            if (line.hasOption("testSearch")) {
                // Check the same directory that the searcher is opened on, not just the default.
                String indexPath = line.getOptionValue("target", DEFAULT_TARGET_DIR);
                if (new File(indexPath).exists()) {
                    String name = line.getOptionValue("testSearch");
                    System.out.println("Search for name: " + name);
                    ALANameSearcher searcher = new ALANameSearcher(indexPath);
                    NameSearchResult nsr = searcher.searchForRecord(name);
                    if (nsr == null) {
                        nsr = searcher.searchForRecordByLsid(name);
                    }
                    if (nsr == null) {
                        nsr = searcher.searchForCommonName(name);
                    }
                    if (nsr != null) {
                        Map<String, String> props = nsr.toMap();
                        for (Map.Entry<String, String> entry : props.entrySet()) {
                            System.out.println(entry.getKey() + ": " + entry.getValue());
                        }
                    } else {
                        System.err.println("No match for " + name);
                    }
                    System.exit(1);
                } else {
                    System.err.println("Index unreadable. Check " + indexPath);
                }
                new HelpFormatter().printHelp("nameindexer", options);
                System.exit(-1);
            }

            // Debug common name lookup by taxonID, optionally restricted to a language.
            if (line.hasOption("testCommonSearch")) {
                String indexPath = line.getOptionValue("target", DEFAULT_TARGET_DIR);
                if (new File(indexPath).exists()) {
                    String lsid = line.getOptionValue("testCommonSearch");
                    String language = line.getOptionValue("testCommonSearchLang");
                    boolean hasLanguage = StringUtils.isNotBlank(language);
                    System.out.println("Search for name: " + lsid);
                    ALANameSearcher searcher = new ALANameSearcher(indexPath);
                    String commonName = hasLanguage
                            ? searcher.getCommonNameForLSID(lsid, new String[]{language})
                            : searcher.getCommonNameForLSID(lsid);
                    if (commonName != null) {
                        System.out.println("Match: " + commonName);
                    } else if (hasLanguage) {
                        System.err.println("No common name indexed for taxonID: " + lsid + " and language " + language);
                    } else {
                        System.err.println("No common name indexed for taxonID: " + lsid);
                    }
                    System.exit(1);
                } else {
                    System.err.println("Index unreadable. Check " + indexPath);
                }
                new HelpFormatter().printHelp("nameindexer", options);
                System.exit(-1);
            }

            // Work out which indexes to build; with no explicit choice, build both.
            boolean recurse = line.hasOption("recurse");
            boolean load = line.hasOption("load") || line.hasOption("all");
            boolean search = line.hasOption("search") || line.hasOption("all");
            boolean colformat = line.hasOption("col_format");
            if (!load && !search) {
                load = true;
                search = true;
            }
            log.info("Generating loading index: " + load);
            log.info("Generating searching index: " + search);

            // Report which source files will be used.
            boolean defaultIrmngReadable = new File(DEFAULT_IRMNG).exists();
            boolean defaultCommonReadable = new File(DEFAULT_COMMON_NAME).exists();
            boolean defaultDwcaReadable = new File(DEFAULT_DWCA).exists();
            boolean defaultPriorities = new File(DEFAULT_PRIORITIES).exists();
            if (line.getOptionValue("dwca") != null) {
                log.info("Using the  DwCA name file: " + line.getOptionValue("dwca"));
            } else if (defaultDwcaReadable) {
                log.info("Using the default DwCA name file: " + DEFAULT_DWCA);
            } else {
                log.error("No DwC Archive specified and the default file path does not exist or is inaccessible. Default path: " + DEFAULT_DWCA);
                new HelpFormatter().printHelp("nameindexer", options);
                System.exit(-1);
            }
            if (line.getOptionValue("irmng") != null) {
                log.info("Using the  IRMNG name file: " + line.getOptionValue("irmng"));
            } else if (defaultIrmngReadable) {
                log.info("Using the default IRMNG name file: " + DEFAULT_IRMNG);
            } else {
                log.warn("No IRMNG export specified and the default file path does not exist or is inaccessible. Default path: " + DEFAULT_IRMNG);
            }
            if (line.getOptionValue("common") != null) {
                log.info("Using the common name file: " + line.getOptionValue("common"));
            } else if (defaultCommonReadable) {
                log.info("Using the default common name file: " + DEFAULT_COMMON_NAME);
            } else {
                log.warn("No common name export specified and the default file path does not exist or is inaccessible. Default path: " + DEFAULT_COMMON_NAME);
            }
            if (line.getOptionValue("priorities") != null) {
                log.info("Using the priorities file: " + line.getOptionValue("priorities"));
            } else if (defaultPriorities) {
                log.info("Using the default priorities file: " + DEFAULT_PRIORITIES);
            } else {
                log.warn("No priorities file, defaulting to uniform priorities.");
            }

            // Move any existing index out of the way before writing a new one.
            File targetDirectory = new File(line.getOptionValue("target", DEFAULT_TARGET_DIR));
            if (targetDirectory.exists()) {
                // 24-hour "HH": a 12-hour "hh" without an AM/PM marker would give
                // morning and afternoon backups the same name.
                String newPath = targetDirectory.getAbsolutePath() + "_" + DateFormatUtils.format(new Date(), "yyyy-MM-dd_HH-mm-ss");
                log.info("Target directory already exists. Backing up to : " + newPath);
                File newTargetDirectory = new File(newPath);
                FileUtils.moveDirectory(targetDirectory, newTargetDirectory);
                FileUtils.forceMkdir(targetDirectory);
            }

            File commonNameFile = new File(line.getOptionValue("common", DEFAULT_COMMON_NAME));
            File irmngFile = new File(line.getOptionValue("irmng", DEFAULT_IRMNG));
            File prioritiesFile = new File(line.getOptionValue("priorities", DEFAULT_PRIORITIES));
            Properties priorities = new Properties();
            if (prioritiesFile.exists()) {
                try (InputStream in = new FileInputStream(prioritiesFile)) {
                    priorities.load(in);
                }
            }

            // Locate the archives to load.
            List<File> dwcas = new ArrayList<File>();
            File base = new File(line.getOptionValue("dwca", DEFAULT_DWCA));
            DwcaNameIndexer.findDwcas(base, recurse, dwcas);
            if (dwcas.isEmpty()) {
                log.warn("No DwCA directories found under " + base);
                System.exit(1);
            }
            log.info("Loading DwCAs: " + dwcas);

            // Build the indexes.
            DwcaNameIndexer indexer = new DwcaNameIndexer(targetDirectory, new File(line.getOptionValue("tmp", DEFAULT_TMP_DIR)), priorities, load, search);
            indexer.begin();
            if (load) {
                for (File dwca : dwcas) {
                    indexer.createLoadingIndex(dwca, colformat);
                }
                indexer.commitLoadingIndexes();
            }
            indexer.generateIndex();
            for (File dwca : dwcas) {
                indexer.create(dwca);
            }
            if (commonNameFile.exists()) {
                indexer.indexCommonNames(commonNameFile);
            }
            indexer.createIrmng(irmngFile);
            indexer.commit();
        }
        catch (Exception e) {
            // Report through the logger, as the rest of this class does, so the failure is captured with its stack trace.
            log.error("Unable to build name index: " + e.getMessage(), e);
        }
    }
}

