/*
 * Decompiled with CFR 0.152.
 */
package au.org.ala.names.search;

import au.org.ala.names.lucene.analyzer.LowerCaseKeywordAnalyzer;
import au.org.ala.names.model.LinnaeanRankClassification;
import au.org.ala.names.model.NameSearchResult;
import au.org.ala.names.model.RankType;
import au.org.ala.names.model.TaxonomicType;
import au.org.ala.names.search.ALANameIndexer;
import au.org.ala.names.search.ALANameSearcher;
import au.org.ala.names.search.NameIndexField;
import au.org.ala.names.search.SearchResultException;
import au.org.ala.vocab.ALATerm;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import com.google.common.collect.Sets;
import com.opencsv.CSVParser;
import com.opencsv.CSVParserBuilder;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import com.opencsv.CSVWriter;
import com.opencsv.ICSVParser;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.Writer;
import java.nio.file.Path;
import java.time.LocalDate;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.gbif.api.exception.UnparsableException;
import org.gbif.api.model.checklistbank.ParsedName;
import org.gbif.api.model.registry.Citation;
import org.gbif.api.model.registry.Contact;
import org.gbif.api.model.registry.Dataset;
import org.gbif.api.model.registry.Identifier;
import org.gbif.dwc.terms.DcTerm;
import org.gbif.dwc.terms.DwcTerm;
import org.gbif.dwc.terms.GbifTerm;
import org.gbif.dwca.io.Archive;
import org.gbif.dwca.io.ArchiveFactory;
import org.gbif.dwca.io.ArchiveFile;
import org.gbif.dwca.record.Record;
import org.gbif.dwca.record.StarRecord;
import org.gbif.utils.file.ClosableIterator;

public class DwcaNameIndexer
extends ALANameIndexer {
    /** Log4j logger for this class. */
    protected static Logger log = Logger.getLogger(DwcaNameIndexer.class);
    // NOTE(review): not referenced in the visible part of this file; createLoadingIndex reads the
    // ALATerm.TaxonVariant extension directly. Confirm whether this constant is still needed.
    private static ALATerm TRIGGER = ALATerm.TaxonVariant;
    // Kingdom through family. NOTE(review): not used in the visible part of this file; presumably the
    // ranks whose values may be inferred for synonyms - confirm against the rest of the class.
    protected static RankType[] SYNONYM_INFERRED_RANKS = new RankType[]{RankType.KINGDOM, RankType.PHYLUM, RankType.CLASS, RankType.ORDER, RankType.FAMILY};
    /**
     * Matches text that ends with a parenthesised alphanumeric phrase, capturing everything before it as group 1.
     * createLoadingIndex uses group 1 to also record a variant name without the trailing parenthesised part.
     */
    protected static final Pattern LOCALITY_PATTERN = Pattern.compile("^([\\p{Alnum}.'()\\s]+)\\s+\\([\\p{Alnum}\\s]+\\)\\s*$");
    /** Maximum number of hits requested per query when paging through the loading index. */
    private static int PAGE_SIZE = 25000;
    /** Whether the temporary loading index is opened by begin() and populated by createLoadingIndex(). */
    private boolean loadingIndex;
    /** Whether the "cb", "id" and "vernacular" index writers are opened by begin(). */
    private boolean sciIndex;
    /** Directory under which the "cb", "id", "vernacular" and "irmng" indexes are created. */
    private File targetDir;
    /** Directory holding the temporary loading index. */
    private File tmpDir;
    /** Searcher over the loading index in tmpDir; opened lazily by getLoadIdxResults(). */
    private IndexSearcher lsearcher;
    /** Searcher over the "cb" index; opened at the end of generateIndex(). */
    private IndexSearcher cbSearcher;
    /** Writer for the "cb" index. */
    private IndexWriter writer = null;
    /** Writer for the temporary loading index in tmpDir. */
    private IndexWriter loadingIndexWriter = null;
    /** Writer for the "vernacular" index. */
    private IndexWriter vernacularIndexWriter = null;
    /** Writer for the "id" index. */
    private IndexWriter idWriter = null;
    /** Analyzer used for the "cb", "id" and "irmng" indexes. */
    private Analyzer analyzer;
    /** Priorities parsed by buildPriorities(); keys are presumably dataset identifiers - confirm. */
    private Map<String, Float> priorities;
    /** Metadata of the archives that have been loaded. */
    private Set<Dataset> sources;
    // Reset by begin(). NOTE(review): populated outside the visible part of this file.
    private Map<String, Usage> idMap;
    // Reset by begin(); looked up by LSID in generateIndex() to seed left/right values.
    private Map<String, Usage> preferredIdMap;
    // Reset to false by begin(). NOTE(review): set elsewhere in the class - confirm semantics.
    private boolean indexChanged;

    /**
     * Create an indexer. No index is opened here; call {@link #begin()} before loading data.
     *
     * @param targetDir directory that will hold the generated indexes
     * @param tmpDir directory that holds the temporary loading index
     * @param priorities properties mapping a key to a numeric priority, parsed by {@link #buildPriorities(Properties)}
     * @param loadingIndex whether the temporary loading index should be built
     * @param sciIndex whether the search indexes under {@code targetDir} should be built
     * @throws IOException declared by the constructor signature
     */
    public DwcaNameIndexer(File targetDir, File tmpDir, Properties priorities, boolean loadingIndex, boolean sciIndex) throws IOException {
        // Plain configuration first ...
        this.loadingIndex = loadingIndex;
        this.sciIndex = sciIndex;
        this.tmpDir = tmpDir;
        this.targetDir = targetDir;
        // ... then the derived state. buildPriorities is overridable, so it stays last.
        this.analyzer = LowerCaseKeywordAnalyzer.newInstance();
        this.priorities = buildPriorities(priorities);
    }

    /**
     * Open the index writers selected at construction time and reset the per-run state.
     * <p>
     * The loading index writer is opened on {@code tmpDir} when the loading index is enabled.
     * The "cb", "id" and "vernacular" writers are opened under {@code targetDir} when the
     * search index is enabled.
     *
     * @throws Exception if an index writer cannot be created
     */
    public void begin() throws Exception {
        if (loadingIndex) {
            loadingIndexWriter = createIndexWriter(tmpDir, new KeywordAnalyzer(), true);
        }
        if (sciIndex) {
            File cbDir = new File(targetDir, "cb");
            writer = createIndexWriter(cbDir, analyzer, true);
            File idDir = new File(targetDir, "id");
            idWriter = createIndexWriter(idDir, analyzer, true);
            File vernacularDir = new File(targetDir, "vernacular");
            vernacularIndexWriter = createIndexWriter(vernacularDir, new KeywordAnalyzer(), true);
        }
        indexChanged = false;
        idMap = new TreeMap<>();
        preferredIdMap = new TreeMap<>();
        sources = new HashSet<>();
    }

    /**
     * Close every index writer that is currently open.
     * <p>
     * Each writer is closed independently: an {@link IOException} from one writer is logged and
     * does not stop the others from being closed. Each writer field is set to {@code null}
     * afterwards, whether or not its close succeeded.
     */
    @Override
    public void commit() {
        IndexWriter open = this.loadingIndexWriter;
        if (open != null) {
            try {
                open.close();
            } catch (IOException ex) {
                log.error("Unable to close loading index", ex);
            } finally {
                this.loadingIndexWriter = null;
            }
        }
        open = this.writer;
        if (open != null) {
            try {
                open.close();
            } catch (IOException ex) {
                log.error("Unable to close index", ex);
            } finally {
                this.writer = null;
            }
        }
        open = this.vernacularIndexWriter;
        if (open != null) {
            try {
                open.close();
            } catch (IOException ex) {
                log.error("Unable to close vernacular index", ex);
            } finally {
                this.vernacularIndexWriter = null;
            }
        }
        open = this.idWriter;
        if (open != null) {
            try {
                open.close();
            } catch (IOException ex) {
                log.error("Unable to close index", ex);
            } finally {
                this.idWriter = null;
            }
        }
    }

    /**
     * Convert a set of string properties into a map of numeric priorities.
     * <p>
     * A value that cannot be parsed as a float is logged as a warning and replaced by 1.0.
     *
     * @param properties the properties to convert; each string-valued entry becomes one map entry
     * @return a new map from property name to parsed priority
     */
    protected Map<String, Float> buildPriorities(Properties properties) {
        Map<String, Float> parsed = new HashMap<>(properties.size());
        properties.stringPropertyNames().forEach(dataset -> {
            String raw = properties.getProperty(dataset);
            float value;
            try {
                value = Float.parseFloat(raw);
            } catch (NumberFormatException ex) {
                log.warn("Unable to parse priority " + raw + " for " + dataset + " defaulting to 1.0");
                value = 1.0f;
            }
            parsed.put(dataset, value);
        });
        return parsed;
    }

    /**
     * Load a taxon Darwin Core Archive into the search indexes.
     * <p>
     * Adds the archive's synonyms to the main index, commits and merges that index, then indexes
     * the archive's vernacular name and identifier extensions and records the archive metadata
     * as a source.
     *
     * @param namesDwc the archive location
     * @return {@code true} if the archive was loaded, {@code false} if it was skipped because it
     *         does not exist or its core row type is not {@code Taxon}
     * @throws Exception if the archive cannot be read or an index cannot be written
     */
    public boolean create(File namesDwc) throws Exception {
        if (namesDwc == null || !namesDwc.exists()) {
            log.warn("Skipping " + namesDwc + " as it does not exist");
            return false;
        }
        Archive archive = ArchiveFactory.openArchive(namesDwc);
        if (!archive.getCore().getRowType().equals(DwcTerm.Taxon)) {
            log.info("Skipping non-taxon DwCA");
            return false;
        }
        log.info("Loading synonyms for " + namesDwc);
        this.addSynonymsToIndex(archive);
        this.writer.commit();
        this.writer.forceMerge(1);
        log.info("Loading vernacular for " + namesDwc);
        this.indexCommonNameExtension(archive);
        // Message previously read "identfiiers".
        log.info("Loading identifiers for " + namesDwc);
        this.indexIdentifierExtension(archive);
        this.sources.add(archive.getMetadata());
        return true;
    }

    /**
     * Build the "irmng" index under the target directory.
     * <p>
     * If the archive exists, it is indexed and its metadata is recorded as a source. The index is
     * created, committed and merged even when no archive is supplied.
     *
     * @param irmngDwc the IRMNG archive location; may be {@code null} or missing
     * @throws Exception if the archive cannot be read or the index cannot be written
     */
    public void createIrmng(File irmngDwc) throws Exception {
        // try-with-resources: the writer (and its directory write lock) is released even when
        // indexing fails part-way through.
        try (IndexWriter irmngWriter = this.createIndexWriter(new File(this.targetDir, "irmng"), this.analyzer, true)) {
            if (irmngDwc != null && irmngDwc.exists()) {
                this.indexIrmngDwcA(irmngWriter, irmngDwc.getCanonicalPath());
                Archive source = ArchiveFactory.openArchive(irmngDwc);
                this.sources.add(source.getMetadata());
            }
            irmngWriter.commit();
            irmngWriter.forceMerge(1);
        }
    }

    /**
     * Add extra identifiers from a file to the "id" index.
     * <p>
     * Delegates to the two-argument {@code createExtraIdIndex} overload (not visible in this part of
     * the file; presumably inherited), passing this indexer's identifier writer.
     *
     * @param extraIds the file containing the extra identifiers
     * @throws Exception if the delegate fails
     */
    public void createExtraIdIndex(File extraIds) throws Exception {
        this.createExtraIdIndex(this.idWriter, extraIds);
    }

    /**
     * Load vernacular names from a Darwin Core Archive whose core row type is {@code VernacularName}.
     * <p>
     * Each record is matched to a taxon, first by {@code taxonID} and then, failing that, by a
     * classification search on the scientific name and the supplied higher taxonomy. Records that
     * cannot be matched are logged and skipped. Matched records are written to the vernacular
     * index against the accepted LSID when there is one, otherwise the matched LSID.
     *
     * @param verncacularDwc the vernacular archive location
     * @return {@code true} if the archive was processed, {@code false} if it was skipped because it
     *         is missing, is not a vernacular archive, or lacks required terms
     * @throws Exception if the archive cannot be read or the index cannot be written
     */
    private boolean loadCommonNames(File verncacularDwc) throws Exception {
        if (verncacularDwc == null || !verncacularDwc.exists()) {
            log.warn("Skipping " + verncacularDwc + " as it does not exist");
            return false;
        }
        Archive archive = ArchiveFactory.openArchive(verncacularDwc);
        ArchiveFile core = archive.getCore();
        if (!core.getRowType().equals(GbifTerm.VernacularName)) {
            log.info("Skipping non-vernacular DwCA");
            return false;
        }
        if (!core.hasTerm(DwcTerm.vernacularName)) {
            log.error("Vernacular file " + verncacularDwc + " requires " + DwcTerm.vernacularName);
            return false;
        }
        boolean hasScientificName = core.hasTerm(DwcTerm.scientificName);
        if (!hasScientificName && !core.hasTerm(DwcTerm.taxonID)) {
            log.error("Vernacular file " + verncacularDwc + " requires either " + DwcTerm.scientificName + " or " + DwcTerm.taxonID);
            return false;
        }
        if (hasScientificName && !core.hasTerm(DwcTerm.scientificNameAuthorship)) {
            log.warn("Vernacular file " + verncacularDwc + " has " + DwcTerm.scientificName + " but not " + DwcTerm.scientificNameAuthorship);
            return false;
        }
        // Optional terms: warn only when the term really is absent (the warning used to be unconditional).
        for (org.gbif.dwc.terms.Term term : Arrays.asList(DcTerm.language)) {
            if (!core.hasTerm(term)) {
                log.warn("Vernacular file " + verncacularDwc + " is missing " + term);
            }
        }
        log.info("Loading vernacular names for " + verncacularDwc);
        ALANameSearcher searcher = new ALANameSearcher(this.targetDir.getAbsolutePath());
        for (Record record : core) {
            String taxonId = record.value(DwcTerm.taxonID);
            String scientificName = record.value(DwcTerm.scientificName);
            String vernacularName = record.value(DwcTerm.vernacularName);
            String scientificNameAuthorship = record.value(DwcTerm.scientificNameAuthorship);
            String kingdom = record.value(DwcTerm.kingdom);
            String phylum = record.value(DwcTerm.phylum);
            String klass = record.value(DwcTerm.class_);
            String order = record.value(DwcTerm.order);
            String family = record.value(DwcTerm.family);
            String genus = record.value(DwcTerm.genus);
            String specificEpithet = record.value(DwcTerm.specificEpithet);
            String infraspecificEpithet = record.value(DwcTerm.infraspecificEpithet);
            String rank = record.value(DwcTerm.taxonRank);
            String language = record.value(DcTerm.language);
            LinnaeanRankClassification classification = new LinnaeanRankClassification();
            NameSearchResult result = null;
            if (taxonId != null) {
                result = searcher.searchForRecordByLsid(taxonId);
            }
            if (result == null && scientificName != null) {
                // Fall back to a name match using whatever classification the record supplies.
                classification.setScientificName(scientificName);
                classification.setAuthorship(scientificNameAuthorship);
                classification.setKingdom(kingdom);
                classification.setPhylum(phylum);
                classification.setKlass(klass);
                classification.setOrder(order);
                classification.setFamily(family);
                classification.setGenus(genus);
                classification.setSpecificEpithet(specificEpithet);
                classification.setInfraspecificEpithet(infraspecificEpithet);
                classification.setRank(rank);
                try {
                    result = searcher.searchForRecord(classification, false, false);
                } catch (SearchResultException ex) {
                    log.warn("Can't find matching taxon for " + classification + " and vernacular name " + vernacularName + " exception " + ex.getMessage());
                    continue;
                }
            }
            if (result == null) {
                log.warn("Can't find matching taxon for " + classification + " and vernacular name " + vernacularName);
                continue;
            }
            String lsid = result.getAcceptedLsid() != null ? result.getAcceptedLsid() : result.getLsid();
            if (scientificName == null) {
                scientificName = result.getRankClassification().getScientificName();
            }
            Document doc = this.createCommonNameDocument(vernacularName, scientificName, lsid, language, false);
            this.vernacularIndexWriter.addDocument(doc);
        }
        this.sources.add(archive.getMetadata());
        return true;
    }

    /**
     * Load common names from a tab-separated file into the vernacular index.
     * <p>
     * Only rows with exactly six columns are used. The LSID is taken from the second column when
     * it is non-empty, otherwise from the first. A row is indexed only if that LSID is present in
     * the loading index. Other rows are reported in the log and skipped.
     *
     * @param file the common names file; a {@code null} or missing file is skipped
     * @throws Exception if the file cannot be read or the index cannot be written
     */
    private void indexCommonNames(File file) throws Exception {
        if (file == null || !file.exists()) {
            log.info("Skipping common name file " + file);
            return;
        }
        log.info("Starting to load the common names from " + file);
        int i = 0;
        int count = 0;
        // try-with-resources: the reader used to be left open.
        try (CSVReader cbreader = this.buildCSVReader(file.getPath(), '\t', '"', '\\', 0)) {
            for (String[] values = cbreader.readNext(); values != null; values = cbreader.readNext()) {
                ++i;
                if (values.length == 6) {
                    String lsid = StringUtils.isNotEmpty(values[1]) ? values[1] : values[0];
                    TopDocs result = this.getLoadIdxResults(null, "lsid", lsid, 1);
                    if (result.totalHits.value > 0L) {
                        Document doc = this.createCommonNameDocument(values[3], values[2], lsid, values[4], false);
                        this.vernacularIndexWriter.addDocument(doc);
                        ++count;
                    }
                } else {
                    // Guard against a row with no columns at all.
                    log.info("Issue on line " + i + "  " + (values.length > 0 ? values[0] : ""));
                }
                if (i % 1000 == 0) {
                    log.info("Processed " + i + " common names with " + count + " added to index");
                }
            }
        }
        log.info("Finished processing " + i + " common names with " + count + " added to index");
        this.vernacularIndexWriter.commit();
        this.vernacularIndexWriter.forceMerge(1);
    }

    /**
     * Index the vernacular name extension of an archive, if it has one.
     * <p>
     * A vernacular name is indexed only when its record id is found as an LSID in the loading
     * index. The scientific name stored with that loading-index entry is indexed alongside it.
     * The vernacular index is committed and merged at the end.
     *
     * @param archive the archive to read the extension from
     * @throws Exception if the extension cannot be read or the index cannot be written
     */
    private void indexCommonNameExtension(Archive archive) throws Exception {
        ArchiveFile extension = archive.getExtension(GbifTerm.VernacularName);
        if (extension == null) {
            log.info("No common names extension from found in " + archive.getLocation());
            return;
        }
        Iterator records = extension.iterator();
        log.info("Starting to load the common names extension from " + archive.getLocation());
        int processed = 0;
        int added = 0;
        while (records.hasNext()) {
            processed++;
            Record entry = (Record) records.next();
            String taxonID = entry.id();
            String vernacularName = entry.value(DwcTerm.vernacularName);
            String language = entry.value(DcTerm.language);
            TopDocs hits = this.getLoadIdxResults(null, "lsid", taxonID, 1);
            if (hits.totalHits.value > 0L) {
                Document taxon = this.lsearcher.doc(hits.scoreDocs[0].doc);
                String scientificName = taxon.get(NameIndexField.NAME.toString());
                Document commonName = this.createCommonNameDocument(vernacularName, scientificName, taxonID, language, false);
                this.vernacularIndexWriter.addDocument(commonName);
                added++;
            }
            if (processed % 10000 == 0) {
                log.info("Processed " + processed + " common names with " + added + " added to index");
            }
        }
        log.info("Finished processing " + processed + " common names with " + added + " added to index");
        this.vernacularIndexWriter.commit();
        this.vernacularIndexWriter.forceMerge(1);
    }

    /**
     * Index the identifier extension of an archive, if it has one.
     * <p>
     * Each distinct (record id, identifier) pair is considered once. An identifier equal to its
     * own record id is ignored. An identifier is indexed only when the record id is found as an
     * LSID in the loading index. The identifier index is committed and merged at the end.
     *
     * @param archive the archive to read the extension from
     * @throws Exception if the extension cannot be read or the index cannot be written
     */
    private void indexIdentifierExtension(Archive archive) throws Exception {
        ArchiveFile identifierArchiveFile = archive.getExtension(GbifTerm.Identifier);
        if (identifierArchiveFile == null) {
            log.info("No identifier extension found in " + archive.getLocation());
            return;
        }
        Iterator<Record> iter = identifierArchiveFile.iterator();
        // Identifiers already handled, keyed by taxon id.
        Map<String, Set<String>> seen = new HashMap<>();
        int i = 0;
        int count = 0;
        log.info("Starting to load the identifiers extension from " + archive.getLocation());
        while (iter.hasNext()) {
            ++i;
            Record record = iter.next();
            String taxonID = record.id();
            String identifier = record.value(DcTerm.identifier);
            Set<String> seenIds = seen.computeIfAbsent(taxonID, k -> new HashSet<>());
            // Set.add returns false for a repeat, so this both tests and records the identifier.
            if (!taxonID.equals(identifier) && seenIds.add(identifier)) {
                TopDocs result = this.getLoadIdxResults(null, "lsid", taxonID, 1);
                if (result.totalHits.value > 0L) {
                    Document sciNameDoc = this.lsearcher.doc(result.scoreDocs[0].doc);
                    Document doc = this.createIdentifierDocument(identifier, sciNameDoc.get(NameIndexField.NAME.toString()), taxonID);
                    this.idWriter.addDocument(doc);
                    ++count;
                }
            }
            if (i % 10000 == 0) {
                log.info("Processed " + i + " identifiers with " + count + " added to index");
            }
        }
        log.info("Finished processing " + i + " identifiers with " + count + " added to index");
        this.idWriter.commit();
        this.idWriter.forceMerge(1);
    }

    /**
     * Build the temporary loading index from a taxon Darwin Core Archive.
     * <p>
     * One document is written per core record. It holds the record id, LSID, parent and accepted
     * ids, name parts, rank, synonym flag, dataset id, priority and any alternative names taken
     * from the {@code TaxonVariant} extension. Records that are not synonyms and have no parent
     * are flagged with {@code root=T}. The loading index is committed and merged at the end.
     *
     * @param archiveDirectory the archive location
     * @return {@code true} if the index was built, {@code false} if the archive was skipped because
     *         it is missing, loading is disabled, or its core row type is not {@code Taxon}
     * @throws Exception if the archive cannot be read or the index cannot be written
     */
    public boolean createLoadingIndex(File archiveDirectory) throws Exception {
        if (archiveDirectory == null || !archiveDirectory.exists()) {
            log.warn("Unable to create loading index for " + archiveDirectory + " as it does not exist");
            return false;
        }
        if (!this.loadingIndex) {
            log.warn("Skipping loading index for " + archiveDirectory);
            return false;
        }
        log.info("Starting to create the temporary loading index for " + archiveDirectory);
        Archive archive = ArchiveFactory.openArchive(archiveDirectory);
        if (!archive.getCore().getRowType().equals(DwcTerm.Taxon)) {
            log.info("Skipping non-taxon DwCA");
            return false;
        }
        int i = 0;
        long start = System.currentTimeMillis();
        ClosableIterator<StarRecord> it = archive.iterator();
        try {
            while (it.hasNext()) {
                Document doc = new Document();
                StarRecord dwcr = it.next();
                Record core = dwcr.core();
                String id = core.id();
                String coreTaxonID = core.value(DwcTerm.taxonID);
                String taxonID = coreTaxonID == null ? id : coreTaxonID;
                // Self-references carry no information, so treat them as absent.
                String acceptedNameUsageID = core.value(DwcTerm.acceptedNameUsageID);
                if (acceptedNameUsageID != null && acceptedNameUsageID.equals(taxonID)) {
                    acceptedNameUsageID = null;
                }
                String parentNameUsageID = core.value(DwcTerm.parentNameUsageID);
                if (parentNameUsageID != null && parentNameUsageID.equals(taxonID)) {
                    parentNameUsageID = null;
                }
                // When parent and accepted ids coincide, the parent link wins.
                if (parentNameUsageID != null && parentNameUsageID.equals(acceptedNameUsageID)) {
                    acceptedNameUsageID = null;
                }
                String nameComplete = core.value(ALATerm.nameComplete);
                String scientificName = core.value(DwcTerm.scientificName);
                String scientificNameAuthorship = core.value(DwcTerm.scientificNameAuthorship);
                String genus = core.value(DwcTerm.genus);
                String specificEpithet = core.value(DwcTerm.specificEpithet);
                String infraspecificEpithet = core.value(DwcTerm.infraspecificEpithet);
                String taxonRank = core.value(DwcTerm.taxonRank);
                String datasetID = core.value(DwcTerm.datasetID);
                nameComplete = this.buildNameComplete(scientificName, scientificNameAuthorship, nameComplete);
                doc.add(new StringField(NameIndexField.ID.toString(), id, Field.Store.YES));
                if (StringUtils.isNotBlank(taxonID)) {
                    doc.add(new StringField(NameIndexField.LSID.toString(), taxonID, Field.Store.YES));
                } else {
                    log.warn("LSID is null for " + id + " " + taxonID + " " + acceptedNameUsageID);
                }
                if (StringUtils.isNotBlank(parentNameUsageID)) {
                    doc.add(new StringField("parent_id", parentNameUsageID, Field.Store.YES));
                }
                if (StringUtils.isNotBlank(acceptedNameUsageID)) {
                    doc.add(new StringField(NameIndexField.ACCEPTED.toString(), acceptedNameUsageID, Field.Store.YES));
                }
                if (StringUtils.isNotBlank(scientificName)) {
                    doc.add(new StoredField(NameIndexField.NAME.toString(), scientificName));
                }
                if (StringUtils.isNotBlank(scientificNameAuthorship)) {
                    doc.add(new StoredField(NameIndexField.AUTHOR.toString(), scientificNameAuthorship));
                }
                if (StringUtils.isNotBlank(nameComplete)) {
                    doc.add(new StoredField(NameIndexField.NAME_COMPLETE.toString(), nameComplete));
                }
                if (StringUtils.isNotBlank(genus)) {
                    doc.add(new StoredField(NameIndexField.GENUS.toString(), genus));
                }
                if (StringUtils.isNotBlank(specificEpithet)) {
                    doc.add(new StoredField(NameIndexField.SPECIFIC.toString(), specificEpithet));
                }
                if (StringUtils.isNotBlank(infraspecificEpithet)) {
                    doc.add(new StoredField(NameIndexField.INFRA_SPECIFIC.toString(), infraspecificEpithet));
                }
                // Rank: a recognised rank uses its canonical name and id; an unrecognised or
                // missing rank is stored with the UNRANKED id.
                String rankName;
                int rankId;
                if (StringUtils.isNotBlank(taxonRank)) {
                    RankType rt = RankType.getForStrRank(taxonRank);
                    if (rt != null) {
                        rankName = rt.getRank();
                        rankId = rt.getId().intValue();
                    } else {
                        rankName = taxonRank;
                        rankId = RankType.UNRANKED.getId().intValue();
                    }
                } else {
                    rankName = "Unknown";
                    rankId = RankType.UNRANKED.getId().intValue();
                }
                doc.add(new StringField(NameIndexField.RANK.toString(), rankName, Field.Store.YES));
                doc.add(new IntPoint(NameIndexField.RANK_ID.toString(), rankId));
                doc.add(new StoredField(NameIndexField.RANK_ID.toString(), rankId));
                if (StringUtils.equals(taxonID, acceptedNameUsageID) || StringUtils.equals(id, acceptedNameUsageID) || acceptedNameUsageID == null) {
                    doc.add(new StringField(NameIndexField.iS_SYNONYM.toString(), "F", Field.Store.YES));
                    if (StringUtils.isBlank(parentNameUsageID)) {
                        doc.add(new StringField("root", "T", Field.Store.YES));
                    }
                } else {
                    doc.add(new StringField(NameIndexField.iS_SYNONYM.toString(), "T", Field.Store.YES));
                }
                if (StringUtils.isNotBlank(datasetID)) {
                    doc.add(new StoredField(NameIndexField.DATASET_ID.toString(), datasetID));
                }
                List<Record> variants = dwcr.extension(ALATerm.TaxonVariant);
                int score = -1;
                int defaultScore = this.getScore(datasetID, -1);
                Set<String> otherNames = new HashSet<>();
                if (variants != null) {
                    for (Record variant : variants) {
                        String priority = variant.value(ALATerm.priority);
                        if (priority != null) {
                            score = Math.max(score, Integer.parseInt(priority));
                        }
                        String sn = variant.value(DwcTerm.scientificName);
                        String sna = variant.value(DwcTerm.scientificNameAuthorship);
                        String nc = variant.value(ALATerm.nameComplete);
                        nc = this.buildNameComplete(sn, sna, nc);
                        // A variant without a name must not abort the load: a null would fail
                        // both the pattern match and the stored field below.
                        if (nc != null) {
                            otherNames.add(nc);
                        }
                        if (sn == null) {
                            continue;
                        }
                        otherNames.add(sn);
                        Matcher locality = LOCALITY_PATTERN.matcher(sn);
                        if (locality.matches()) {
                            otherNames.add(locality.group(1).trim());
                        }
                    }
                }
                doc.add(new StoredField(NameIndexField.PRIORITY.toString(), score < 0 ? defaultScore : score));
                for (String name : otherNames) {
                    doc.add(new StoredField(NameIndexField.OTHER_NAMES.toString(), name));
                }
                this.loadingIndexWriter.addDocument(doc);
                if (++i % 1000 == 0) {
                    long finish = System.currentTimeMillis();
                    // Elapsed time is a difference, not a ratio; clamp to 1ms to avoid dividing by zero.
                    long elapsed = Math.max(1L, finish - start);
                    log.debug("Loading index: " + i + " records per sec: " + 1000.0f / ((float) elapsed / 1000.0f));
                    start = finish;
                }
            }
        } finally {
            // Release the archive's underlying file handles even if indexing fails.
            it.close();
        }
        log.info("Finished creating the temporary load index with " + i + " concepts");
        this.loadingIndexWriter.commit();
        this.loadingIndexWriter.forceMerge(1);
        return true;
    }

    /**
     * Close the loading index writer and discard the cached loading-index searcher.
     * <p>
     * The reader behind the searcher is closed so that its file handles are released. The next
     * call to {@code getLoadIdxResults} opens a fresh searcher over the loading index.
     *
     * @throws IOException if the writer or the reader cannot be closed
     */
    public void commitLoadingIndexes() throws IOException {
        if (this.loadingIndexWriter != null) {
            this.loadingIndexWriter.close();
            this.loadingIndexWriter = null;
        }
        IndexSearcher stale = this.lsearcher;
        this.lsearcher = null;
        if (stale != null) {
            // The searcher was built on a DirectoryReader that this class opened and owns.
            stale.getIndexReader().close();
        }
    }

    /**
     * Run an exact term query against the loading index, opening the searcher on first use.
     *
     * @param after the last hit of the previous page, or {@code null} to fetch the first page
     * @param field the field to match
     * @param value the exact value to match
     * @param max the maximum number of hits to return
     * @return the matching hits
     * @throws RuntimeException if no searcher is open and the loading index directory does not exist
     * @throws Exception if the index cannot be opened or searched
     */
    private TopDocs getLoadIdxResults(ScoreDoc after, String field, String value, int max) throws Exception {
        if (this.lsearcher == null) {
            if (!this.tmpDir.exists()) {
                throw new RuntimeException("A load index has not been generated. Please run this tool with '-load' before creating the search index.");
            }
            Directory directory = FSDirectory.open(this.tmpDir.toPath());
            this.lsearcher = new IndexSearcher(DirectoryReader.open(directory));
        }
        Query query = new TermQuery(new Term(field, value));
        if (after != null) {
            return this.lsearcher.searchAfter(after, query, max);
        }
        return this.lsearcher.search(query, max);
    }

    /**
     * Build the main index from the temporary loading index.
     * <p>
     * All documents flagged {@code root=T} are collected page by page, sorted with
     * {@code preferredChildOrder}, and passed in turn to {@code addIndex}. Each root starts at the
     * previous root's right value plus one, or at the preferred usage's left/right values when
     * those are larger. Finally the main index is committed and merged, and a searcher is opened on it.
     *
     * @throws Exception if the loading index cannot be read or the main index cannot be written
     */
    public void generateIndex() throws Exception {
        log.info("Loading index from temporary index.");
        ArrayList<Document> rootDocuments = new ArrayList<>();
        TopDocs page = this.getLoadIdxResults(null, "root", "T", PAGE_SIZE);
        while (page != null && page.totalHits.value > 0L) {
            ScoreDoc lastHit = null;
            for (ScoreDoc hit : page.scoreDocs) {
                lastHit = hit;
                rootDocuments.add(this.lsearcher.doc(hit.doc));
            }
            // An empty page leaves lastHit null, which ends the paging.
            page = lastHit == null ? null : this.getLoadIdxResults(lastHit, "root", "T", PAGE_SIZE);
            if (page != null && page.scoreDocs.length > 0) {
                log.info("Loading next page of root concepts");
            }
        }
        rootDocuments.sort(this::preferredChildOrder);
        int right = 1;
        int lastRight = 1;
        int count = 0;
        for (Document root : rootDocuments) {
            String lsid = root.get(NameIndexField.LSID.toString());
            Usage preferred = this.preferredIdMap.get(lsid);
            int left = right + 1;
            int limitRight = right + 1;
            if (preferred != null) {
                left = Math.max(left, preferred.getLeft());
                limitRight = Math.max(limitRight, preferred.getRight());
            }
            right = this.addIndex(root, preferred, 1, left, limitRight, new LinnaeanRankClassification(), 0);
            // Only report roots that added a sizeable subtree since the last report.
            if (right - lastRight > 1000) {
                log.info("Finished loading root " + lsid + " " + root.get(NameIndexField.NAME.toString()) + " left:" + left + " right:" + right + " root count:" + count);
                lastRight = right;
            }
            count++;
            if (count % 10000 == 0) {
                log.info("Loading index:" + count);
            }
        }
        this.writer.commit();
        this.writer.forceMerge(1);
        this.cbSearcher = new IndexSearcher(DirectoryReader.open(this.writer.getDirectory()));
    }

    /**
     * Recursively writes a load-index document and its non-synonym children to the
     * search index, assigning left- and right-values as it goes.
     *
     * @param doc          the load-index document to add
     * @param preferred    the entry for this document in the preferred ID map, or null if there is none
     * @param currentDepth depth of this document below the roots (roots are called with 1); only used for debug logging
     * @param currentLeft  the left-value to assign to this document
     * @param limitRight   the right-value this document is expected to stay within; exceeding it sets {@code indexChanged}
     * @param higherClass  classification accumulated from the ancestors; copied, never modified
     * @param stackCheck   recursion depth counter; children are dropped once it reaches 1000
     * @return the right-value assigned to this document plus one
     * @throws Exception if the load index cannot be searched or the document cannot be written
     */
    private int addIndex(Document doc, Usage preferred, int currentDepth, int currentLeft, int limitRight, LinnaeanRankClassification higherClass, int stackCheck) throws Exception {
        IndexableField scoreField;
        int left;
        // Children are looked up by this document's ID first and, if that finds nothing, by its LSID.
        // Note that `id` is reassigned, so later paging and logging use whichever key was last tried.
        String id = doc.get(NameIndexField.ID.toString());
        TopDocs children = this.getLoadIdxResults(null, "parent_id", id, PAGE_SIZE);
        if (children.totalHits.value == 0L) {
            id = doc.get(NameIndexField.LSID.toString());
            children = this.getLoadIdxResults(null, "parent_id", id, PAGE_SIZE);
        }
        int right = left = currentLeft;
        int rankId = Integer.parseInt(doc.get(NameIndexField.RANK_ID.toString()));
        String name = doc.get(NameIndexField.NAME.toString());
        String nameComplete = doc.get(NameIndexField.NAME_COMPLETE.toString());
        String lsid = doc.get(NameIndexField.LSID.toString());
        String cname = name;
        ParsedName pn = null;
        try {
            pn = this.parser.parse(name);
            if (pn.isParsableType()) {
                cname = pn.canonicalName();
            }
        }
        catch (Exception exception) {
            // Parsing is best-effort: on any failure the raw name is kept as the canonical name.
        }
        // Extend a copy of the inherited classification with this document's own rank.
        // Rank ids that are not one of the cases below leave the classification unchanged.
        LinnaeanRankClassification newcl = new LinnaeanRankClassification(higherClass);
        switch (rankId) {
            case 1000: {
                newcl.setKingdom(cname);
                newcl.setKid(lsid);
                break;
            }
            case 2000: {
                newcl.setPhylum(cname);
                newcl.setPid(lsid);
                break;
            }
            case 3000: {
                newcl.setKlass(cname);
                newcl.setCid(lsid);
                break;
            }
            case 4000: {
                newcl.setOrder(cname);
                newcl.setOid(lsid);
                break;
            }
            case 5000: {
                newcl.setFamily(cname);
                newcl.setFid(lsid);
                break;
            }
            case 6000: {
                newcl.setGenus(cname);
                newcl.setGid(lsid);
                break;
            }
            case 7000: {
                newcl.setSpecies(cname);
                newcl.setSid(lsid);
                // The specific epithet is only available when the name parsed successfully.
                if (pn == null || !pn.isParsableType()) break;
                newcl.setSpecificEpithet(pn.getSpecificEpithet());
            }
        }
        // Gather all usable children (across result pages) before recursing, so they can be sorted first.
        ArrayList<Document> childDocs = new ArrayList<Document>(children.scoreDocs.length);
        while (children != null && children.scoreDocs.length > 0) {
            ScoreDoc lastChild = null;
            ScoreDoc[] scoreDocArray = children.scoreDocs;
            int n = scoreDocArray.length;
            for (int i = 0; i < n; ++i) {
                ScoreDoc child;
                lastChild = child = scoreDocArray[i];
                Document cdoc = this.lsearcher.doc(child.doc);
                if (cdoc == null) {
                    log.error((Object)("Unable to retrieve document " + child.doc));
                    continue;
                }
                // Synonyms are not expected to have a parent; they are reported and skipped.
                if ("T".equals(cdoc.get("is_synonym"))) {
                    log.error((Object)("Synonym " + cdoc.get("lsid") + " has parent " + cdoc.get("parent_id") + " ignoring"));
                    continue;
                }
                // A document that lists itself as its own parent would recurse forever; skip it.
                if (cdoc.get("id").equals(doc.get("id"))) continue;
                // Warn when the recursion is getting deep (above 900) ...
                if (stackCheck > 900) {
                    log.warn((Object)("Stack check depth " + stackCheck + "\n\t\tParent: " + doc.get("id") + " - " + doc.get("lsid") + " - " + doc.get("parent_id") + " - " + doc.get("name") + "\n\t\tChild: " + cdoc.get("id") + " - " + cdoc.get("lsid") + " _ " + cdoc.get("parent_id") + " - " + cdoc.get("name")));
                }
                // ... and stop descending altogether at depth 1000: the child is dropped with a warning.
                if (stackCheck < 1000) {
                    childDocs.add(cdoc);
                    continue;
                }
                log.warn((Object)("Stack overflow detected for name - depth " + stackCheck + "\n\t\tParent: " + doc.get("id") + " - " + doc.get("lsid") + " - " + doc.get("parent_id") + " - " + doc.get("name") + "\n\t\tChild: " + cdoc.get("id") + " - " + cdoc.get("lsid") + " _ " + cdoc.get("parent_id") + " - " + cdoc.get("name")));
            }
            // Fetch the page after the last child seen; a null page ends the loop.
            children = lastChild == null ? null : this.getLoadIdxResults(lastChild, "parent_id", id, PAGE_SIZE);
            if (children == null || children.scoreDocs.length <= 0) continue;
            log.info((Object)("Loading next page of children for " + id));
        }
        childDocs.sort(this::preferredChildOrder);
        for (Document cdoc : childDocs) {
            // Each child starts after the running right-value and inherits this document's limit;
            // a preferred entry can push the child's left further on and tighten its limit.
            int cLeft = right + 1;
            int cLimitRight = limitRight;
            Usage cusage = this.preferredIdMap.get(cdoc.get(NameIndexField.LSID.toString()));
            if (cusage != null) {
                cLeft = Math.max(cLeft, cusage.getLeft());
                cLimitRight = Math.min(cLimitRight, cusage.getRight());
            }
            // The recursive call returns the child's right-value plus one.
            right = this.addIndex(cdoc, cusage, currentDepth + 1, cLeft, cLimitRight, newcl, stackCheck + 1);
        }
        // The right-value is never smaller than the limit, nor than the preferred right-value.
        right = Math.max(right, limitRight);
        if (preferred != null) {
            right = Math.max(right, preferred.getRight());
        }
        if (left % 2000 == 0) {
            log.debug((Object)("Last processed lft:" + left + " rgt:" + right + " depth:" + currentDepth + " classification " + newcl));
        }
        // A missing priority field scores 0.
        int score = (scoreField = doc.getField(NameIndexField.PRIORITY.toString())) == null ? 0 : scoreField.numericValue().intValue();
        HashSet otherNames = Sets.newHashSet((Object[])doc.getValues(NameIndexField.OTHER_NAMES.toString()));
        Document indexDoc = this.createALAIndexDocument(name, doc.get(NameIndexField.ID.toString()), lsid, doc.get(NameIndexField.AUTHOR.toString()), doc.get(NameIndexField.RANK.toString()), doc.get(NameIndexField.RANK_ID.toString()), left, right, newcl, nameComplete, otherNames, score);
        this.writer.addDocument((Iterable)indexDoc);
        this.idMap.put(lsid, new Usage(lsid, name, TaxonomicType.ACCEPTED.getTerm(), left, right));
        // Going past the limit is recorded in indexChanged; only the first occurrence is logged.
        if (right > limitRight) {
            if (!this.indexChanged) {
                log.warn((Object)("Overflow in left- and right-values at " + lsid + " left=" + left + " right=" + right));
            }
            this.indexChanged = true;
        }
        return right + 1;
    }

    /**
     * Orders two load-index documents for insertion into the search index.
     * <p>
     * Documents are ordered by the left-value recorded for their LSID in the
     * preferred ID map; documents without an entry sort last. Ties are broken
     * by comparing the LSIDs themselves.
     *
     * @param d1 the first document
     * @param d2 the second document
     * @return a negative, zero or positive value as {@code d1} sorts before, with or after {@code d2}
     */
    protected int preferredChildOrder(Document d1, Document d2) {
        String lsid1 = d1.get(NameIndexField.LSID.toString());
        String lsid2 = d2.get(NameIndexField.LSID.toString());
        Usage usage1 = this.preferredIdMap.get(lsid1);
        Usage usage2 = this.preferredIdMap.get(lsid2);
        int left1 = usage1 == null ? Integer.MAX_VALUE : usage1.getLeft();
        int left2 = usage2 == null ? Integer.MAX_VALUE : usage2.getLeft();
        if (left1 != left2) {
            // Integer.compare rather than subtraction: MAX_VALUE minus a negative left-value would overflow
            // and flip the sign, breaking the comparator contract.
            return Integer.compare(left1, left2);
        }
        return lsid1.compareTo(lsid2);
    }

    /**
     * Builds the search-index document for a synonym.
     * <p>
     * The accepted taxon is looked up by {@code acceptedLsid} in the already-built
     * search index. When found, the synonym inherits kingdom, phylum, class, order
     * and family from it, each only if the accepted taxon's rank id is greater than
     * that rank's id. Genus and epithets come from parsing {@code scientificName}
     * and are only filled in when the parsed rank is species or below.
     *
     * @param scientificName the synonym's scientific name
     * @param author         the authorship of the name
     * @param nameComplete   the complete name
     * @param otherNames     additional names to index with the document
     * @param id             the record identifier
     * @param lsid           the taxon identifier; {@code nameLsid} is used when this is blank
     * @param nameLsid       fallback identifier for a blank {@code lsid}
     * @param acceptedLsid   identifier of the accepted taxon
     * @param acceptedId     not used by this implementation
     * @param priority       the priority score stored with the document
     * @param synonymType    stored in the synonym type field when not null
     * @return the document, or whatever {@code createALAIndexDocument} returned (possibly null)
     */
    @Override
    protected Document createALASynonymDocument(String scientificName, String author, String nameComplete, Collection<String> otherNames, String id, String lsid, String nameLsid, String acceptedLsid, String acceptedId, int priority, String synonymType) {
        lsid = StringUtils.isBlank((CharSequence)lsid) ? nameLsid : lsid;
        Document accepted = null;
        String kingdom = null;
        String phylum = null;
        String clazz = null;
        String order = null;
        String family = null;
        String genus = null;
        String specificEpithet = null;
        String infraspecificEpithet = null;
        try {
            TopDocs hits = this.cbSearcher.search((Query)new TermQuery(new Term(NameIndexField.LSID.toString(), acceptedLsid)), 1);
            if (hits.totalHits.value > 0L) {
                accepted = this.cbSearcher.doc(hits.scoreDocs[0].doc);
            }
        }
        catch (Exception ex) {
            log.warn((Object)("Error finding accepted document for " + acceptedLsid), (Throwable)ex);
        }
        if (accepted == null) {
            log.warn((Object)("No accepted document for " + scientificName + " " + lsid + " -> " + acceptedLsid));
        } else {
            String rf = accepted.get(NameIndexField.RANK_ID.toString());
            // A missing rank id is treated as -1, which disables every inherited rank below.
            int rank = rf == null ? -1 : Integer.parseInt(rf);
            if (rank > RankType.KINGDOM.getId()) {
                kingdom = accepted.get(RankType.KINGDOM.getRank());
            }
            if (rank > RankType.PHYLUM.getId()) {
                phylum = accepted.get(RankType.PHYLUM.getRank());
            }
            if (rank > RankType.CLASS.getId()) {
                clazz = accepted.get(RankType.CLASS.getRank());
            }
            if (rank > RankType.ORDER.getId()) {
                order = accepted.get(RankType.ORDER.getRank());
            }
            if (rank > RankType.FAMILY.getId()) {
                family = accepted.get(RankType.FAMILY.getRank());
            }
            try {
                ParsedName sn = this.parser.parse(scientificName);
                if (sn.getRank() != null && sn.getRank().isSpeciesOrBelow()) {
                    genus = sn.getGenusOrAbove();
                    specificEpithet = sn.getSpecificEpithet();
                    infraspecificEpithet = sn.getInfraSpecificEpithet();
                }
            }
            catch (UnparsableException ignored) {
                // Best-effort: an unparsable name simply gets no genus or epithets.
            }
        }
        Document doc = this.createALAIndexDocument(scientificName, id, lsid, null, null, kingdom, null, phylum, null, clazz, null, order, null, family, null, genus, null, null, null, 0, 0, acceptedLsid, specificEpithet, infraspecificEpithet, author, nameComplete, otherNames, priority);
        if (doc != null && synonymType != null) {
            try {
                doc.add((IndexableField)new TextField(NameIndexField.SYNONYM_TYPE.toString(), synonymType, Field.Store.YES));
            }
            catch (Exception e) {
                // Report through the logger, with the stack trace, like the other failures in this method.
                log.error((Object)("Error on " + scientificName + " " + author + " " + id + ".  " + e.getMessage()), (Throwable)e);
            }
        }
        return doc;
    }

    /**
     * Computes the default priority score for a name.
     * <p>
     * The base multiplier is the priority configured for the dataset, or 1.0 when
     * the dataset has none. A non-negative rank id that is not a multiple of 1000
     * scales the multiplier by 0.2. The result is the multiplier times 1000, rounded.
     *
     * @param datasetID the dataset the name comes from
     * @param rankId    the rank id of the name; negative values are not scaled
     * @return the rounded score
     */
    protected int getScore(String datasetID, int rankId) {
        float multiplier = 1.0f;
        if (this.priorities.containsKey(datasetID)) {
            multiplier = this.priorities.get(datasetID).floatValue();
        }
        boolean offThousandRank = rankId >= 0 && rankId % 1000 != 0;
        if (offThousandRank) {
            multiplier *= 0.2f;
        }
        return Math.round(multiplier * 1000.0f);
    }

    /**
     * Adds every synonym in a DwC archive to the search index.
     * <p>
     * A record is treated as a synonym when it has a non-empty
     * {@code acceptedNameUsageID} that differs from both its own id and its taxon ID.
     * Names from the record's taxon variant extension rows are indexed as other
     * names, and the highest variant priority is used as the score, falling back to
     * {@link #getScore} when no variant supplies one. Failures while building or
     * writing a single synonym are logged and do not stop the load.
     *
     * @param archive the archive to read
     * @throws Exception if the archive cannot be iterated or the iterator cannot be closed
     */
    private void addSynonymsToIndex(Archive archive) throws Exception {
        ClosableIterator<StarRecord> it = archive.iterator();
        try {
            int i = 0;
            int count = 0;
            while (it.hasNext()) {
                StarRecord dwcr = it.next();
                Record core = dwcr.core();
                ++i;
                String id = core.id();
                String lsid = core.value((org.gbif.dwc.terms.Term)DwcTerm.taxonID) != null ? core.value((org.gbif.dwc.terms.Term)DwcTerm.taxonID) : id;
                String acceptedNameUsageID = core.value((org.gbif.dwc.terms.Term)DwcTerm.acceptedNameUsageID);
                String nameComplete = core.value((org.gbif.dwc.terms.Term)ALATerm.nameComplete);
                String scientificName = core.value((org.gbif.dwc.terms.Term)DwcTerm.scientificName);
                String scientificNameAuthorship = core.value((org.gbif.dwc.terms.Term)DwcTerm.scientificNameAuthorship);
                nameComplete = this.buildNameComplete(scientificName, scientificNameAuthorship, nameComplete);
                String datasetID = core.value((org.gbif.dwc.terms.Term)DwcTerm.datasetID);
                String taxonomicStatus = core.value((org.gbif.dwc.terms.Term)DwcTerm.taxonomicStatus);
                if (StringUtils.isNotEmpty((CharSequence)acceptedNameUsageID) && !StringUtils.equals((CharSequence)acceptedNameUsageID, (CharSequence)id) && !StringUtils.equals((CharSequence)acceptedNameUsageID, (CharSequence)lsid)) {
                    ++count;
                    List<Record> variants = dwcr.extension((org.gbif.dwc.terms.Term)ALATerm.TaxonVariant);
                    // -1 means "no variant supplied a priority"; the dataset default is used instead.
                    int score = -1;
                    int defaultScore = this.getScore(datasetID, -1);
                    HashSet<String> otherNames = new HashSet<String>();
                    if (variants != null) {
                        for (Record variant : variants) {
                            String priority = variant.value((org.gbif.dwc.terms.Term)ALATerm.priority);
                            if (priority != null) {
                                score = Math.max(score, Integer.parseInt(priority));
                            }
                            String sn = variant.value((org.gbif.dwc.terms.Term)DwcTerm.scientificName);
                            String sna = variant.value((org.gbif.dwc.terms.Term)DwcTerm.scientificNameAuthorship);
                            String nc = variant.value((org.gbif.dwc.terms.Term)ALATerm.nameComplete);
                            nc = this.buildNameComplete(sn, sna, nc);
                            otherNames.add(sn);
                            otherNames.add(nc);
                        }
                    }
                    try {
                        if (log.isDebugEnabled()) {
                            log.debug((Object)("Scientific name:  " + scientificName + ", LSID:  " + lsid));
                        }
                        Document doc = this.createALASynonymDocument(scientificName, scientificNameAuthorship, nameComplete, otherNames, id, lsid, lsid, acceptedNameUsageID, acceptedNameUsageID, score < 0 ? defaultScore : score, taxonomicStatus);
                        if (doc != null) {
                            this.writer.addDocument((Iterable)doc);
                            this.idMap.put(lsid, new Usage(lsid, scientificName, taxonomicStatus, acceptedNameUsageID));
                        } else {
                            log.warn((Object)("Problem processing scientificName:  " + scientificName + ", ID:  " + id + ", LSID:  " + lsid));
                        }
                    }
                    catch (Exception e) {
                        log.error((Object)("Exception thrown processing Scientific name:  " + scientificName + ", LSID:  " + lsid));
                        log.error((Object)e.getMessage(), (Throwable)e);
                    }
                }
                if (i % 1000 != 0) continue;
                log.debug((Object)("Processed " + i + " records " + count + " synonyms"));
            }
        } finally {
            // The archive iterator holds resources of its own; release them even when the load fails.
            it.close();
        }
    }

    /**
     * Collects directories that look like unzipped DwC archives.
     * <p>
     * A directory qualifies when it directly contains a {@code meta.xml} file.
     * With {@code recurse} set, every sub-directory is examined the same way.
     * A path that does not exist, is not a directory, or cannot be listed
     * contributes nothing further.
     *
     * @param dir     the directory to examine
     * @param recurse whether to descend into sub-directories
     * @param found   receives the matching directories, in discovery order
     */
    public static void findDwcas(File dir, boolean recurse, List<File> found) {
        if (new File(dir, "meta.xml").exists()) {
            found.add(dir);
        }
        if (!recurse) {
            return;
        }
        // listFiles() returns null for a missing path, a regular file or an I/O error.
        File[] entries = dir.listFiles();
        if (entries == null) {
            return;
        }
        for (File entry : entries) {
            if (entry.isDirectory()) {
                findDwcas(entry, recurse, found);
            }
        }
    }

    /**
     * Writes {@code metadata.json} into the target directory.
     * <p>
     * The metadata starts from the bundled {@code /metadata-skeleton.json}
     * resource, is overlaid with the entries of the supplied skeleton file (if
     * any), and is then stamped with the creation date, the current user name and
     * the {@code indexChanged} flag. When sources are available, per-source
     * metadata, contributing organisations and bibliographic citations are added.
     *
     * @param metadataSkeleton an optional JSON file whose entries override the bundled skeleton; may be null
     * @throws Exception if a skeleton cannot be read or the metadata file cannot be written
     */
    protected void writeMetadata(File metadataSkeleton) throws Exception {
        ObjectMapper json = new ObjectMapper();
        json.enable(SerializationFeature.INDENT_OUTPUT);
        json.enable(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS);
        json.setSerializationInclusion(JsonInclude.Include.NON_EMPTY);

        @SuppressWarnings("unchecked")
        Map<String, Object> metadata = json.readValue(this.getClass().getResource("/metadata-skeleton.json"), Map.class);
        if (metadataSkeleton != null) {
            @SuppressWarnings("unchecked")
            Map<String, Object> overrides = json.readValue(metadataSkeleton, Map.class);
            metadata.putAll(overrides);
        }

        metadata.put("created", this.buildDateString(new Date()));
        metadata.put("creator", System.getProperty("user.name"));
        metadata.put("indicesChanged", this.indexChanged);

        if (this.sources != null) {
            metadata.put("source", this.sources.stream()
                .map(this::buildSourceMetadata)
                .collect(Collectors.toList()));
            // Distinct, non-blank organisation names across all source contacts.
            metadata.put("contributor", this.sources.stream()
                .flatMap(dataset -> dataset.getContacts().stream())
                .map(Contact::getOrganization)
                .filter(StringUtils::isNotBlank)
                .collect(Collectors.toSet()));
            // Distinct, non-blank citation texts across all sources.
            metadata.put("bibliographicCitations", this.sources.stream()
                .flatMap(dataset -> dataset.getBibliographicCitations().stream())
                .map(Citation::getText)
                .filter(StringUtils::isNotBlank)
                .collect(Collectors.toSet()));
        }

        mapper_write:
        {
            File target = new File(this.targetDir, "metadata.json");
            json.writeValue(target, (Object)metadata);
        }
    }

    /**
     * Summarises one source dataset as a map suitable for the metadata file.
     * <p>
     * Dates are rendered with {@link #buildDateString}. License, citation and
     * identifier entries are only added when the dataset provides them.
     *
     * @param source the dataset to describe
     * @return a new map of metadata keys to values
     */
    protected Map buildSourceMetadata(Dataset source) {
        Map<String, Object> entry = new HashMap<>();
        entry.put("created", this.buildDateString(source.getCreated()));
        entry.put("modified", this.buildDateString(source.getModified()));
        entry.put("published", this.buildDateString(source.getPubDate()));
        entry.put("creator", source.getCreatedBy());

        boolean licensed = source.getLicense() != null;
        if (licensed) {
            entry.put("license", source.getLicense().getLicenseTitle());
            entry.put("licenseUrl", source.getLicense().getLicenseUrl());
        }

        entry.put("title", source.getTitle());
        entry.put("description", source.getDescription());
        entry.put("rights", source.getRights());

        boolean cited = source.getCitation() != null;
        if (cited) {
            entry.put("citation", source.getCitation().getText());
        }

        boolean identified = source.getIdentifiers() != null;
        if (identified) {
            entry.put("identifier", source.getIdentifiers().stream()
                .map(identifier -> identifier.getIdentifier())
                .collect(Collectors.toList()));
        }
        return entry;
    }

    /**
     * Streams the complete name of each contact of a dataset.
     *
     * @param source the dataset whose contacts are read
     * @return one computed complete name per contact, in contact order
     */
    protected Stream<String> buildAttributions(Dataset source) {
        return source.getContacts()
            .stream()
            .map(contact -> contact.computeCompleteName());
    }

    /**
     * Formats a date as an ISO local date ({@code yyyy-MM-dd}) in the system default time zone.
     *
     * @param date the date to format; may be null
     * @return the formatted date, or null when {@code date} is null
     */
    protected String buildDateString(Date date) {
        return date == null
            ? null
            : date.toInstant()
                .atZone(ZoneOffset.systemDefault())
                .toLocalDate()
                .format(DateTimeFormatter.ISO_LOCAL_DATE);
    }

    /**
     * Writes the identifier map to {@code idmap.txt} in the target directory.
     * <p>
     * The file is tab-separated with a header row followed by one row per usage.
     * Any existing file is overwritten. Nothing is written when there is no map.
     *
     * @throws Exception if the file cannot be written
     */
    protected void writeIdMap() throws Exception {
        if (this.idMap == null) {
            return;
        }
        File usageFile = new File(this.targetDir, "idmap.txt");
        // The CSV writer is a managed resource too, so it is flushed and closed before the
        // underlying file writer instead of relying on it not buffering anything itself.
        try (FileWriter w = new FileWriter(usageFile, false);
             CSVWriter writer = new CSVWriter((Writer)w, '\t', '\"', '\\', "\n")) {
            writer.writeNext(Usage.HEADERS, false);
            for (Usage usage : this.idMap.values()) {
                writer.writeNext(usage.asArray(), false);
            }
        }
    }

    /**
     * Loads a preferred identifier map, as written by a previous index build.
     * <p>
     * The file is tab-separated; its first line is skipped as a header. Each
     * remaining row becomes a usage keyed by its taxon ID. A duplicate taxon ID is
     * logged and the later row wins. A row that cannot be turned into a usage is
     * logged and aborts the load.
     *
     * @param map the map file to read
     * @throws IOException if the file cannot be opened or closed
     */
    public void loadPreferredIdMap(File map) throws IOException {
        log.info((Object)("Loading preferred ID map " + map));
        CSVParser parser = new CSVParserBuilder().withSeparator('\t').withQuoteChar('\"').withEscapeChar('\\').build();
        try (FileReader r = new FileReader(map);
             CSVReader reader = new CSVReaderBuilder((Reader)r).withSkipLines(1).withCSVParser((ICSVParser)parser).build()) {
            for (String[] row : reader) {
                try {
                    Usage usage = new Usage(row);
                    if (this.preferredIdMap.containsKey(usage.getTaxonID())) {
                        log.warn((Object)("Duplicate preferred ID entry for " + usage));
                    }
                    this.preferredIdMap.put(usage.getTaxonID(), usage);
                }
                catch (Exception ex) {
                    // Log the row's contents (not the array identity) together with the cause, then fail the load.
                    log.error((Object)("Invalid row " + Arrays.toString(row)), (Throwable)ex);
                    throw ex;
                }
            }
        }
    }

    public static void main(String[] args) {
        String DEFAULT_DWCA = "/data/lucene/sources/dwca-col";
        String DEFAULT_IRMNG = "/data/lucene/sources/IRMNG_DWC_HOMONYMS";
        String DEFAULT_COMMON_NAME = "/data/lucene/sources/col_vernacular.txt";
        String DEFAULT_TARGET_DIR = "/data/lucene/namematching";
        String DEFAULT_TMP_DIR = "/data/lucene/nmload-tmp";
        String DEFAULT_PRIORITIES = "/data/lucene/sources/priorities.properties";
        String DEFAULT_IDENTIFIERS = "/data/lucene/sources/identifiers.txt";
        Options options = new Options();
        options.addOption("v", "version", false, "Retrieve version information");
        options.addOption("h", "help", false, "Retrieve options");
        options.addOption("all", false, "Generates the load index and search index");
        options.addOption("load", false, "Generate the load index only. The load index is a temporary index generated from the raw data files used to load the main search index");
        options.addOption("search", false, "Generates the search index. A load index must already be created for this to run.");
        options.addOption("irmng", true, "The absolute path to the unzipped irmng DwCA. IRMNG is used to detect homonyms. Defaults to /data/lucene/sources/IRMNG_DWC_HOMONYMS");
        options.addOption("dwca", true, "The absolute path to the unzipped DwCA (or a directory containing unzipped DWC-A - see recurse) for the scientific names. If  Defaults to /data/lucene/sources/dwca-col See also, the recurse option");
        options.addOption("recurse", false, "Recurse through the sub-directories of the dwca directory, looking for directories with a meta.xml");
        options.addOption("priorities", true, "A properties file containing priority multiplers for the different data sources, keyed by datasetID->float. Defaults to /data/lucene/sources/priorities.properties");
        options.addOption("ids", true, "A tab seperated values file containing additional taxon identifiers. Defaults to /data/lucene/sources/identifiers.txt");
        options.addOption("target", true, "The target directory to write the new name index to. Defaults to /data/lucene/namematching");
        options.addOption("tmp", true, "The tmp directory for the load index. Defaults to /data/lucene/nmload-tmp");
        options.addOption("common", true, "The common (vernacular) name file. Defaults to /data/lucene/sources/col_vernacular.txt");
        options.addOption("testSearch", true, "Debug a name search. This uses the target directory to search against.");
        options.addOption("testCommonSearch", true, "Debug a common name search. This takes a taxonID for the search.");
        options.addOption("testCommonSearchLang", true, "Debug a common name search, supplying a language.");
        options.addOption("metadata", true, "The metadata skeleton to use, points to a JSON file. Values default to the distribution skeleton.");
        options.addOption("idmap", true, "The name of an identifier map from a previous name index. The index build will attempt to reuse left- and right-values from this map when constructing an index.");
        BasicParser parser = new BasicParser();
        try {
            ALANameSearcher searcher;
            CommandLine line = parser.parse(options, args);
            if (line.hasOption("v")) {
                InputStream stream = DwcaNameIndexer.class.getResourceAsStream("/git.properties");
                Properties properties = new Properties();
                if (stream != null) {
                    properties.load(stream);
                    properties.list(System.out);
                } else {
                    System.err.println("Unable to retrieve versioning information");
                }
                new HelpFormatter().printHelp("nameindexer", options);
                System.exit(-1);
            }
            if (line.hasOption("help")) {
                new HelpFormatter().printHelp("nameindexer", options);
                System.exit(-1);
            }
            if (line.hasOption("testSearch")) {
                boolean indexExists = new File("/data/lucene/namematching").exists();
                if (indexExists) {
                    Map props;
                    System.out.println("Search for name: " + line.getOptionValue("testSearch"));
                    searcher = new ALANameSearcher(line.getOptionValue("target", "/data/lucene/namematching"));
                    NameSearchResult nsr = searcher.searchForRecord(line.getOptionValue("testSearch"));
                    if (nsr == null) {
                        nsr = searcher.searchForRecordByLsid(line.getOptionValue("testSearch"));
                    }
                    if (nsr != null) {
                        props = nsr.toMap();
                        for (Map.Entry entry : props.entrySet()) {
                            System.out.println((String)entry.getKey() + ": " + (String)entry.getValue());
                        }
                    } else {
                        nsr = searcher.searchForCommonName(line.getOptionValue("testSearch"));
                        if (nsr != null) {
                            props = nsr.toMap();
                            for (Map.Entry entry : props.entrySet()) {
                                System.out.println((String)entry.getKey() + ": " + (String)entry.getValue());
                            }
                        } else {
                            System.err.println("No match for " + line.getOptionValue("testSearch"));
                        }
                    }
                    System.exit(1);
                } else {
                    System.err.println("Index unreadable. Check /data/lucene/namematching");
                }
                new HelpFormatter().printHelp("nameindexer", options);
                System.exit(-1);
            }
            if (line.hasOption("testCommonSearch")) {
                boolean indexExists = new File("/data/lucene/namematching").exists();
                if (indexExists) {
                    System.out.println("Search for name: " + line.getOptionValue("testCommonSearch"));
                    searcher = new ALANameSearcher(line.getOptionValue("target", "/data/lucene/namematching"));
                    String lsid = line.getOptionValue("testCommonSearch");
                    String language = line.getOptionValue("testCommonSearchLang");
                    String commonName = null;
                    commonName = StringUtils.isNotBlank((CharSequence)language) ? searcher.getCommonNameForLSID(lsid, new String[]{language}) : searcher.getCommonNameForLSID(lsid);
                    if (commonName == null) {
                        if (StringUtils.isNotBlank((CharSequence)language)) {
                            System.err.println("No common name indexed for taxonID: " + lsid + " and language " + language);
                        } else {
                            System.err.println("No common name indexed for taxonID: " + lsid);
                        }
                    } else {
                        System.out.println("Match: " + commonName);
                    }
                    System.exit(1);
                } else {
                    System.err.println("Index unreadable. Check /data/lucene/namematching");
                }
                new HelpFormatter().printHelp("nameindexer", options);
                System.exit(-1);
            }
            boolean recurse = line.hasOption("recurse");
            boolean load = line.hasOption("load") || line.hasOption("all");
            boolean search = line.hasOption("search") || line.hasOption("all");
            File metadataSkeleton = null;
            if (line.hasOption("metadata") && !(metadataSkeleton = new File(line.getOptionValue("metadata"))).exists()) {
                System.err.println("Metadata file " + metadataSkeleton + " does not exist");
                System.exit(1);
            }
            if (!(line.hasOption("load") || line.hasOption("search") || line.hasOption("all"))) {
                load = true;
                search = true;
            }
            log.info((Object)("Generating loading index: " + load));
            log.info((Object)("Generating searching index: " + search));
            boolean defaultIrmngReadable = new File("/data/lucene/sources/IRMNG_DWC_HOMONYMS").exists();
            boolean defaultCommonReadable = new File("/data/lucene/sources/col_vernacular.txt").exists();
            boolean defaultDwcaReadable = new File("/data/lucene/sources/dwca-col").exists();
            boolean defaultPriorities = new File("/data/lucene/sources/priorities.properties").exists();
            boolean defaultIdentifiers = new File("/data/lucene/sources/identifiers.txt").exists();
            if (line.getOptionValue("dwca") != null) {
                log.info((Object)("Using the  DwCA name file: " + line.getOptionValue("dwca")));
            } else if (defaultDwcaReadable) {
                log.info((Object)"Using the default DwCA name file: /data/lucene/sources/dwca-col");
            } else {
                log.error((Object)"No DwC Archive specified and the default file path does not exist or is inaccessible. Default path: /data/lucene/sources/dwca-col");
                new HelpFormatter().printHelp("nameindexer", options);
                System.exit(-1);
            }
            File preferredIdMap = null;
            if (line.getOptionValue("idmap") != null && !(preferredIdMap = new File(line.getOptionValue("idmap"))).exists()) {
                log.error((Object)("Preferred ID map file " + preferredIdMap + " does not exist"));
                System.exit(1);
            }
            if (line.getOptionValue("irmng") == null && !defaultIrmngReadable) {
                log.warn((Object)"No IRMNG export specified and the default file path does not exist or is inaccessible. Default path: /data/lucene/sources/IRMNG_DWC_HOMONYMS");
            } else if (line.getOptionValue("irmng") == null) {
                log.info((Object)"Using the default IRMNG name file: /data/lucene/sources/IRMNG_DWC_HOMONYMS");
            } else {
                log.info((Object)("Using the  IRMNG name file: " + line.getOptionValue("irmng")));
            }
            if (line.getOptionValue("common") == null && !defaultCommonReadable) {
                log.warn((Object)"No common name export specified and the default file path does not exist or is inaccessible. Default path: /data/lucene/sources/col_vernacular.txt");
            } else if (line.getOptionValue("common") == null) {
                log.info((Object)"Using the default common name file: /data/lucene/sources/col_vernacular.txt");
            } else {
                log.info((Object)("Using the common name file: " + line.getOptionValue("common")));
            }
            if (line.getOptionValue("priorities") == null && !defaultPriorities) {
                log.warn((Object)"No priorities file, defaulting to uniform priorities.");
            } else if (line.getOptionValue("priorities") == null) {
                log.info((Object)"Using the default priorities file: /data/lucene/sources/priorities.properties");
            } else {
                log.info((Object)("Using the priorities file: " + line.getOptionValue("priorities")));
            }
            if (line.getOptionValue("ids") == null && !defaultIdentifiers) {
                log.warn((Object)"No identifiers file, Default is /data/lucene/sources/identifiers.txt");
            } else if (line.getOptionValue("ids") == null) {
                log.info((Object)"Using the default identifiers file: /data/lucene/sources/identifiers.txt");
            } else {
                log.info((Object)("Using the identifiers file: " + line.getOptionValue("ids")));
            }
            File targetDirectory = new File(line.getOptionValue("target", "/data/lucene/namematching"));
            if (targetDirectory.exists()) {
                String newPath = targetDirectory.getAbsolutePath() + "_" + DateFormatUtils.format((Date)new Date(), (String)"yyyy-MM-dd_hh-mm-ss");
                log.info((Object)("Target directory already exists. Backing up to : " + newPath));
                File newTargetDirectory = new File(newPath);
                FileUtils.moveDirectory((File)targetDirectory, (File)newTargetDirectory);
                FileUtils.forceMkdir((File)targetDirectory);
            }
            File commonNameFile = new File(line.getOptionValue("common", "/data/lucene/sources/col_vernacular.txt"));
            File irmngFile = new File(line.getOptionValue("irmng", "/data/lucene/sources/IRMNG_DWC_HOMONYMS"));
            File identifiersFile = new File(line.getOptionValue("ids", "/data/lucene/sources/identifiers.txt"));
            File prioritiesFile = new File(line.getOptionValue("priorities", "/data/lucene/sources/priorities.properties"));
            Properties priorities = new Properties();
            if (prioritiesFile.exists()) {
                priorities.load(new FileInputStream(prioritiesFile));
            }
            ArrayList<File> dwcas = new ArrayList<File>();
            ArrayList<File> bases = new ArrayList<File>();
            if (line.hasOption("dwca")) {
                for (String base : line.getOptionValues("dwca")) {
                    bases.add(new File(base));
                }
            } else {
                bases.add(new File("/data/lucene/sources/dwca-col"));
            }
            log.info((Object)("Base sources: " + bases));
            for (File base : bases) {
                DwcaNameIndexer.findDwcas(base, recurse, dwcas);
            }
            if (dwcas.isEmpty()) {
                log.warn((Object)("No DwCA directories found under " + bases));
                System.exit(1);
            }
            log.info((Object)("Loading DwCAs: " + dwcas));
            DwcaNameIndexer indexer = new DwcaNameIndexer(targetDirectory, new File(line.getOptionValue("tmp", "/data/lucene/nmload-tmp")), priorities, load, search);
            indexer.begin();
            if (preferredIdMap != null) {
                indexer.loadPreferredIdMap(preferredIdMap);
            }
            HashSet<File> used = new HashSet<File>();
            if (load) {
                for (File dwca : dwcas) {
                    if (!indexer.createLoadingIndex(dwca)) continue;
                    used.add(dwca);
                }
                indexer.commitLoadingIndexes();
            }
            indexer.generateIndex();
            for (File dwca : dwcas) {
                if (!indexer.create(dwca)) continue;
                used.add(dwca);
            }
            indexer.indexCommonNames(commonNameFile);
            indexer.createIrmng(irmngFile);
            indexer.createExtraIdIndex(identifiersFile);
            for (File dwca : dwcas) {
                if (!indexer.loadCommonNames(dwca)) continue;
                used.add(dwca);
            }
            indexer.commit();
            indexer.writeMetadata(metadataSkeleton);
            indexer.writeIdMap();
            for (File dwca : dwcas) {
                if (used.contains(dwca)) continue;
                log.warn((Object)("Source " + dwca + " is unused"));
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * A single name usage held as a flat, six-column record.
     * <p>
     * The column order is fixed by {@link #HEADERS}; {@link #Usage(String[])} reads a row in
     * that order and {@link #asArray()} writes one back out in the same order.
     * A {@code left} or {@code right} value of {@code 0} is treated as "no value": it is what
     * a blank cell parses to, and it is emitted as {@code null} by {@link #asArray()}.
     */
    public static class Usage {
        /** Column names, in the order used by {@link #Usage(String[])} and {@link #asArray()}. */
        public static final String[] HEADERS = {"taxonID", "scientificName", "taxonomicStatus", "left", "right", "acceptedNameUsageID"};

        private String taxonID;
        private String scientificName;
        private String taxonomicStatus;
        // NOTE(review): left/right look like nested-set interval bounds - confirm against the code that fills them in.
        private int left;
        private int right;
        private String acceptedNameUsageID;

        /**
         * Builds a usage with every column supplied explicitly.
         *
         * @param taxonID the taxon identifier
         * @param scientificName the scientific name
         * @param taxonomicStatus the taxonomic status
         * @param left the left value (0 for none)
         * @param right the right value (0 for none)
         * @param acceptedNameUsageID the identifier of the accepted usage, may be null
         */
        public Usage(String taxonID, String scientificName, String taxonomicStatus, int left, int right, String acceptedNameUsageID) {
            this.taxonID = taxonID;
            this.scientificName = scientificName;
            this.taxonomicStatus = taxonomicStatus;
            this.left = left;
            this.right = right;
            this.acceptedNameUsageID = acceptedNameUsageID;
        }

        /**
         * Builds a usage with an accepted-usage reference and both left and right set to 0.
         *
         * @param taxonID the taxon identifier
         * @param scientificName the scientific name
         * @param taxonomicStatus the taxonomic status
         * @param acceptedNameUsageID the identifier of the accepted usage
         */
        public Usage(String taxonID, String scientificName, String taxonomicStatus, String acceptedNameUsageID) {
            this(taxonID, scientificName, taxonomicStatus, 0, 0, acceptedNameUsageID);
        }

        /**
         * Builds a usage with left/right values and a null accepted-usage reference.
         *
         * @param taxonID the taxon identifier
         * @param scientificName the scientific name
         * @param taxonomicStatus the taxonomic status
         * @param left the left value
         * @param right the right value
         */
        public Usage(String taxonID, String scientificName, String taxonomicStatus, int left, int right) {
            this(taxonID, scientificName, taxonomicStatus, left, right, null);
        }

        /**
         * Builds a usage from a row laid out as described by {@link #HEADERS}.
         * Every cell is stripped of surrounding whitespace; blank text cells become
         * {@code null} and blank numeric cells become {@code 0}.
         *
         * @param row the row cells; indexes 0 to 5 are read
         * @throws NumberFormatException if a non-blank left or right cell is not an integer
         */
        public Usage(String[] row) {
            // Arguments are evaluated left to right, so cells are read in column order.
            this(
                StringUtils.stripToNull(row[0]),
                StringUtils.stripToNull(row[1]),
                StringUtils.stripToNull(row[2]),
                toIndex(row[3]),
                toIndex(row[4]),
                StringUtils.stripToNull(row[5])
            );
        }

        /** Parses a numeric cell, mapping a null or blank cell to 0. */
        private static int toIndex(String cell) {
            String trimmed = StringUtils.stripToNull(cell);
            if (trimmed == null) {
                return 0;
            }
            return Integer.parseInt(trimmed);
        }

        /** Renders a numeric column, mapping 0 to a null cell. */
        private static String toCell(int index) {
            if (index == 0) {
                return null;
            }
            return Integer.toString(index);
        }

        /** @return the taxon identifier */
        public String getTaxonID() {
            return this.taxonID;
        }

        /** @return the scientific name */
        public String getScientificName() {
            return this.scientificName;
        }

        /** @return the taxonomic status */
        public String getTaxonomicStatus() {
            return this.taxonomicStatus;
        }

        /** @return the left value, 0 if none */
        public int getLeft() {
            return this.left;
        }

        /** @return the right value, 0 if none */
        public int getRight() {
            return this.right;
        }

        /** @return the identifier of the accepted usage, possibly null */
        public String getAcceptedNameUsageID() {
            return this.acceptedNameUsageID;
        }

        /**
         * Flattens this usage into a row matching {@link #HEADERS}.
         *
         * @return a new six-element array; left/right cells are null when the value is 0
         */
        public String[] asArray() {
            String[] cells = new String[6];
            cells[0] = this.taxonID;
            cells[1] = this.scientificName;
            cells[2] = this.taxonomicStatus;
            cells[3] = toCell(this.left);
            cells[4] = toCell(this.right);
            cells[5] = this.acceptedNameUsageID;
            return cells;
        }

        /** @return a debugging representation listing every field */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("Usage{taxonID='");
            text.append(this.taxonID).append('\'');
            text.append(", scientificName='").append(this.scientificName).append('\'');
            text.append(", taxonomicStatus='").append(this.taxonomicStatus).append('\'');
            text.append(", left=").append(this.left);
            text.append(", right=").append(this.right);
            text.append(", acceptedNameUsageID='").append(this.acceptedNameUsageID).append('\'');
            text.append('}');
            return text.toString();
        }
    }
}

