au.org.ala.checklist.lucene
Class CBCreateLuceneIndex

java.lang.Object
  extended by au.org.ala.checklist.lucene.CBCreateLuceneIndex
Direct Known Subclasses:
DwcaNameIndexer

public class CBCreateLuceneIndex
extends java.lang.Object

Creates the Lucene index based on the cb_names export generated by ChecklistBankExporter.

Author:
Natasha

Nested Class Summary
static class CBCreateLuceneIndex.IndexField
           
 
Field Summary
protected  java.lang.String afdFile
           
protected  java.lang.String alaConcepts
           
protected  java.lang.String alaSynonyms
           
protected  java.lang.String apniFile
           
protected  java.lang.String cbExportFile
           
protected  java.lang.String colFile
           
protected  javax.sql.DataSource dataSource
           
protected  java.lang.String extraALAConcepts
           
protected  java.lang.String irmngDwcaDirectory
           
protected  java.lang.String irmngFile
           
protected  java.lang.String lexFile
           
protected  org.apache.commons.logging.Log log
           
protected  java.lang.String taxonConeptName
           
 
Constructor Summary
CBCreateLuceneIndex()
           
 
Method Summary
 void addAdditionalName(java.lang.String lsid, java.lang.String scientificName, java.lang.String author, LinnaeanRankClassification cl)
           
 void commit()
           
 void commit(boolean close, boolean merge)
           
 org.apache.lucene.document.Document createALAIndexDocument(java.lang.String name, java.lang.String id, java.lang.String lsid, java.lang.String author, LinnaeanRankClassification cl)
           
 org.apache.lucene.document.Document createALAIndexDocument(java.lang.String name, java.lang.String id, java.lang.String lsid, java.lang.String author, java.lang.String rank, java.lang.String rankId, java.lang.String left, java.lang.String right, LinnaeanRankClassification cl)
           
protected  org.apache.lucene.document.Document createALAIndexDocument(java.lang.String name, java.lang.String id, java.lang.String lsid, java.lang.String rank, java.lang.String rankString, java.lang.String kingdom, java.lang.String kid, java.lang.String phylum, java.lang.String pid, java.lang.String clazz, java.lang.String cid, java.lang.String order, java.lang.String oid, java.lang.String family, java.lang.String fid, java.lang.String genus, java.lang.String gid, java.lang.String species, java.lang.String sid, java.lang.String left, java.lang.String right, java.lang.String acceptedConcept, java.lang.String specificEpithet, java.lang.String infraspecificEpithet, java.lang.String author, float boost)
           
protected  org.apache.lucene.document.Document createALASynonymDocument(java.lang.String scientificName, java.lang.String author, java.lang.String id, java.lang.String lsid, java.lang.String nameLsid, java.lang.String acceptedLsid, java.lang.String acceptedId, float boost, java.lang.String synonymType)
           
 void createIndex(java.lang.String exportsDir, java.lang.String indexDir, boolean generateSciNames, boolean generateCommonNames)
          Creates the index from the specified checklist bank names usage export file into the specified index directory.
 void createIndex(java.lang.String exportsDir, java.lang.String indexDir, java.lang.String acceptedFile, java.lang.String synonymFile, java.lang.String irmngDwca, boolean generateSciNames, boolean generateCommonNames)
           
protected  org.apache.lucene.index.IndexWriter createIndexWriter(java.io.File directory, org.apache.lucene.analysis.Analyzer analyzer, boolean replace)
          Creates an index writer in the specified directory.
 void createIrmngIndex(java.lang.String exportsDir, java.lang.String indexDir)
           
 void deleteName(java.lang.String lsid)
          Deletes the entry that has the supplied lsid.
protected  org.apache.lucene.document.Document getCommonNameDocument(java.lang.String cn, java.lang.String sn, java.lang.String lsid, float boost)
           
protected  org.apache.lucene.document.Document getCommonNameDocument(java.lang.String cn, java.lang.String sn, java.lang.String lsid, float boost, boolean checkAccepted)
           
 java.lang.String getIndexDirectory()
           
protected  void indexIrmngDwcA(org.apache.lucene.index.IndexWriter iw, java.lang.String archiveDirectory)
           
 void init()
           
static void main(java.lang.String[] args)
          Generates the Lucene index required for the name matching API.
 void setIndexDirectory(java.lang.String indexDirectory)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

cbExportFile

protected java.lang.String cbExportFile

lexFile

protected java.lang.String lexFile

irmngFile

protected java.lang.String irmngFile

colFile

protected java.lang.String colFile

extraALAConcepts

protected java.lang.String extraALAConcepts

alaConcepts

protected java.lang.String alaConcepts

alaSynonyms

protected java.lang.String alaSynonyms

irmngDwcaDirectory

protected java.lang.String irmngDwcaDirectory

afdFile

protected java.lang.String afdFile

apniFile

protected java.lang.String apniFile

taxonConeptName

protected java.lang.String taxonConeptName

log

protected org.apache.commons.logging.Log log

dataSource

protected javax.sql.DataSource dataSource
Constructor Detail

CBCreateLuceneIndex

public CBCreateLuceneIndex()
Method Detail

init

public void init()
          throws java.lang.Exception
Throws:
java.lang.Exception

createIndex

public void createIndex(java.lang.String exportsDir,
                        java.lang.String indexDir,
                        boolean generateSciNames,
                        boolean generateCommonNames)
                 throws java.lang.Exception
Creates the index from the specified checklist bank names usage export file into the specified index directory.

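Parameters:
exportsDir - The directory containing the export files, presumably including the cb export file as generated from the ChecklistBankExporter (to be confirmed against the implementation)
indexDir - The directory in which the 2 indices will be created.
generateSciNames - Presumably whether to create the indexes necessary for the scientific name lookups (to be confirmed against the implementation)
generateCommonNames - Presumably whether to create the indexes necessary for the common name lookups (to be confirmed against the implementation)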
Throws:
java.lang.Exception

createIrmngIndex

public void createIrmngIndex(java.lang.String exportsDir,
                             java.lang.String indexDir)
                      throws java.lang.Exception
Throws:
java.lang.Exception

createIndex

public void createIndex(java.lang.String exportsDir,
                        java.lang.String indexDir,
                        java.lang.String acceptedFile,
                        java.lang.String synonymFile,
                        java.lang.String irmngDwca,
                        boolean generateSciNames,
                        boolean generateCommonNames)
                 throws java.lang.Exception
Throws:
java.lang.Exception

createIndexWriter

protected org.apache.lucene.index.IndexWriter createIndexWriter(java.io.File directory,
                                                                org.apache.lucene.analysis.Analyzer analyzer,
                                                                boolean replace)
                                                         throws java.lang.Exception
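Creates an index writer in the specified directory. It will create/recreate the target directory.

Parameters:
directory - the directory in which the index writer is created
analyzer - the analyzer for the index writer to use
replace - presumably whether an existing index in the directory is replaced (to be confirmed against the implementation)
Returns:
the index writer created in the specified directory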
Throws:
java.lang.Exception

addAdditionalName

public void addAdditionalName(java.lang.String lsid,
                              java.lang.String scientificName,
                              java.lang.String author,
                              LinnaeanRankClassification cl)
                       throws java.lang.Exception
Throws:
java.lang.Exception

deleteName

public void deleteName(java.lang.String lsid)
                throws java.lang.Exception
Deletes the entry that has the supplied lsid. It will also delete all the synonyms associated with it.

Parameters:
lsid - the lsid of the entry to delete
Throws:
java.lang.Exception

commit

public void commit()
            throws java.lang.Exception
Throws:
java.lang.Exception

commit

public void commit(boolean close,
                   boolean merge)
            throws java.lang.Exception
Parameters:
merge - whether or not to merge the index
close - whether or not to close the index
Throws:
java.lang.Exception

indexIrmngDwcA

protected void indexIrmngDwcA(org.apache.lucene.index.IndexWriter iw,
                              java.lang.String archiveDirectory)
                       throws java.lang.Exception
Throws:
java.lang.Exception

getCommonNameDocument

protected org.apache.lucene.document.Document getCommonNameDocument(java.lang.String cn,
                                                                    java.lang.String sn,
                                                                    java.lang.String lsid,
                                                                    float boost)

getCommonNameDocument

protected org.apache.lucene.document.Document getCommonNameDocument(java.lang.String cn,
                                                                    java.lang.String sn,
                                                                    java.lang.String lsid,
                                                                    float boost,
                                                                    boolean checkAccepted)

createALAIndexDocument

public org.apache.lucene.document.Document createALAIndexDocument(java.lang.String name,
                                                                  java.lang.String id,
                                                                  java.lang.String lsid,
                                                                  java.lang.String author,
                                                                  LinnaeanRankClassification cl)

createALAIndexDocument

public org.apache.lucene.document.Document createALAIndexDocument(java.lang.String name,
                                                                  java.lang.String id,
                                                                  java.lang.String lsid,
                                                                  java.lang.String author,
                                                                  java.lang.String rank,
                                                                  java.lang.String rankId,
                                                                  java.lang.String left,
                                                                  java.lang.String right,
                                                                  LinnaeanRankClassification cl)

createALASynonymDocument

protected org.apache.lucene.document.Document createALASynonymDocument(java.lang.String scientificName,
                                                                       java.lang.String author,
                                                                       java.lang.String id,
                                                                       java.lang.String lsid,
                                                                       java.lang.String nameLsid,
                                                                       java.lang.String acceptedLsid,
                                                                       java.lang.String acceptedId,
                                                                       float boost,
                                                                       java.lang.String synonymType)

createALAIndexDocument

protected org.apache.lucene.document.Document createALAIndexDocument(java.lang.String name,
                                                                     java.lang.String id,
                                                                     java.lang.String lsid,
                                                                     java.lang.String rank,
                                                                     java.lang.String rankString,
                                                                     java.lang.String kingdom,
                                                                     java.lang.String kid,
                                                                     java.lang.String phylum,
                                                                     java.lang.String pid,
                                                                     java.lang.String clazz,
                                                                     java.lang.String cid,
                                                                     java.lang.String order,
                                                                     java.lang.String oid,
                                                                     java.lang.String family,
                                                                     java.lang.String fid,
                                                                     java.lang.String genus,
                                                                     java.lang.String gid,
                                                                     java.lang.String species,
                                                                     java.lang.String sid,
                                                                     java.lang.String left,
                                                                     java.lang.String right,
                                                                     java.lang.String acceptedConcept,
                                                                     java.lang.String specificEpithet,
                                                                     java.lang.String infraspecificEpithet,
                                                                     java.lang.String author,
                                                                     float boost)

getIndexDirectory

public java.lang.String getIndexDirectory()

setIndexDirectory

public void setIndexDirectory(java.lang.String indexDirectory)

main

public static void main(java.lang.String[] args)
                 throws java.lang.Exception
Generates the Lucene index required for the name matching API, e.g. au.org.ala.checklist.lucene.CBCreateLuceneIndex "/data/exports" "/data/lucene/namematching". Extra optional args, which should appear after the directory names: -sn: only create the indexes necessary for the scientific name lookups; -cn: only create the indexes necessary for the common name lookups.

Parameters:
args - the exports directory and the index directory, optionally followed by -sn or -cn
Throws:
java.lang.Exception


Copyright © 2014. All Rights Reserved.