au.org.ala.checklist.lucene
Class CBCreateLuceneIndex
java.lang.Object
au.org.ala.checklist.lucene.CBCreateLuceneIndex
- Direct Known Subclasses:
- DwcaNameIndexer
public class CBCreateLuceneIndex
- extends java.lang.Object
Creates the Lucene index based on the cb_names export generated by ChecklistBankExporter
- Author:
- Natasha
|
Method Summary |
void |
addAdditionalName(java.lang.String lsid,
java.lang.String scientificName,
java.lang.String author,
LinnaeanRankClassification cl)
|
void |
commit()
|
void |
commit(boolean close,
boolean merge)
|
org.apache.lucene.document.Document |
createALAIndexDocument(java.lang.String name,
java.lang.String id,
java.lang.String lsid,
java.lang.String author,
LinnaeanRankClassification cl)
|
org.apache.lucene.document.Document |
createALAIndexDocument(java.lang.String name,
java.lang.String id,
java.lang.String lsid,
java.lang.String author,
java.lang.String rank,
java.lang.String rankId,
java.lang.String left,
java.lang.String right,
LinnaeanRankClassification cl)
|
protected org.apache.lucene.document.Document |
createALAIndexDocument(java.lang.String name,
java.lang.String id,
java.lang.String lsid,
java.lang.String rank,
java.lang.String rankString,
java.lang.String kingdom,
java.lang.String kid,
java.lang.String phylum,
java.lang.String pid,
java.lang.String clazz,
java.lang.String cid,
java.lang.String order,
java.lang.String oid,
java.lang.String family,
java.lang.String fid,
java.lang.String genus,
java.lang.String gid,
java.lang.String species,
java.lang.String sid,
java.lang.String left,
java.lang.String right,
java.lang.String acceptedConcept,
java.lang.String specificEpithet,
java.lang.String infraspecificEpithet,
java.lang.String author,
float boost)
|
protected org.apache.lucene.document.Document |
createALASynonymDocument(java.lang.String scientificName,
java.lang.String author,
java.lang.String id,
java.lang.String lsid,
java.lang.String nameLsid,
java.lang.String acceptedLsid,
java.lang.String acceptedId,
float boost,
java.lang.String synonymType)
|
void |
createIndex(java.lang.String exportsDir,
java.lang.String indexDir,
boolean generateSciNames,
boolean generateCommonNames)
Creates the index from the specified checklist bank names usage export file into
the specified index directory. |
void |
createIndex(java.lang.String exportsDir,
java.lang.String indexDir,
java.lang.String acceptedFile,
java.lang.String synonymFile,
java.lang.String irmngDwca,
boolean generateSciNames,
boolean generateCommonNames)
|
protected org.apache.lucene.index.IndexWriter |
createIndexWriter(java.io.File directory,
org.apache.lucene.analysis.Analyzer analyzer,
boolean replace)
Creates an index writer in the specified directory. |
void |
createIrmngIndex(java.lang.String exportsDir,
java.lang.String indexDir)
|
void |
deleteName(java.lang.String lsid)
Deletes the entry that has the supplied lsid. |
protected org.apache.lucene.document.Document |
getCommonNameDocument(java.lang.String cn,
java.lang.String sn,
java.lang.String lsid,
float boost)
|
protected org.apache.lucene.document.Document |
getCommonNameDocument(java.lang.String cn,
java.lang.String sn,
java.lang.String lsid,
float boost,
boolean checkAccepted)
|
java.lang.String |
getIndexDirectory()
|
protected void |
indexIrmngDwcA(org.apache.lucene.index.IndexWriter iw,
java.lang.String archiveDirectory)
|
void |
init()
|
static void |
main(java.lang.String[] args)
Generates the Lucene index required for the name matching API. |
void |
setIndexDirectory(java.lang.String indexDirectory)
|
| Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
cbExportFile
protected java.lang.String cbExportFile
lexFile
protected java.lang.String lexFile
irmngFile
protected java.lang.String irmngFile
colFile
protected java.lang.String colFile
extraALAConcepts
protected java.lang.String extraALAConcepts
alaConcepts
protected java.lang.String alaConcepts
alaSynonyms
protected java.lang.String alaSynonyms
irmngDwcaDirectory
protected java.lang.String irmngDwcaDirectory
afdFile
protected java.lang.String afdFile
apniFile
protected java.lang.String apniFile
taxonConeptName
protected java.lang.String taxonConeptName
log
protected org.apache.commons.logging.Log log
dataSource
protected javax.sql.DataSource dataSource
CBCreateLuceneIndex
public CBCreateLuceneIndex()
init
public void init()
throws java.lang.Exception
- Throws:
java.lang.Exception
createIndex
public void createIndex(java.lang.String exportsDir,
java.lang.String indexDir,
boolean generateSciNames,
boolean generateCommonNames)
throws java.lang.Exception
- Creates the index from the specified checklist bank names usage export file into
the specified index directory.
- Parameters:
cbExportFile - A cb export file as generated from the ChecklistBankExporter
lexFile -
irmngFile -
indexDir - The directory in which the 2 indices will be created.
- Throws:
java.lang.Exception
createIrmngIndex
public void createIrmngIndex(java.lang.String exportsDir,
java.lang.String indexDir)
throws java.lang.Exception
- Throws:
java.lang.Exception
createIndex
public void createIndex(java.lang.String exportsDir,
java.lang.String indexDir,
java.lang.String acceptedFile,
java.lang.String synonymFile,
java.lang.String irmngDwca,
boolean generateSciNames,
boolean generateCommonNames)
throws java.lang.Exception
- Throws:
java.lang.Exception
createIndexWriter
protected org.apache.lucene.index.IndexWriter createIndexWriter(java.io.File directory,
org.apache.lucene.analysis.Analyzer analyzer,
boolean replace)
throws java.lang.Exception
- Creates an index writer in the specified directory. It will create/recreate
the target directory.
- Parameters:
directory - analyzer -
- Returns:
-
- Throws:
java.lang.Exception
addAdditionalName
public void addAdditionalName(java.lang.String lsid,
java.lang.String scientificName,
java.lang.String author,
LinnaeanRankClassification cl)
throws java.lang.Exception
- Throws:
java.lang.Exception
deleteName
public void deleteName(java.lang.String lsid)
throws java.lang.Exception
- Deletes the entry that has the supplied lsid. It will also delete all the synonyms associated with it.
- Parameters:
lsid -
- Throws:
java.lang.Exception
commit
public void commit()
throws java.lang.Exception
- Throws:
java.lang.Exception
commit
public void commit(boolean close,
boolean merge)
throws java.lang.Exception
- Parameters:
merge - whether or not to merge the index
close - whether or not to close the index
- Throws:
java.lang.Exception
indexIrmngDwcA
protected void indexIrmngDwcA(org.apache.lucene.index.IndexWriter iw,
java.lang.String archiveDirectory)
throws java.lang.Exception
- Throws:
java.lang.Exception
getCommonNameDocument
protected org.apache.lucene.document.Document getCommonNameDocument(java.lang.String cn,
java.lang.String sn,
java.lang.String lsid,
float boost)
getCommonNameDocument
protected org.apache.lucene.document.Document getCommonNameDocument(java.lang.String cn,
java.lang.String sn,
java.lang.String lsid,
float boost,
boolean checkAccepted)
createALAIndexDocument
public org.apache.lucene.document.Document createALAIndexDocument(java.lang.String name,
java.lang.String id,
java.lang.String lsid,
java.lang.String author,
LinnaeanRankClassification cl)
createALAIndexDocument
public org.apache.lucene.document.Document createALAIndexDocument(java.lang.String name,
java.lang.String id,
java.lang.String lsid,
java.lang.String author,
java.lang.String rank,
java.lang.String rankId,
java.lang.String left,
java.lang.String right,
LinnaeanRankClassification cl)
createALASynonymDocument
protected org.apache.lucene.document.Document createALASynonymDocument(java.lang.String scientificName,
java.lang.String author,
java.lang.String id,
java.lang.String lsid,
java.lang.String nameLsid,
java.lang.String acceptedLsid,
java.lang.String acceptedId,
float boost,
java.lang.String synonymType)
createALAIndexDocument
protected org.apache.lucene.document.Document createALAIndexDocument(java.lang.String name,
java.lang.String id,
java.lang.String lsid,
java.lang.String rank,
java.lang.String rankString,
java.lang.String kingdom,
java.lang.String kid,
java.lang.String phylum,
java.lang.String pid,
java.lang.String clazz,
java.lang.String cid,
java.lang.String order,
java.lang.String oid,
java.lang.String family,
java.lang.String fid,
java.lang.String genus,
java.lang.String gid,
java.lang.String species,
java.lang.String sid,
java.lang.String left,
java.lang.String right,
java.lang.String acceptedConcept,
java.lang.String specificEpithet,
java.lang.String infraspecificEpithet,
java.lang.String author,
float boost)
getIndexDirectory
public java.lang.String getIndexDirectory()
setIndexDirectory
public void setIndexDirectory(java.lang.String indexDirectory)
main
public static void main(java.lang.String[] args)
throws java.lang.Exception
- Generates the Lucene index required for the name matching API.
e.g.
au.org.ala.checklist.lucene.CBCreateLuceneIndex "/data/exports" "/data/lucene/namematching"
Extra optional args that should appear after the directory names
-sn: Only create the indexes necessary for the scientific name lookups
-cn: Only create the indexes necessary for the common name lookups
- Parameters:
args -
- Throws:
java.lang.Exception
Copyright © 2014. All Rights Reserved.