/*
 * Decompiled with CFR 0.152.
 */
package org.gbif.nameparser;

import java.io.IOException;
import java.io.InputStream;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.gbif.api.model.checklistbank.ParsedName;
import org.gbif.api.vocabulary.Rank;
import org.gbif.utils.file.FileUtils;
import org.gbif.utils.rs.RsGbifOrg;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class NormalisedNameParser {
    private static Logger LOG = LoggerFactory.getLogger(NormalisedNameParser.class);
    private static final ExecutorService THREAD_POOL = Executors.newCachedThreadPool();
    private TreeSet<String> monomials = new TreeSet(String.CASE_INSENSITIVE_ORDER);
    private final long timeout;
    protected static final String NAME_LETTERS = "A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152";
    protected static final String name_letters = "a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153";
    protected static final String AUTHOR_LETTERS = "A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}";
    protected static final String author_letters = "a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}";
    protected static final String AUTHOR_PREFIXES = "(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')";
    protected static final String AUTHOR = "(?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?(?: *: *(?:Pers|Fr)\\.?)?)";
    protected static final String AUTHOR_TEAM = "(?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?(?: *: *(?:Pers|Fr)\\.?)?)?(?:(?: ?ex\\.? | & | et | in |, ?|; ?|\\.)(?:(?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?(?: *: *(?:Pers|Fr)\\.?)?)|al\\.?))*";
    protected static final Pattern AUTHOR_TEAM_PATTERN = Pattern.compile("^(?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?(?: *: *(?:Pers|Fr)\\.?)?)?(?:(?: ?ex\\.? | & | et | in |, ?|; ?|\\.)(?:(?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?(?: *: *(?:Pers|Fr)\\.?)?)|al\\.?))*$");
    protected static final String YEAR = "[12][0-9][0-9][0-9?][abcdh?]?(?:[/-][0-9]{1,4})?";
    protected static final String RANK_MARKER_SPECIES = "(?:notho)?(?:" + StringUtils.join(Rank.RANK_MARKER_MAP_INFRASPECIFIC.keySet(), (String)"|") + "|agg)\\.?";
    protected static final String EPHITHET_PREFIXES = "van|novae";
    protected static final String EPHITHET = "(?:[0-9]+-)?(?:(?:van|novae) [a-z])?[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153+-]{1,}(?<! d)[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153](?<!\\bex)";
    protected static final String MONOMIAL = "[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152](?:\\.|[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153]+)(?:-[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152]?[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153]+)?";
    protected static final String INFRAGENERIC = "(?:\\( ?([A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153-]+) ?\\)|(" + StringUtils.join(Rank.RANK_MARKER_MAP_INFRAGENERIC.keySet(), (String)"|") + ")\\.? ?([" + "A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152" + "][" + "a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153" + "-]+)" + ")";
    protected static final String RANK_MARKER_ALL = "(notho)? *(" + StringUtils.join(Rank.RANK_MARKER_MAP.keySet(), (String)"|") + ")\\.?";
    private static final Pattern RANK_MARKER_ONLY = Pattern.compile("^" + RANK_MARKER_ALL + "$");
    public static final Pattern CANON_NAME_IGNORE_AUTHORS = Pattern.compile("^(\u00d7?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152](?:\\.|[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153]+)(?:-[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152]?[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153]+)?)(?:(?<!ceae) " + INFRAGENERIC + ")?" + "(?: " + "(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')" + ")?" + "(?: (\u00d7?" + "(?:[0-9]+-)?(?:(?:van|novae) [a-z])?[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153+-]{1,}(?<! d)[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153](?<!\\bex)" + "))?" + "(?: " + "(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')" + ")?" + "(?:" + "(?:" + ".*" + "( " + RANK_MARKER_SPECIES + "[ .])" + "(\u00d7?" + "(?:[0-9]+-)?(?:(?:van|novae) [a-z])?[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153+-]{1,}(?<! d)[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153](?<!\\bex)" + ")" + ")" + "|" + " (\u00d7?" + "(?:[0-9]+-)?(?:(?:van|novae) [a-z])?[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153+-]{1,}(?<! d)[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153](?<!\\bex)" + ")" + ")?");
    public static final Pattern NAME_PATTERN = Pattern.compile("^(\u00d7?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152](?:\\.|[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153]+)(?:-[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152]?[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153]+)?)(?:(?<!ceae) " + INFRAGENERIC + ")?" + "(?: (\u00d7?" + "(?:[0-9]+-)?(?:(?:van|novae) [a-z])?[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153+-]{1,}(?<! d)[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153](?<!\\bex)" + "))?" + "(?:" + "(?:" + "( .*?)?" + "( " + RANK_MARKER_SPECIES + ")" + ")?" + "(?: (\u00d7?\"?" + "(?:[0-9]+-)?(?:(?:van|novae) [a-z])?[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153+-]{1,}(?<! d)[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153](?<!\\bex)" + "\"?))" + ")?" + "(,?" + "(?: ?\\(" + "(" + "(?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?(?: *: *(?:Pers|Fr)\\.?)?)?(?:(?: ?ex\\.? | & | et | in |, ?|; ?|\\.)(?:(?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?(?: *: *(?:Pers|Fr)\\.?)?)|al\\.?))*" + ")?" + ",?( ?" + "[12][0-9][0-9][0-9?][abcdh?]?(?:[/-][0-9]{1,4})?" + ")?" + "\\))?" + "( " + "(?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?(?: *: *(?:Pers|Fr)\\.?)?)?(?:(?: ?ex\\.? | & | et | in |, ?|; ?|\\.)(?:(?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?(?: *: *(?:Pers|Fr)\\.?)?)|al\\.?))*" + ")?" + "(?: ?\\(?,? ?(" + "[12][0-9][0-9][0-9?][abcdh?]?(?:[/-][0-9]{1,4})?" + ")\\)?)?" + ")" + "$");

    public NormalisedNameParser(long timeout) {
        this.timeout = timeout;
    }

    public boolean parseNormalisedName(ParsedName cn, String scientificName) {
        LOG.debug("Parse normed name string: {}", (Object)scientificName);
        FutureTask<Matcher> task = new FutureTask<Matcher>(new MatcherCallable(scientificName));
        THREAD_POOL.execute(task);
        try {
            Matcher matcher = task.get(this.timeout, TimeUnit.MILLISECONDS);
            if (matcher.group(0).equals(scientificName)) {
                String yearAsString;
                if (LOG.isDebugEnabled()) {
                    this.logMatcher(matcher);
                }
                cn.setGenusOrAbove(StringUtils.trimToNull((String)matcher.group(1)));
                boolean bracketSubrankFound = false;
                if (matcher.group(2) != null) {
                    bracketSubrankFound = true;
                    cn.setInfraGeneric(StringUtils.trimToNull((String)matcher.group(2)));
                } else if (matcher.group(4) != null) {
                    String rank = StringUtils.trimToNull((String)matcher.group(3));
                    if (!rank.endsWith(".")) {
                        rank = rank + ".";
                    }
                    cn.setRankMarker(rank);
                    cn.setInfraGeneric(StringUtils.trimToNull((String)matcher.group(4)));
                }
                cn.setSpecificEpithet(StringUtils.trimToNull((String)matcher.group(5)));
                if (matcher.group(7) != null && matcher.group(7).length() > 1) {
                    cn.setRankMarker(StringUtils.trimToNull((String)matcher.group(7)));
                }
                cn.setInfraSpecificEpithet(StringUtils.trimToNull((String)matcher.group(8)));
                cn.setBracketAuthorship(StringUtils.trimToNull((String)matcher.group(10)));
                if (bracketSubrankFound && cn.getBracketAuthorship() == null && cn.getSpecificEpithet() == null && !this.monomials.contains(cn.getInfraGeneric())) {
                    cn.setBracketAuthorship(cn.getInfraGeneric());
                    cn.setInfraGeneric(null);
                    LOG.debug("swapped subrank with bracket author: {}", (Object)cn.getBracketAuthorship());
                }
                if (matcher.group(11) != null && matcher.group(11).length() > 2) {
                    yearAsString = matcher.group(11).trim();
                    cn.setBracketYear(yearAsString);
                }
                cn.setAuthorship(StringUtils.trimToNull((String)matcher.group(12)));
                if (matcher.group(13) != null && matcher.group(13).length() > 2) {
                    yearAsString = matcher.group(13).trim();
                    cn.setYear(yearAsString);
                }
                this.lookForIrregularRankMarker(cn);
                this.checkEpithetVsAuthorPrefx(cn);
                return true;
            }
        }
        catch (InterruptedException e) {
            LOG.warn("InterruptedException for name: {}", (Object)scientificName, (Object)e);
        }
        catch (ExecutionException e) {
            LOG.warn("ExecutionException for name: {}", (Object)scientificName, (Object)e);
        }
        catch (IllegalStateException e) {
        }
        catch (TimeoutException e) {
            LOG.info("Parsing timeout for name: {}", (Object)scientificName);
        }
        return false;
    }

    public boolean parseNormalisedNameIgnoreAuthors(ParsedName cn, String scientificName) {
        LOG.debug("Parse normed name string ignoring authors: {}", (Object)scientificName);
        Matcher matcher = CANON_NAME_IGNORE_AUTHORS.matcher(scientificName);
        boolean matchFound = matcher.find();
        if (matchFound) {
            if (LOG.isDebugEnabled()) {
                this.logMatcher(matcher);
            }
            cn.setGenusOrAbove(StringUtils.trimToNull((String)matcher.group(1)));
            if (matcher.group(2) != null) {
                cn.setInfraGeneric(StringUtils.trimToNull((String)matcher.group(2)));
                if (!this.monomials.contains(cn.getInfraGeneric())) {
                    cn.setInfraGeneric(null);
                }
            } else if (matcher.group(4) != null) {
                String rank = StringUtils.trimToNull((String)matcher.group(3));
                cn.setRankMarker(rank);
                cn.setInfraGeneric(StringUtils.trimToNull((String)matcher.group(4)));
            }
            cn.setSpecificEpithet(StringUtils.trimToNull((String)matcher.group(5)));
            if (matcher.group(6) != null && matcher.group(6).length() > 1) {
                cn.setRankMarker(StringUtils.trimToNull((String)matcher.group(6)));
            }
            if (matcher.group(7) != null && matcher.group(7).length() >= 2) {
                NormalisedNameParser.setCanonicalInfraSpecies(cn, matcher.group(7));
            } else {
                NormalisedNameParser.setCanonicalInfraSpecies(cn, matcher.group(8));
            }
            this.lookForIrregularRankMarker(cn);
            return true;
        }
        return false;
    }

    private static void setCanonicalInfraSpecies(ParsedName pn, String epi) {
        if (epi == null || epi.equalsIgnoreCase("sec") || epi.equalsIgnoreCase("sensu")) {
            return;
        }
        pn.setInfraSpecificEpithet(StringUtils.trimToNull((String)epi));
    }

    private void lookForIrregularRankMarker(ParsedName cn) {
        if (cn.getRankMarker() == null) {
            Matcher m;
            if (cn.getInfraSpecificEpithet() != null) {
                Matcher m2 = RANK_MARKER_ONLY.matcher(cn.getInfraSpecificEpithet());
                if (m2.find()) {
                    cn.setRankMarker(cn.getInfraSpecificEpithet());
                    cn.setInfraSpecificEpithet(null);
                }
            } else if (cn.getSpecificEpithet() != null && (m = RANK_MARKER_ONLY.matcher(cn.getSpecificEpithet())).find()) {
                cn.setRankMarker(cn.getSpecificEpithet());
                cn.setSpecificEpithet(null);
            }
        }
    }

    private void checkEpithetVsAuthorPrefx(ParsedName cn) {
        if (cn.getRankMarker() == null) {
            if (cn.getInfraSpecificEpithet() != null) {
                String extendedAuthor = cn.getInfraSpecificEpithet() + " " + cn.getAuthorship();
                Matcher m = AUTHOR_TEAM_PATTERN.matcher(extendedAuthor);
                if (m.find()) {
                    LOG.debug("use infraspecific epithet as author prefix");
                    cn.setInfraSpecificEpithet(null);
                    cn.setAuthorship(extendedAuthor);
                }
            } else {
                String extendedAuthor = cn.getSpecificEpithet() + " " + cn.getAuthorship();
                Matcher m = AUTHOR_TEAM_PATTERN.matcher(extendedAuthor);
                if (m.find()) {
                    LOG.debug("use specific epithet as author prefix");
                    cn.setSpecificEpithet(null);
                    cn.setAuthorship(extendedAuthor);
                }
            }
        }
    }

    private void logMatcher(Matcher matcher) {
        int i = -1;
        while (i < matcher.groupCount()) {
            LOG.debug("  {}: >{}<", (Object)(++i), (Object)matcher.group(i));
        }
    }

    public void addMonomials(Set<String> monomials) {
        this.monomials.addAll(monomials);
    }

    public void readMonomialsRsGbifOrg() {
        Set names;
        InputStream in;
        this.monomials.clear();
        try {
            in = RsGbifOrg.authorityUrl((String)"suprageneric.txt").openStream();
            names = FileUtils.streamToSet((InputStream)in);
            this.addMonomials(names);
            LOG.debug("Loaded " + names.size() + " suprageneric names from rs.gbif.org into NameParser");
        }
        catch (IOException e) {
            LOG.warn("Couldn't read suprageneric names dictionary from rs.gbif.org to feed into NameParser: " + e.getMessage());
        }
        catch (Exception e) {
            LOG.warn("Error supplying NameParser with suprageneric names from rs.gbif.org", (Throwable)e);
        }
        try {
            in = RsGbifOrg.authorityUrl((String)"genera.txt").openStream();
            names = FileUtils.streamToSet((InputStream)in);
            this.addMonomials(names);
            LOG.debug("Loaded " + names.size() + " generic names from rs.gbif.org into NameParser");
        }
        catch (IOException e) {
            LOG.warn("Couldn't read generic names dictionary from rs.gbif.org to feed into NameParser: " + e.getMessage());
        }
        catch (Exception e) {
            LOG.warn("Error supplying NameParser with generic names from rs.gbif.org", (Throwable)e);
        }
    }

    public void setMonomials(Set<String> monomials) {
        this.monomials.clear();
        this.monomials.addAll(monomials);
    }

    private class MatcherCallable
    implements Callable<Matcher> {
        private final String scientificName;

        MatcherCallable(String scientificName) {
            this.scientificName = scientificName;
        }

        @Override
        public Matcher call() throws Exception {
            Matcher matcher = NAME_PATTERN.matcher(this.scientificName);
            matcher.find();
            return matcher;
        }
    }
}

