/*
 * Decompiled with CFR 0.152.
 */
package org.gbif.ecat.parser;

import com.google.common.base.Strings;
import java.io.IOException;
import java.io.InputStream;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.gbif.ecat.cfg.RsGbifOrg;
import org.gbif.ecat.model.ParsedName;
import org.gbif.ecat.parser.UnparsableException;
import org.gbif.ecat.voc.NameType;
import org.gbif.ecat.voc.Rank;
import org.gbif.utils.file.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class NameParser {
    /** Class-wide SLF4J logger. */
    protected static Logger log = LoggerFactory.getLogger(NameParser.class);
    /** When true, the parsing methods print trace output (matched groups, timings) to {@code System.out}. */
    public boolean debug = false;
    /**
     * Known monomials, compared case-insensitively. Consulted when deciding whether a bracketed
     * word after the genus is an infrageneric name.
     */
    private TreeSet<String> MONOMIALS = new TreeSet(String.CASE_INSENSITIVE_ORDER);
    /**
     * Quote characters stripped from both ends of a raw name by {@code preClean}. Filled in the
     * static initializer with the double quote (34) and single quote (39), each listed twice.
     */
    private static char[] QUOTES = new char[4];
    protected static final String NAME_LETTERS = "A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152";
    protected static final String name_letters = "a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153";
    protected static final String AUTHOR_LETTERS = "A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}";
    protected static final String author_letters = "a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}";
    protected static final String all_letters_numbers = "a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u01520-9";
    protected static final String AUTHOR_PREFIXES = "(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')";
    protected static final String AUTHOR = "(?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?)";
    protected static final String AUTHOR_TEAM = "(?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?)?(?:(?: ?ex\\.? | & | et | in |, ?|; ?|\\.)(?:(?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?)|al\\.?))*";
    protected static final Pattern AUTHOR_TEAM_PATTERN;
    protected static final String YEAR = "[12][0-9][0-9][0-9?][abcdh?]?(?:[/-][0-9]{1,4})?";
    private static final String RANK_MARKER_ALL;
    private static final Pattern RANK_MARKER;
    private static final Pattern RANK_MARKER_AT_END;
    protected static final String RANK_MARKER_SPECIES;
    protected static final String EPHITHET_PREFIXES = "van|novae";
    protected static final String EPHITHET = "(?:[0-9]+-)?(?:(?:van|novae) [a-z])?[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153+-]{1,}(?<! d)[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153](?<!\\bex)";
    protected static final String MONOMIAL = "[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152](?:\\.|[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153]+)(?:-[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152]?[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153]+)?";
    /**
     * Regex fragment for an infrageneric part: either a bracketed capitalised word or an
     * infrageneric rank marker followed by a capitalised word. Assembled in the static initializer.
     */
    protected static final String INFRAGENERIC;
    /** The multiplication sign (U+00D7) used to mark hybrids. */
    public static final String HYBRID_MARKER = "\u00d7";
    /** Matches a hybrid marker surrounded by single spaces; {@code parse} rejects such names as hybrid formulas. */
    public static final Pattern HYBRID_FORMULA_PATTERN;
    /** Matches a quoted cultivar epithet, optionally preceded by "cv."; group 1 is the epithet. */
    protected static final Pattern CULTIVAR;
    /** Case-insensitive test for virus-like names (phage, virus, trailing "ictv"). */
    public static final Pattern IS_VIRUS_PATTERN;
    /** Case-insensitive test for the word "vector"; only consulted by {@code parse} after all parsing passes failed. */
    private static final Pattern IS_VIRUS_PATTERN_POSTFAIL;
    private static final String SENSU = "(s\\.(?:l\\.|str\\.)|sensu\\s+(?:latu|strictu?)|(sec|sensu|auct|non)((\\.|\\s)(.*))?)";
    private static final Pattern EXTRACT_SENSU;
    protected static final Pattern EXTRACT_NOMSTATUS;
    private static final Pattern EXTRACT_REMARKS;
    private static final Pattern EXTRACT_YEAR;
    private static final Pattern COMMA_BEFORE_YEAR;
    private static final Pattern REPLACE_QUOTES;
    private static final Pattern NORM_QUOTES;
    private static final Pattern NORM_UPPERCASE_WORDS;
    private static final Pattern NORM_WHITESPACE;
    private static final Pattern NORM_NO_SQUARE_BRACKETS;
    private static final Pattern NORM_BRACKETS_OPEN;
    private static final Pattern NORM_BRACKETS_CLOSE;
    private static final Pattern NORM_BRACKETS_OPEN_STRONG;
    private static final Pattern NORM_BRACKETS_CLOSE_STRONG;
    private static final Pattern NORM_AND;
    private static final Pattern NORM_ET_AL;
    private static final Pattern NORM_AMPERSAND_WS;
    private static final Pattern NORM_HYPHENS;
    private static final Pattern NORM_SUBGENUS;
    private static final Pattern NO_Q_MARKS;
    private static final Pattern NORM_COMMAS;
    private static final String AUTHOR_STRONG = "[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}]{2,}\\.?";
    private static final Pattern NORM_ORIG_AUTH;
    private static final Pattern NORM_ORIG_AUTH2;
    private static final Pattern NORM_IMPRINT_YEAR;
    private static final Pattern NORM_HYBRIDS_GENUS;
    private static final Pattern NORM_HYBRIDS_EPITH;
    private static final Pattern NORM_HYBRIDS_FORM;
    private static final Pattern NORM_INDET;
    private static final Pattern NORM_DOTS;
    private static final Pattern NORM_TF_GENUS;
    private static final Pattern NORM_IN_BIB;
    private static final Pattern NORM_PREFIXES;
    private static final Pattern NORM_SUFFIXES;
    private static final Pattern BLACKLISTED;
    private static final Pattern DOUBTFUL;
    private static final String[] badNameParts;
    private static final Pattern XML_TAGS;
    private static final Pattern FIRST_WORD;
    private static final String WEIRD_CHARS = "[\u00a7$%/#+!;:_|\"=*]";
    private static final Pattern NORM_WEIRD_CHARS;
    private static final Pattern COMB_BAS_AUTHOR_SWAP;
    public static final Pattern CANON_NAME_IGNORE_AUTHORS;
    public static final Pattern NAME_PATTERN;

    /**
     * Removes trailing entries of {@code badNameParts} from a name.
     *
     * <p>A suffix is removed when the name ends with a space followed by one of the bad parts,
     * compared case-insensitively. Removal repeats until no bad part matches the end any more,
     * so several stacked suffixes are all stripped.
     *
     * <p>The comparison runs directly on the original string via
     * {@link String#regionMatches(boolean, int, String, int, int)} instead of on a lower-cased copy.
     * This keeps the result independent of the default locale and guarantees that the cut-off
     * index always refers to the original string, even for characters whose lower-case form has a
     * different length.
     *
     * @param name the name to clean, must not be null
     * @return the name without trailing bad parts; the same instance if nothing was removed
     */
    private static String cutoffBadSuffices(String name) {
        int end = name.length();
        boolean stripped;
        do {
            stripped = false;
            for (String bad : badNameParts) {
                // position of the blank that has to precede the bad part
                int blankPos = end - bad.length() - 1;
                if (blankPos < 0 || name.charAt(blankPos) != ' ') {
                    continue;
                }
                if (!name.regionMatches(true, blankPos + 1, bad, 0, bad.length())) {
                    continue;
                }
                end = blankPos;
                stripped = true;
            }
        } while (stripped);
        return end == name.length() ? name : name.substring(0, end);
    }

    /**
     * Aggressive clean-up applied when the regular normalisation did not yield a parsable name.
     *
     * <p>Steps: cut off trailing bad name parts, blank out the characters matched by
     * {@code NORM_WEIRD_CHARS}, re-capitalise the first word (upper-case group 3, lower-case
     * group 4 of {@code FIRST_WORD}, skipped when group 2 matched) and finally normalise a
     * leading hybrid genus marker.
     *
     * @param name the name to clean, may be null
     * @return the cleaned name, or null if the input was null
     */
    protected static String cleanStrong(String name) {
        if (name == null) {
            return null;
        }
        String cleaned = NameParser.cutoffBadSuffices(name);
        cleaned = NORM_WEIRD_CHARS.matcher(cleaned).replaceAll(" ");

        Matcher firstWord = FIRST_WORD.matcher(cleaned);
        if (firstWord.find() && firstWord.group(2) == null) {
            String lead = StringUtils.defaultString(firstWord.group(1));
            String initial = firstWord.group(3).toUpperCase();
            String remainder = firstWord.group(4).toLowerCase();
            cleaned = firstWord.replaceFirst(lead + initial + remainder + " ");
        }

        Matcher hybridGenus = NORM_HYBRIDS_GENUS.matcher(cleaned);
        if (hybridGenus.find()) {
            cleaned = hybridGenus.replaceFirst("\u00d7$1");
        }
        return cleaned;
    }

    /**
     * Small command line driver: parses every argument in debug mode and prints the input, its
     * normalised form and the parsed result. Two sample names are used when no argument is given.
     *
     * @param args scientific names to parse
     */
    public static void main(String[] args) {
        NameParser parser = new NameParser();
        parser.debug = true;

        String[] names = args;
        if (names.length == 0) {
            names = new String[]{"Abies brevifolia cv. ex Dallim.", "Abies brevifolia cv. excelsia Dallim."};
        }

        for (String name : names) {
            System.out.println("\n\nIN   : " + name);

            ParsedName parsed;
            try {
                parsed = parser.parse(name);
            }
            catch (UnparsableException e) {
                parsed = null;
                e.printStackTrace();
            }

            System.out.println("NORM : " + NameParser.normalize(name));
            if (parsed == null) {
                System.out.println("FULL : CANNOT PARSE");
            } else {
                System.out.println("FULL : " + parsed);
            }
        }
    }

    /**
     * Light-weight normalisation of a name string that does not remove any information.
     *
     * <p>In order: unescapes unicode sequences, applies {@code NORM_DOTS}, puts a comma before
     * years, removes whitespace around hyphens, pads ampersands with blanks, normalises the
     * spacing and stray commas around brackets and commas, normalises hybrid markers, turns
     * ALL-UPPERCASE words of three or more letters into Capitalised words and finally collapses
     * whitespace.
     *
     * <p>Each upper-case word is rewritten in place at the position where it was matched. The
     * former implementation used {@code String.replaceFirst} with the matched text, which could
     * alter an earlier, unrelated occurrence of the same letters (for instance inside a
     * mixed-case word) and leave the actual match untouched.
     *
     * @param name the raw name, may be null
     * @return the normalised, trimmed name (never null for non-null input), or null for null input
     */
    public static String normalize(String name) {
        if (name == null) {
            return null;
        }
        String normed = org.gbif.ecat.utils.StringUtils.unescapeUnicodeChars(name);
        normed = NORM_DOTS.matcher(normed).replaceAll("$1. ");
        normed = COMMA_BEFORE_YEAR.matcher(normed).replaceAll("$1, $2");
        normed = NORM_HYPHENS.matcher(normed).replaceAll("-");
        normed = NORM_AMPERSAND_WS.matcher(normed).replaceAll(" & ");
        normed = NORM_BRACKETS_OPEN.matcher(normed).replaceAll(" $1");
        normed = NORM_BRACKETS_CLOSE.matcher(normed).replaceAll("$1 ");
        normed = NORM_COMMAS.matcher(normed).replaceAll(", ");
        normed = NORM_HYBRIDS_GENUS.matcher(normed).replaceFirst("\u00d7$1");
        normed = NORM_HYBRIDS_EPITH.matcher(normed).replaceFirst("$1 \u00d7$2");
        normed = NORM_HYBRIDS_FORM.matcher(normed).replaceAll(" \u00d7 ");

        // Capitalise upper-case words exactly where they were matched.
        Matcher upperWords = NORM_UPPERCASE_WORDS.matcher(normed);
        StringBuffer capitalised = new StringBuffer(normed.length());
        while (upperWords.find()) {
            // Locale.ROOT keeps the result independent of the JVM default locale (e.g. Turkish dotless i)
            String word = upperWords.group(1) + upperWords.group(2).toLowerCase(java.util.Locale.ROOT);
            upperWords.appendReplacement(capitalised, Matcher.quoteReplacement(word));
        }
        upperWords.appendTail(capitalised);
        normed = capitalised.toString();

        normed = NORM_WHITESPACE.matcher(normed).replaceAll(" ");
        return StringUtils.trimToEmpty(normed);
    }

    /**
     * Stronger normalisation that may drop information in order to make a name parsable.
     *
     * <p>In order: unifies quote characters and strips quotes/commas at both ends, removes
     * question marks following lower-case letters, removes known prefixes, applies
     * {@code NORM_TF_GENUS} and {@code NORM_IMPRINT_YEAR}, capitalises ALL-UPPERCASE words,
     * re-brackets original authorships (only when the name contains a year and is shorter than
     * 80 characters), replaces square brackets by blanks, collapses runs of brackets, unifies
     * "and"/"et"/"und" to an ampersand and "&amp; al" to "et al.", removes known suffixes, puts a
     * subgenus into brackets and finally applies {@link #normalize(String)}.
     *
     * <p>Each upper-case word is rewritten in place at the position where it was matched. The
     * former implementation used {@code String.replaceFirst} with the matched text, which could
     * alter an earlier, unrelated occurrence of the same letters and leave the actual match
     * untouched.
     *
     * @param name the name to normalise, may be null
     * @return the normalised, trimmed name (never null for non-null input), or null for null input
     */
    protected static String normalizeStrong(String name) {
        if (name == null) {
            return null;
        }
        String normed = NORM_QUOTES.matcher(name).replaceAll("'");
        normed = REPLACE_QUOTES.matcher(normed).replaceAll("");
        normed = NO_Q_MARKS.matcher(normed).replaceAll("$1");
        normed = NORM_PREFIXES.matcher(normed).replaceAll("");
        normed = NORM_TF_GENUS.matcher(normed).replaceAll("$1$2 ");
        normed = NORM_IMPRINT_YEAR.matcher(normed).replaceAll("$1");

        // Capitalise upper-case words exactly where they were matched.
        Matcher upperWords = NORM_UPPERCASE_WORDS.matcher(normed);
        StringBuffer capitalised = new StringBuffer(normed.length());
        while (upperWords.find()) {
            // Locale.ROOT keeps the result independent of the JVM default locale (e.g. Turkish dotless i)
            String word = upperWords.group(1) + upperWords.group(2).toLowerCase(java.util.Locale.ROOT);
            upperWords.appendReplacement(capitalised, Matcher.quoteReplacement(word));
        }
        upperWords.appendTail(capitalised);
        normed = capitalised.toString();

        // Original authorships are only re-bracketed for reasonably short names that carry a year.
        if (EXTRACT_YEAR.matcher(normed).find() && normed.length() < 80) {
            normed = NORM_ORIG_AUTH.matcher(normed).replaceAll("($1 $2)");
            normed = NORM_ORIG_AUTH2.matcher(normed).replaceAll("($1 $2)");
        }

        normed = NORM_NO_SQUARE_BRACKETS.matcher(normed).replaceAll(" $1 ");
        normed = NORM_BRACKETS_OPEN_STRONG.matcher(normed).replaceAll(" (");
        normed = NORM_BRACKETS_CLOSE_STRONG.matcher(normed).replaceAll(") ");
        normed = NORM_AND.matcher(normed).replaceAll(" & ");
        normed = NORM_ET_AL.matcher(normed).replaceAll(" et al.");
        normed = NORM_SUFFIXES.matcher(normed).replaceAll("");
        normed = NORM_SUBGENUS.matcher(normed).replaceAll("$1 ($2) $3");

        normed = NameParser.normalize(normed);
        return StringUtils.trimToEmpty(normed);
    }

    /**
     * First clean-up of a raw name: unescapes unicode sequences, removes everything matched by
     * {@code XML_TAGS}, strips enclosing quotes and collapses whitespace.
     *
     * <p>For every entry of {@code QUOTES} the leading run of that quote character and of
     * whitespace is removed; only if such a run exists, the trailing run of the same quote
     * character is removed as well.
     *
     * @param name the raw name
     * @return the pre-cleaned, trimmed name
     */
    protected static String preClean(String name) {
        String cleaned = org.gbif.ecat.utils.StringUtils.unescapeUnicodeChars(name);
        cleaned = XML_TAGS.matcher(cleaned).replaceAll("").trim();

        for (char quote : QUOTES) {
            // count leading quote/whitespace characters
            int leading = 0;
            while (leading < cleaned.length()) {
                char ch = cleaned.charAt(leading);
                if (ch != quote && !Character.isWhitespace(ch)) {
                    break;
                }
                leading++;
            }
            if (leading == 0) {
                continue;
            }
            // count trailing quote characters, never reaching into the stripped prefix
            int trailing = 0;
            while (quote == cleaned.charAt(cleaned.length() - 1 - trailing)
                    && cleaned.length() - leading - trailing > 0) {
                trailing++;
            }
            cleaned = cleaned.substring(leading, cleaned.length() - trailing);
        }

        cleaned = NORM_WHITESPACE.matcher(cleaned).replaceAll(" ");
        return StringUtils.trimToEmpty(cleaned);
    }

    /**
     * Sets the trimmed infraspecific epithet on the parsed name unless the candidate is null or
     * one of the words "sec" / "sensu" (compared case-insensitively).
     *
     * @param pn the parsed name to update
     * @param epi the candidate epithet, may be null
     */
    private static void setCanonicalInfraSpecies(ParsedName pn, String epi) {
        boolean usable = epi != null
                && !epi.equalsIgnoreCase("sec")
                && !epi.equalsIgnoreCase("sensu");
        if (usable) {
            pn.setInfraSpecificEpithet(StringUtils.trimToNull(epi));
        }
    }

    /**
     * Adds the given names to the set of known monomials of this parser.
     *
     * @param monomials names to add, must not be null
     */
    public void addMonomials(Set<String> monomials) {
        MONOMIALS.addAll(monomials);
    }

    /**
     * Detects a lowest epithet that is really the prefix of the author.
     *
     * <p>Only applies when no rank marker was found. The infraspecific epithet (or, if there is
     * none, the specific epithet) is prepended to the authorship; if that combined string matches
     * {@code AUTHOR_TEAM_PATTERN} the epithet is cleared and the combined string becomes the
     * authorship.
     *
     * @param cn the parsed name to inspect and possibly modify
     */
    private void checkEpithetVsAuthorPrefx(ParsedName cn) {
        if (cn.getRankMarker() != null) {
            return;
        }
        boolean infraspecific = cn.getInfraSpecificEpithet() != null;
        String epithet = infraspecific ? cn.getInfraSpecificEpithet() : cn.getSpecificEpithet();
        String extendedAuthor = epithet + " " + cn.getAuthorship();

        if (!AUTHOR_TEAM_PATTERN.matcher(extendedAuthor).find()) {
            return;
        }
        if (infraspecific) {
            if (this.debug) {
                System.out.println("use infraspecific epithet as author prefix");
            }
            cn.setInfraSpecificEpithet(null);
        } else {
            if (this.debug) {
                System.out.println("use specific epithet as author prefix");
            }
            cn.setSpecificEpithet(null);
        }
        cn.setAuthorship(extendedAuthor);
    }

    /**
     * Returns the live, mutable set of known monomials used by this parser (not a copy).
     *
     * @return the internal monomial set
     */
    public Set<String> getMonomials() {
        return MONOMIALS;
    }

    /**
     * Detects a rank marker that was parsed as an epithet.
     *
     * <p>Only applies when no rank marker is set. If the infraspecific epithet exists it is the
     * only candidate; otherwise the specific epithet is checked. A candidate matching
     * {@code RANK_MARKER} is moved into the rank/rank marker and the epithet is cleared.
     *
     * @param cn the parsed name to inspect and possibly modify
     */
    private void lookForIrregularRankMarker(ParsedName cn) {
        if (cn.getRankMarker() != null) {
            return;
        }
        String infraEpithet = cn.getInfraSpecificEpithet();
        if (infraEpithet != null) {
            if (RANK_MARKER.matcher(infraEpithet).find()) {
                cn.setRankAndRankMarker(infraEpithet);
                cn.setInfraSpecificEpithet(null);
            }
            return;
        }
        String speciesEpithet = cn.getSpecificEpithet();
        if (speciesEpithet != null && RANK_MARKER.matcher(speciesEpithet).find()) {
            cn.setRankAndRankMarker(speciesEpithet);
            cn.setSpecificEpithet(null);
        }
    }

    /**
     * Parses a scientific name string into its atomised parts.
     *
     * <p>Processing order:
     * <ol>
     *   <li>pre-clean and lightly normalise the input;</li>
     *   <li>extract a cultivar epithet;</li>
     *   <li>reject blacklisted names, hybrid formulas and viruses;</li>
     *   <li>extract nomenclatural status, sensu/sec references and remarks;</li>
     *   <li>detect informal names (trailing rank marker, indet. markers);</li>
     *   <li>normalise strongly and parse in up to three passes: the normalised name, the strongly
     *       cleaned name and finally the strongly cleaned name ignoring authorships;</li>
     *   <li>restore a rank set before parsing, run post assertions and derive the name type.</li>
     * </ol>
     *
     * @param scientificName the full name string, optionally with authorship
     * @return the parsed name, never null
     * @throws UnparsableException if the name is null/empty, blacklisted, a hybrid formula, a
     *         virus or cannot be parsed by any of the passes
     */
    public <T> ParsedName<T> parse(String scientificName) throws UnparsableException {
        if (Strings.isNullOrEmpty(scientificName)) {
            throw new UnparsableException(null, scientificName);
        }
        long start = this.debug ? System.currentTimeMillis() : 0L;

        String name = NameParser.normalize(NameParser.preClean(scientificName));
        if (Strings.isNullOrEmpty(name)) {
            throw new UnparsableException(null, scientificName);
        }

        ParsedName pn = new ParsedName();

        // cultivar epithets are taken out of the name before anything else
        Matcher cultivar = CULTIVAR.matcher(name);
        if (cultivar.find()) {
            pn.cultivar = cultivar.group(1);
            name = cultivar.replaceFirst(" ");
            pn.type = NameType.cultivar;
            pn.setRank(Rank.Cultivar);
            if (this.debug) {
                System.out.println("Cultivar:" + pn.cultivar);
            }
        }

        // names this parser refuses to handle
        if (BLACKLISTED.matcher(name).find()) {
            throw new UnparsableException(NameType.blacklisted, scientificName);
        }
        if (HYBRID_FORMULA_PATTERN.matcher(name).find()) {
            throw new UnparsableException(NameType.hybrid, scientificName);
        }
        if (IS_VIRUS_PATTERN.matcher(name).find()) {
            throw new UnparsableException(NameType.virus, scientificName);
        }

        // strip and remember annotations that are not part of the name itself
        Matcher nomStatus = EXTRACT_NOMSTATUS.matcher(name);
        if (nomStatus.find()) {
            pn.nomStatus = StringUtils.trimToNull(nomStatus.group(1));
            name = nomStatus.replaceFirst("");
        }
        Matcher sensu = EXTRACT_SENSU.matcher(name);
        if (sensu.find()) {
            pn.sensu = StringUtils.trimToNull(sensu.group(1));
            name = sensu.replaceFirst("");
        }
        Matcher remarks = EXTRACT_REMARKS.matcher(name);
        if (remarks.find()) {
            pn.remarks = StringUtils.trimToNull(remarks.group(1));
            name = remarks.replaceFirst("");
        }

        // informal names: a rank marker at the very end (a trailing "f." / "f" is left alone) ...
        Matcher rankAtEnd = RANK_MARKER_AT_END.matcher(name);
        if (rankAtEnd.find() && !name.endsWith(" f.") && !name.endsWith(" f")) {
            pn.type = NameType.informal;
            pn.setRankMarker(rankAtEnd.group(2));
            name = rankAtEnd.replaceAll(" ");
        }
        // ... or an indet. marker
        Matcher indet = NORM_INDET.matcher(name);
        if (indet.find()) {
            pn.type = NameType.informal;
            name = indet.replaceAll(" ");
        }

        name = NameParser.normalizeStrong(name);
        if (Strings.isNullOrEmpty(name)) {
            throw new UnparsableException(null, scientificName);
        }

        // a rank determined above must survive the parsing passes
        Rank origRank = pn.getRank();

        int passNo = 1;
        if (!this.parseNormalisedName(pn, name)) {
            if (this.debug) {
                System.out.println("Can't parse, use dirty normalizer");
            }
            String deDirtedName = NameParser.cleanStrong(name);
            passNo = 2;
            if (!this.parseNormalisedName(pn, deDirtedName)) {
                if (this.debug) {
                    System.out.println("Still can't parse, try to ignore authors");
                }
                passNo = 3;
                boolean canonicalParsed = this.parseNormalisedNameIgnoreAuthors(pn, deDirtedName);
                pn.authorsParsed = false;
                if (!canonicalParsed) {
                    if (IS_VIRUS_PATTERN_POSTFAIL.matcher(name).find()) {
                        throw new UnparsableException(NameType.virus, scientificName);
                    }
                    throw new UnparsableException(null, scientificName);
                }
            }
        }

        if (origRank != null) {
            pn.setRank(origRank);
        }
        this.postAssertParsing(pn, scientificName, name, passNo);

        // derive the name type if none of the steps above decided on one
        if (pn.type == null) {
            if (!DOUBTFUL.matcher(scientificName).find()) {
                pn.type = NameType.doubtful;
            } else if (scientificName.equals(pn.canonicalNameWithAuthorship())) {
                pn.type = NameType.wellformed;
            } else {
                pn.type = NameType.sciname;
            }
        }

        if (this.debug) {
            System.out.println("Parsing time: " + (System.currentTimeMillis() - start));
        }
        return pn;
    }

    /**
     * Sanity check run after a successful parse: a name with a genus-or-above part that is not a
     * binomial must not start with a lower-case letter.
     *
     * @param pn the parsed name
     * @param rawName the original input, used for the exception
     * @param normedName the normalised name that was parsed
     * @param passNo number of the parsing pass that succeeded (currently not evaluated)
     * @throws UnparsableException if the check fails
     */
    private void postAssertParsing(ParsedName<?> pn, String rawName, String normedName, int passNo) throws UnparsableException {
        if (pn.genusOrAbove == null || pn.isBinomial()) {
            return;
        }
        if (Character.isLowerCase(normedName.charAt(0))) {
            throw new UnparsableException(null, rawName);
        }
    }

    /**
     * Tries to parse a normalised name including its authorship with {@code NAME_PATTERN}.
     * The pattern has to match the complete string.
     *
     * <p>Groups used: 1 genus or above, 2 bracketed infrageneric name, 3/4 infrageneric rank
     * marker and name, 5 specific epithet, 7 infraspecific rank marker, 8 infraspecific epithet,
     * 10/11 bracket authorship and year, 12/13 authorship and year.
     *
     * @param cn the parsed name to populate
     * @param scientificName the normalised name string
     * @return true if the name could be parsed, false otherwise (cn is left untouched then)
     */
    private boolean parseNormalisedName(ParsedName<?> cn, String scientificName) {
        if (this.debug) {
            System.out.println("Parse normed name string: " + scientificName);
        }
        Matcher matcher = NAME_PATTERN.matcher(scientificName);
        // only a match covering the entire string counts
        if (!matcher.find() || !matcher.group(0).equals(scientificName)) {
            return false;
        }
        if (this.debug) {
            for (int i = 0; i <= matcher.groupCount(); i++) {
                System.out.println("   " + i + ": >" + matcher.group(i) + "<");
            }
        }

        cn.setGenusOrAbove(StringUtils.trimToNull(matcher.group(1)));

        boolean bracketSubrankFound = matcher.group(2) != null;
        if (bracketSubrankFound) {
            cn.setInfraGeneric(StringUtils.trimToNull(matcher.group(2)));
        } else if (matcher.group(4) != null) {
            String rank = StringUtils.trimToNull(matcher.group(3));
            // rank markers are stored with a trailing dot
            cn.setRankAndRankMarker(rank.endsWith(".") ? rank : rank + ".");
            cn.setInfraGeneric(StringUtils.trimToNull(matcher.group(4)));
        }

        cn.setSpecificEpithet(StringUtils.trimToNull(matcher.group(5)));

        String infraRank = matcher.group(7);
        if (infraRank != null && infraRank.length() > 1) {
            cn.setRankAndRankMarker(StringUtils.trimToNull(infraRank));
        }
        cn.setInfraSpecificEpithet(StringUtils.trimToNull(matcher.group(8)));
        cn.setBracketAuthorship(StringUtils.trimToNull(matcher.group(10)));

        // A bracketed word after a monomial is taken as the bracket author unless it is a known monomial.
        if (bracketSubrankFound
                && cn.getBracketAuthorship() == null
                && cn.getSpecificEpithet() == null
                && !this.MONOMIALS.contains(cn.getInfraGeneric())) {
            cn.setBracketAuthorship(cn.getInfraGeneric());
            cn.setInfraGeneric(null);
            if (this.debug) {
                System.out.println("swapped subrank with bracket author: " + cn.getBracketAuthorship());
            }
        }

        String bracketYear = matcher.group(11);
        if (bracketYear != null && bracketYear.length() > 2) {
            cn.setBracketYear(bracketYear.trim());
        }
        cn.setAuthorship(StringUtils.trimToNull(matcher.group(12)));
        String year = matcher.group(13);
        if (year != null && year.length() > 2) {
            cn.setYear(year.trim());
        }

        this.lookForIrregularRankMarker(cn);
        this.checkEpithetVsAuthorPrefx(cn);
        return true;
    }

    /**
     * Last-resort parsing that only extracts the canonical name parts with
     * {@code CANON_NAME_IGNORE_AUTHORS} and ignores any authorship. Unlike
     * {@code parseNormalisedName} a partial match of the string is sufficient.
     *
     * <p>Groups used: 1 genus or above, 2 bracketed infrageneric name (kept only if it is a known
     * monomial), 3/4 infrageneric rank marker and name, 5 specific epithet, 6 infraspecific rank
     * marker, 7 or 8 infraspecific epithet.
     *
     * @param cn the parsed name to populate
     * @param scientificName the normalised name string
     * @return true if the pattern matched, false otherwise
     */
    private boolean parseNormalisedNameIgnoreAuthors(ParsedName<?> cn, String scientificName) {
        if (this.debug) {
            System.out.println("Parse normed name string ignoring authors: " + scientificName);
        }
        Matcher matcher = CANON_NAME_IGNORE_AUTHORS.matcher(scientificName);
        if (!matcher.find()) {
            return false;
        }
        if (this.debug) {
            for (int i = 0; i <= matcher.groupCount(); i++) {
                System.out.println("   " + i + ": >" + matcher.group(i) + "<");
            }
        }

        cn.setGenusOrAbove(StringUtils.trimToNull(matcher.group(1)));

        if (matcher.group(2) != null) {
            cn.setInfraGeneric(StringUtils.trimToNull(matcher.group(2)));
            // without authors a bracketed word is only trusted if it is a known monomial
            if (!this.MONOMIALS.contains(cn.getInfraGeneric())) {
                cn.setInfraGeneric(null);
            }
        } else if (matcher.group(4) != null) {
            cn.setRankMarker(StringUtils.trimToNull(matcher.group(3)));
            cn.setInfraGeneric(StringUtils.trimToNull(matcher.group(4)));
        }

        cn.setSpecificEpithet(StringUtils.trimToNull(matcher.group(5)));

        String infraRank = matcher.group(6);
        if (infraRank != null && infraRank.length() > 1) {
            cn.setRankMarker(StringUtils.trimToNull(infraRank));
        }

        String firstCandidate = matcher.group(7);
        boolean useFirst = firstCandidate != null && firstCandidate.length() >= 2;
        NameParser.setCanonicalInfraSpecies(cn, useFirst ? firstCandidate : matcher.group(8));

        this.lookForIrregularRankMarker(cn);
        return true;
    }

    /**
     * Parses a name and returns only its canonical form.
     *
     * @param scientificName the full name string, may be null
     * @return the canonical name, or null if the input is null/empty or cannot be parsed
     *         (an unparsable name is logged as a warning)
     */
    public String parseToCanonical(String scientificName) {
        if (Strings.isNullOrEmpty(scientificName)) {
            return null;
        }
        ParsedName pn;
        try {
            pn = this.parse(scientificName);
        }
        catch (UnparsableException e) {
            log.warn("Unparsable name " + scientificName + " >>> " + e.getMessage());
            return null;
        }
        return pn == null ? null : pn.canonicalName();
    }

    /**
     * Replaces the known monomials with the suprageneric and generic name dictionaries published
     * on rs.gbif.org.
     *
     * <p>The current set is cleared first. Each dictionary is loaded independently; a failure to
     * read one of them is logged as a warning and does not prevent loading the other.
     */
    public void readMonomialsRsGbifOrg() {
        this.MONOMIALS.clear();
        this.loadMonomialsFromRsGbifOrg("suprageneric.txt", "suprageneric");
        this.loadMonomialsFromRsGbifOrg("genera.txt", "generic");
    }

    /**
     * Loads one name dictionary from rs.gbif.org into the monomial set and always closes the
     * underlying stream, which the former inline code never did.
     *
     * @param fileName name of the authority file to read
     * @param label kind of names contained in the file, only used in log messages
     */
    private void loadMonomialsFromRsGbifOrg(String fileName, String label) {
        InputStream in = null;
        try {
            in = RsGbifOrg.authorityUrl(fileName).openStream();
            Set<String> names = FileUtils.streamToSet(in);
            this.addMonomials(names);
            log.debug("Loaded " + names.size() + " " + label + " names from rs.gbif.org into NameParser");
        }
        catch (IOException e) {
            log.warn("Couldn't read " + label + " names dictionary from rs.gbif.org to feed into NameParser: " + e.getMessage());
        }
        catch (Exception e) {
            log.warn("Error supplying NameParser with " + label + " names from rs.gbif.org", (Throwable)e);
        }
        finally {
            if (in != null) {
                try {
                    // closing again is harmless should the reading helper already have closed the stream
                    in.close();
                }
                catch (IOException ignored) {
                    // best effort: nothing useful can be done if closing fails
                }
            }
        }
    }

    /**
     * Replaces the known monomials of this parser with the given names. The current set is
     * cleared before the new names are added.
     *
     * @param monomials the new monomials, must not be null
     */
    public void setMonomials(Set<String> monomials) {
        TreeSet<String> known = this.MONOMIALS;
        known.clear();
        known.addAll(monomials);
    }

    static {
        NameParser.QUOTES[0] = 34;
        NameParser.QUOTES[1] = 39;
        NameParser.QUOTES[2] = 34;
        NameParser.QUOTES[3] = 39;
        AUTHOR_TEAM_PATTERN = Pattern.compile("^(?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?)?(?:(?: ?ex\\.? | & | et | in |, ?|; ?|\\.)(?:(?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?)|al\\.?))*$");
        RANK_MARKER_ALL = "(notho)? *(" + StringUtils.join(Rank.RANK_MARKER_MAP.keySet(), (String)"|") + ")\\.?";
        RANK_MARKER = Pattern.compile("^" + RANK_MARKER_ALL + "$");
        RANK_MARKER_AT_END = Pattern.compile(" " + RANK_MARKER_ALL + "$");
        RANK_MARKER_SPECIES = "(?:notho)?(?:" + StringUtils.join(Rank.RANK_MARKER_MAP_INFRASPECIFIC.keySet(), (String)"|") + "|agg)\\.?";
        INFRAGENERIC = "(?:\\( ?([A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153-]+) ?\\)|(" + StringUtils.join(Rank.RANK_MARKER_MAP_INFRAGENERIC.keySet(), (String)"|") + ")\\.? ?([" + NAME_LETTERS + "][" + name_letters + "-]+)" + ")";
        HYBRID_FORMULA_PATTERN = Pattern.compile(" \u00d7 ");
        CULTIVAR = Pattern.compile("(?: cv\\.? ?)?[\"'] ?((?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152]?[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153]+[- ]?){1,3}) ?[\"']");
        IS_VIRUS_PATTERN = Pattern.compile("(\\b(bacterio)?phage(s)?\\b|virus(es)?\\b|\\bictv$)", 2);
        IS_VIRUS_PATTERN_POSTFAIL = Pattern.compile("(\\b(vector)\\b)", 2);
        EXTRACT_SENSU = Pattern.compile(",?\\s+((s\\.(?:l\\.|str\\.)|sensu\\s+(?:latu|strictu?)|(sec|sensu|auct|non)((\\.|\\s)(.*))?)$|\\((s\\.(?:l\\.|str\\.)|sensu\\s+(?:latu|strictu?)|(sec|sensu|auct|non)((\\.|\\s)(.*))?)\\))");
        EXTRACT_NOMSTATUS = Pattern.compile("(?:, ?| )\\(?((?:comb|gen|sp|var)?[\\. ] ?nov\\.?(?: ?ined\\.?)?|ined\\.|nom(?:\\s+|\\.\\s*|en\\s+)(?:utiq(?:ue\\s+|\\.\\s*))?(?:ambig|alter|alt|correct|cons|dubium|dub|herb|illeg|invalid|inval|negatum|neg|novum|nov|nudum|nud|oblitum|obl|praeoccup|prov|prot|transf|superfl|super|rejic|rej)\\.?(?:\\s+(?:prop|proposed)\\.?)?)\\)?");
        EXTRACT_REMARKS = Pattern.compile("\\s+(anon\\.?)(\\s.+)?$");
        // A year-like token: leading 1 or 2, two digits, then a digit or '?', an optional
        // suffix character out of "abcdh?", an optional '/' or '-' followed by 1-4 digits,
        // then optional whitespace and an optional closing parenthesis.
        EXTRACT_YEAR = Pattern.compile("([12][0-9][0-9][0-9?][abcdh?]?(?:[/-][0-9]{1,4})?\\s*\\)?)");
        // Group 1: a run of commas, or one character that is not a digit, '(', '[' or '"'.
        // Then optional whitespace. Group 2: three consecutive digits.
        COMMA_BEFORE_YEAR = Pattern.compile("(,+|[^0-9\\(\\[\"])\\s*(\\d{3})");
        // A run of double quotes, apostrophes or commas at the start of the input
        // (after optional whitespace) or at its end (before optional whitespace).
        REPLACE_QUOTES = Pattern.compile("(^\\s*[\"',]+)|([\"',]+\\s*$)");
        // One or more quote-like characters: double quote, apostrophe, backtick or
        // acute accent (U+00B4).
        NORM_QUOTES = Pattern.compile("([\"'`\u00b4]+)");
        // A whole word of at least three uppercase letters, captured as its first
        // letter (group 1) and the remaining letters (group 2).
        NORM_UPPERCASE_WORDS = Pattern.compile("\\b(\\p{Lu})(\\p{Lu}{2,})\\b");
        // Any run of whitespace.
        NORM_WHITESPACE = Pattern.compile("\\s+");
        // A square-bracketed span, matched reluctantly; group 1 is the content.
        NORM_NO_SQUARE_BRACKETS = Pattern.compile("\\[(.*?)\\]");
        // An opening bracket ('{', '(' or '[', group 1) followed by optional whitespace
        // and an optional comma.
        NORM_BRACKETS_OPEN = Pattern.compile("([{(\\[])\\s*,?");
        // An optional comma and optional whitespace followed by a closing bracket
        // ('}', ')' or ']', group 1).
        NORM_BRACKETS_CLOSE = Pattern.compile(",?\\s*([})\\]])");
        // One or more opening brackets of any kind, each optionally padded by one space.
        NORM_BRACKETS_OPEN_STRONG = Pattern.compile("( ?[{(\\[] ?)+");
        // One or more closing brackets of any kind, each optionally padded by one space.
        NORM_BRACKETS_CLOSE_STRONG = Pattern.compile("( ?[})\\]] ?)+");
        // The conjunctions "and", "et", "und" or the HTML-escaped ampersand "&amp;",
        // each with a single space on both sides.
        NORM_AND = Pattern.compile(" (and|et|und|&amp;) ");
        // The literal "& al" with an optional trailing dot.
        NORM_ET_AL = Pattern.compile("& al\\.?");
        // A bare ampersand.
        // NOTE(review): the name suggests it is used to normalise whitespace around
        // '&'; the replacement is not visible here - confirm at the call site.
        NORM_AMPERSAND_WS = Pattern.compile("&");
        // A hyphen together with any whitespace on either side of it.
        NORM_HYPHENS = Pattern.compile("\\s*-\\s*");
        NORM_SUBGENUS = Pattern.compile("([A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152](?:\\.|[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153]+)(?:-[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152]?[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153]+)?) ([A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152](?:\\.|[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153]+)(?:-[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152]?[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153]+)?) ([a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153+-]{5,})");
        NO_Q_MARKS = Pattern.compile("([a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}])\\?+");
        NORM_COMMAS = Pattern.compile("\\s*,+");
        NORM_ORIG_AUTH = Pattern.compile("(?<=[ \\(])((?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?)?(?:(?: ?ex\\.? | & | et | in |, ?|; ?|\\.)(?:(?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?)|al\\.?))*) ?\\( ?([12][0-9][0-9][0-9?][abcdh?]?(?:[/-][0-9]{1,4})?)\\)");
        NORM_ORIG_AUTH2 = Pattern.compile("\\(((?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?)?(?:(?: ?ex\\.? | & | et | in |, ?|; ?|\\.)(?:(?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?)|al\\.?))*)\\) ?,? ?([12][0-9][0-9][0-9?][abcdh?]?(?:[/-][0-9]{1,4})?)");
        // A year (group 1, same shape as the year tokens used elsewhere in this block)
        // followed by optional whitespace and a second, bracketed year expression:
        //  - in parentheses, optionally quoted: whitespace, digits, '-', '_', ',' or '?';
        //  - in square brackets, optionally quoted: digits, space, ',' or '-';
        //  - in double quotes: digits, space, ',' or '-'.
        // The hyphen is deliberately placed last in the two latter character classes.
        // Written as "[0-9 -,]" it formed a range from space to comma, which excluded
        // the hyphen itself (so a range such as [1886-1888] could not match) and
        // silently admitted punctuation like !"#$%&'()*+ instead.
        NORM_IMPRINT_YEAR = Pattern.compile("([12][0-9][0-9][0-9?][abcdh?]?(?:[/-][0-9]{1,4})?)\\s*(?:\\(\"?[\\s0-9-_,?]+\"?\\)|\\[\"?[0-9 ,-]+\"?\\]|\"[0-9 ,-]+\")");
        NORM_HYBRIDS_GENUS = Pattern.compile("^\\s*[+\u00d7xX]\\s*([A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152])");
        NORM_HYBRIDS_EPITH = Pattern.compile("^\\s*(\u00d7?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152](?:\\.|[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153]+)(?:-[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152]?[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153]+)?)\\s+(?:\u00d7|[xX]\\s)\\s*((?:[0-9]+-)?(?:(?:van|novae) [a-z])?[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153+-]{1,}(?<! d)[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153](?<!\\bex))");
        NORM_HYBRIDS_FORM = Pattern.compile(" [\u00d7xX] ");
        NORM_INDET = Pattern.compile("((^| )(undet|indet|aff|cf)[#!?\\.]?)+(?![a-z])");
        NORM_DOTS = Pattern.compile("(^\\s*[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152]|" + RANK_MARKER_ALL + ")\\.");
        NORM_TF_GENUS = Pattern.compile("^([A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152])\\(([a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153-]+)\\)\\.? ");
        // All four patterns below are case-insensitive; the flag is spelled out as
        // Pattern.CASE_INSENSITIVE instead of its raw int value 2.

        // Either " in " followed by everything up to the end of the input, or a colon
        // (optionally padded by one space on each side) followed by digits.
        NORM_IN_BIB = Pattern.compile("( in .+$| ?: ?[0-9]+)", Pattern.CASE_INSENSITIVE);
        // At the start of the input: optional "sub", then "fossil" or one of the keys of
        // Rank.RANK_MARKER_MAP_SUPRAGENERIC, an optional dot and mandatory whitespace.
        NORM_PREFIXES = Pattern.compile("^(sub)?(fossil|" + StringUtils.join(Rank.RANK_MARKER_MAP_SUPRAGENERIC.keySet(), (String)"|") + ")\\.?\\s+", Pattern.CASE_INSENSITIVE);
        // At the end of the input: optional ',', ';' or ':', a space, optionally one of
        // "sp", "anon", "spp", "hort", "ms" or a letter followed by a digit, an optional
        // dot and trailing spaces.
        NORM_SUFFIXES = Pattern.compile("[,;:]? (sp|anon|spp|hort|ms|[a-zA-Z][0-9])?\\.? *$", Pattern.CASE_INSENSITIVE);
        // Whole-word placeholder terms such as "unnamed", "unknown" or "incertae sedis".
        BLACKLISTED = Pattern.compile("\\b(unnamed|mixed|unassigned|unallocated|unplaced|undetermined|unclassified|uncultured|unknown|unspecified|uncertain|incertae sedis|not assigned)\\b", Pattern.CASE_INSENSITIVE);
        DOUBTFUL = Pattern.compile("^[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}\u00d7&*+ ,.()/'`\u00b40-9-]+$");
        // Literal fragments treated as bad name parts.
        // NOTE(review): how they are compared (substring, case) is decided by the
        // consuming code, which is not visible here.
        badNameParts = new String[]{"author", "unknown", "unassigned", "not_stated"};
        // An opening or closing tag whose name is exactly ONE ASCII letter, with optional
        // spaces around the slash and the name (e.g. <i>, </b>, < / i >).
        // NOTE(review): tags with longer names or with attributes are not matched -
        // confirm that this restriction is intentional.
        XML_TAGS = Pattern.compile("< */? *[a-zA-Z] *>");
        // The first word of the input, which must be followed by a space:
        //  group 1 - optional multiplication sign (U+00D7), 'x' or 'X' plus whitespace;
        //  group 2 - optional multiplication sign or lowercase 'x' directly followed by
        //            an uppercase ASCII letter;
        //  group 3 - the first ASCII letter of the word;
        //  group 4 - the remaining ASCII letters of the word (at least one).
        FIRST_WORD = Pattern.compile("^([\u00d7xX]\\s+)?([\u00d7x][A-Z])?([a-zA-Z])([a-zA-Z]+) ");
        // Compiled from the WEIRD_CHARS regex string, which is defined outside this section.
        NORM_WEIRD_CHARS = Pattern.compile(WEIRD_CHARS);
        COMB_BAS_AUTHOR_SWAP = Pattern.compile("( (?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?)?(?:(?: ?ex\\.? | & | et | in |, ?|; ?|\\.)(?:(?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?)|al\\.?))*)(?:( ?,? ?[12][0-9][0-9][0-9?][abcdh?]?(?:[/-][0-9]{1,4})?))? 
?\\(( ?(?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?)?(?:(?: ?ex\\.? | & | et | in |, ?|; ?|\\.)(?:(?:(?:(?:[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]{1,3}\\.?[ -]?){0,3}|[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}][a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]{3,} )?(?:[vV](?:an)(?:[ -](?:den|der) )? ?|von[ -](?:den |der |dem )?|(?:del|Des|De|de|di|Di|da|N)[`' _]|le |d'|D'|de la |Mac|Mc|Le|St\\.? ?|Ou|O')?(?:v\\. )?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?(?:(?:[- ](?:de|da|du)?[- ]?)[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152\\p{Lu}]+[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153\\p{Ll}?]*\\.?)?(?: ?(?:f|fil|j|jr|jun|junior|sr|sen|senior|ms)\\.?)?)|al\\.?))*)( ?,? ?[12][0-9][0-9][0-9?][abcdh?]?(?:[/-][0-9]{1,4})?)?\\)");
        CANON_NAME_IGNORE_AUTHORS = Pattern.compile("^(\u00d7?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152](?:\\.|[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153]+)(?:-[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152]?[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153]+)?)(?:(?<!ceae) " + INFRAGENERIC + ")?" + "(?: " + AUTHOR_PREFIXES + ")?" + "(?: (\u00d7?" + EPHITHET + "))?" + "(?: " + AUTHOR_PREFIXES + ")?" + "(?:" + "(?:" + ".*" + "( " + RANK_MARKER_SPECIES + "[ .])" + "(\u00d7?" + EPHITHET + ")" + ")" + "|" + " (\u00d7?" + EPHITHET + ")" + ")?");
        NAME_PATTERN = Pattern.compile("^(\u00d7?[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152](?:\\.|[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153]+)(?:-[A-Z\u00cf\u00cb\u00d6\u00dc\u00c4\u00c9\u00c8\u010c\u00c1\u00c0\u00c6\u0152]?[a-z\u00ef\u00eb\u00f6\u00fc\u00e4\u00e5\u00e9\u00e8\u010d\u00e1\u00e0\u00e6\u0153]+)?)(?:(?<!ceae) " + INFRAGENERIC + ")?" + "(?: (\u00d7?" + EPHITHET + "))?" + "(?:" + "(?:" + "( .*?)?" + "( " + RANK_MARKER_SPECIES + ")" + ")?" + "(?: (\u00d7?\"?" + EPHITHET + "\"?))" + ")?" + "(,?" + "(?: ?\\(" + "(" + AUTHOR_TEAM + ")?" + ",?( ?" + YEAR + ")?" + "\\))?" + "( " + AUTHOR_TEAM + ")?" + "(?: ?\\(?,? ?(" + YEAR + ")\\)?)?" + ")" + "$");
    }
}

