/*
 * Decompiled with CFR 0.152.
 */
package org.gbif.utils.file;

import com.google.common.base.Charsets;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import org.gbif.utils.file.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Heuristic detection of the character encoding of a block of bytes.
 *
 * <p>The detection proceeds in this order:
 * <ol>
 *   <li>a UTF-8, UTF-16LE or UTF-16BE byte order mark at the start of the buffer,</li>
 *   <li>a UTF-16 guess based on how zero bytes are distributed over even and odd offsets,</li>
 *   <li>a structural check whether all bytes form well-formed UTF-8 sequences,</li>
 *   <li>a scoring of the 8-bit candidates ISO-8859-1, Windows-1252 and MacRoman (the latter
 *       two only when the running JVM supports them).</li>
 * </ol>
 *
 * <p>The result is a best guess, not a guarantee.
 */
public class CharsetDetection {
    private static final Logger LOG = LoggerFactory.getLogger(CharsetDetection.class);
    private static final byte LF = 10;
    private static final byte CR = 13;
    private static final byte TAB = 9;
    /** Score added for every byte that an 8-bit candidate charset treats as suspicious. */
    private static final int UNDEFINED_PENALTY = 100;
    /** Non-ASCII letters (lower and upper case) whose presence makes a decoding more plausible. */
    private static final char[] COMMON_NON_ASCII_CHARS = buildCommonNonAsciiChars();
    private static final Charset LATIN1 = Charsets.ISO_8859_1;
    /** {@code null} when the JVM does not provide the charset. */
    private static final Charset WINDOWS1252 =
        optionalCharset("Cp1252", "Windows 1252 encoding not supported on this Virtual Machine");
    /** {@code null} when the JVM does not provide the charset. */
    private static final Charset MACROMAN =
        optionalCharset("MacRoman", "MacRoman encoding not supported on this Virtual Machine");

    /** The bytes to analyse; never modified by this class. */
    private final byte[] buffer;

    private CharsetDetection(byte[] buffer) {
        this.buffer = buffer;
    }

    /**
     * Detects the encoding of a file using the bytes returned by
     * {@code FileUtils.readByteBuffer(file)}.
     *
     * @param file the file to analyse
     * @return the detected charset, never {@code null}
     * @throws IOException if the file cannot be read
     */
    public static Charset detectEncoding(File file) throws IOException {
        return detectAndLog(FileUtils.readByteBuffer(file).array());
    }

    /**
     * Detects the encoding of a file using the bytes returned by
     * {@code FileUtils.readByteBuffer(file, bufferLength)}.
     *
     * @param file the file to analyse
     * @param bufferLength buffer size handed to {@code FileUtils.readByteBuffer}
     * @return the detected charset, never {@code null}
     * @throws IOException if the file cannot be read
     */
    public static Charset detectEncoding(File file, int bufferLength) throws IOException {
        return detectAndLog(FileUtils.readByteBuffer(file, bufferLength).array());
    }

    /** Runs the detection on the given bytes and logs the result at debug level. */
    private static Charset detectAndLog(byte[] data) {
        Charset charset = new CharsetDetection(data).detectEncoding();
        LOG.debug("Detected character encoding {}", charset.displayName());
        return charset;
    }

    /**
     * @return the charset named by the {@code file.encoding} system property
     */
    public static Charset getDefaultSystemCharset() {
        return Charset.forName(System.getProperty("file.encoding"));
    }

    /**
     * @param bom the leading bytes of the data, may be shorter than a BOM
     * @return true if the bytes start with the UTF-16 big endian byte order mark {@code FE FF}
     */
    protected static boolean hasUTF16BEBom(byte[] bom) {
        return startsWith(bom, (byte) 0xFE, (byte) 0xFF);
    }

    /**
     * @param bom the leading bytes of the data, may be shorter than a BOM
     * @return true if the bytes start with the UTF-16 little endian byte order mark {@code FF FE}
     */
    protected static boolean hasUTF16LEBom(byte[] bom) {
        return startsWith(bom, (byte) 0xFF, (byte) 0xFE);
    }

    /**
     * @param bom the leading bytes of the data, may be shorter than a BOM
     * @return true if the bytes start with the UTF-8 byte order mark {@code EF BB BF}
     */
    protected static boolean hasUTF8Bom(byte[] bom) {
        return startsWith(bom, (byte) 0xEF, (byte) 0xBB, (byte) 0xBF);
    }

    /**
     * Length-safe prefix comparison, so that empty or very short inputs simply do not match
     * instead of raising an {@link ArrayIndexOutOfBoundsException}.
     */
    private static boolean startsWith(byte[] data, byte... prefix) {
        if (data == null || data.length < prefix.length) {
            return false;
        }
        for (int i = 0; i < prefix.length; i++) {
            if (data[i] != prefix[i]) {
                return false;
            }
        }
        return true;
    }

    /** Builds the table of common non-ASCII letters: each listed letter followed by its upper case form. */
    private static char[] buildCommonNonAsciiChars() {
        String lowerCase = "\u00e4\u00e5\u00e1\u00e0\u00e6\u0153\u010d\u00e9\u00e8\u00ea\u00eb\u00ef\u00f1\u00f8\u00f6\u00fc\u00df\u0161\u017e";
        char[] chars = new char[lowerCase.length() * 2];
        int pos = 0;
        for (int i = 0; i < lowerCase.length(); i++) {
            char c = lowerCase.charAt(i);
            chars[pos++] = c;
            chars[pos++] = Character.toUpperCase(c);
        }
        return chars;
    }

    /**
     * Looks up a charset that is not guaranteed to exist on every JVM.
     *
     * @param name charset name to look up
     * @param warning message logged at warn level when the lookup fails
     * @return the charset, or {@code null} if the JVM does not know it
     */
    private static Charset optionalCharset(String name, String warning) {
        try {
            return Charset.forName(name);
        } catch (IllegalArgumentException e) {
            // Covers IllegalCharsetNameException and UnsupportedCharsetException.
            LOG.warn(warning);
            return null;
        }
    }

    private static boolean isCommonChar(char c) {
        for (char common : COMMON_NON_ASCII_CHARS) {
            if (c == common) {
                return true;
            }
        }
        return false;
    }

    /** @return true for bytes {@code 0x80..0xBF}, the trailing bytes of a UTF-8 sequence */
    private static boolean isContinuationChar(byte b) {
        return -128 <= b && b <= -65;
    }

    /** @return true for lead bytes {@code 0xF8..0xFB} */
    private static boolean isFiveBytesSequence(byte b) {
        return -8 <= b && b <= -5;
    }

    /** @return true for lead bytes {@code 0xF0..0xF7} */
    private static boolean isFourBytesSequence(byte b) {
        return -16 <= b && b <= -9;
    }

    /** @return true for lead bytes {@code 0xFC..0xFD} */
    private static boolean isSixBytesSequence(byte b) {
        return -4 <= b && b <= -3;
    }

    /** @return true for lead bytes {@code 0xE0..0xEF} */
    private static boolean isThreeBytesSequence(byte b) {
        return -32 <= b && b <= -17;
    }

    /** @return true for lead bytes {@code 0xC0..0xDF} */
    private static boolean isTwoBytesSequence(byte b) {
        return -64 <= b && b <= -33;
    }

    /**
     * @param lead a byte with the high bit set
     * @return the number of continuation bytes that must follow the lead byte, or -1 if the
     *         byte cannot start a multi-byte sequence
     */
    private static int expectedContinuationBytes(byte lead) {
        if (isTwoBytesSequence(lead)) {
            return 1;
        }
        if (isThreeBytesSequence(lead)) {
            return 2;
        }
        if (isFourBytesSequence(lead)) {
            return 3;
        }
        if (isFiveBytesSequence(lead)) {
            return 4;
        }
        if (isSixBytesSequence(lead)) {
            return 5;
        }
        return -1;
    }

    /**
     * Picks the 8-bit charset with the lowest suspicion score. Ties keep the earlier candidate
     * in the order ISO-8859-1, Windows-1252, MacRoman.
     */
    private Charset detectCharacterEncoding8bit() {
        Charset bestEncoding = LATIN1;
        long leastSuspicious = testLatin1();

        if (WINDOWS1252 != null) {
            long score = testWindows1252();
            if (score < leastSuspicious) {
                leastSuspicious = score;
                bestEncoding = WINDOWS1252;
            }
        }
        if (MACROMAN != null) {
            long score = testMacRoman();
            if (score < leastSuspicious) {
                leastSuspicious = score;
                bestEncoding = MACROMAN;
            }
        }
        LOG.debug("8bit Encoding guessed: {} with {} rare characters", bestEncoding, leastSuspicious);
        return bestEncoding;
    }

    /**
     * Detects the encoding of the wrapped bytes.
     *
     * @return the detected charset, never {@code null}; an empty buffer yields UTF-8
     */
    public Charset detectEncoding() {
        if (hasUTF8Bom(buffer)) {
            return Charsets.UTF_8;
        }
        if (hasUTF16LEBom(buffer)) {
            return Charsets.UTF_16LE;
        }
        if (hasUTF16BEBom(buffer)) {
            return Charsets.UTF_16BE;
        }
        Charset utf16 = detectUtf16();
        if (utf16 != null) {
            return utf16;
        }
        if (isValidUtf8()) {
            return Charsets.UTF_8;
        }
        return detectCharacterEncoding8bit();
    }

    /**
     * Checks that every byte with the high bit set is part of a structurally well-formed
     * multi-byte sequence (lead byte followed by the right number of continuation bytes).
     *
     * <p>The whole buffer is checked. A sequence that is cut off by the end of the buffer is
     * accepted as long as the bytes that are present are continuation bytes, because the
     * buffer may be only a prefix of the file that ends in the middle of a character.
     */
    private boolean isValidUtf8() {
        final int length = buffer.length;
        int i = 0;
        while (i < length) {
            byte lead = buffer[i];
            if (lead >= 0) {
                // plain 7-bit ASCII
                i++;
                continue;
            }
            int continuationBytes = expectedContinuationBytes(lead);
            if (continuationBytes < 0) {
                return false;
            }
            int lastToCheck = Math.min(i + continuationBytes, length - 1);
            for (int j = i + 1; j <= lastToCheck; j++) {
                if (!isContinuationChar(buffer[j])) {
                    return false;
                }
            }
            i += continuationBytes + 1;
        }
        return true;
    }

    /**
     * Guesses UTF-16 from the distribution of zero bytes: zeros at even offsets count towards
     * big endian, zeros at odd offsets towards little endian. A guess is only made when one
     * side has more zeros than a tenth of the buffer length and the two counts differ by more
     * than that amount, and it is only returned if the buffer actually decodes.
     *
     * @return UTF-16BE, UTF-16LE or UTF-16, or {@code null} if the bytes do not look like UTF-16
     */
    private Charset detectUtf16() {
        int zerosLE = 0;
        int zerosBE = 0;
        for (int i = 0; i < buffer.length; i++) {
            if (buffer[i] != 0) {
                continue;
            }
            if ((i & 1) == 0) {
                zerosBE++;
            } else {
                zerosLE++;
            }
        }

        int min = buffer.length / 10;
        if ((zerosBE > min || zerosLE > min) && Math.abs(zerosBE - zerosLE) > min) {
            Charset candidate = zerosBE > zerosLE ? Charsets.UTF_16BE : Charsets.UTF_16LE;
            if (canDecode(candidate)) {
                return candidate;
            }
            if (canDecode(Charsets.UTF_16)) {
                return Charsets.UTF_16;
            }
        }
        return null;
    }

    /** @return true if the whole buffer decodes with the given charset without a coding error */
    private boolean canDecode(Charset charset) {
        try {
            charset.newDecoder().decode(ByteBuffer.wrap(buffer));
            return true;
        } catch (CharacterCodingException ignored) {
            // A failed decode only means this candidate does not fit; the caller tries the next one.
            return false;
        }
    }

    /**
     * Decodes the buffer with the given charset and subtracts one point for every decoded
     * character found in {@link #COMMON_NON_ASCII_CHARS}.
     *
     * @return zero or a negative score; lower means more plausible
     * @throws CharacterCodingException if the decoder reports a coding error
     */
    private long rewardCommonChars(Charset charset) throws CharacterCodingException {
        CharBuffer decoded = charset.newDecoder().decode(ByteBuffer.wrap(buffer));
        long score = 0L;
        while (decoded.hasRemaining()) {
            if (isCommonChar(decoded.get())) {
                score--;
            }
        }
        return score;
    }

    /**
     * Scores the buffer as ISO-8859-1. Every byte in {@code 0x80..0x9F} adds
     * {@link #UNDEFINED_PENALTY}.
     *
     * @return the suspicion score, or {@link Long#MAX_VALUE} if decoding fails
     */
    private long testLatin1() {
        try {
            long suspicious = rewardCommonChars(LATIN1);
            for (byte b : buffer) {
                // As a signed byte 0x80..0x9F is -128..-97.
                if (b <= (byte) 0x9F) {
                    suspicious += UNDEFINED_PENALTY;
                }
            }
            return suspicious;
        } catch (CharacterCodingException e) {
            return Long.MAX_VALUE;
        }
    }

    /**
     * Scores the buffer as MacRoman. No byte values are penalised for this charset.
     * Must only be called when {@link #MACROMAN} is not {@code null}.
     *
     * @return the suspicion score, or {@link Long#MAX_VALUE} if decoding fails
     */
    private long testMacRoman() {
        try {
            return rewardCommonChars(MACROMAN);
        } catch (CharacterCodingException e) {
            return Long.MAX_VALUE;
        }
    }

    /**
     * Scores the buffer as Windows-1252. The bytes {@code 0x81, 0x8D, 0x8F, 0x90, 0x9D} each add
     * {@link #UNDEFINED_PENALTY}. Must only be called when {@link #WINDOWS1252} is not
     * {@code null}.
     *
     * @return the suspicion score, or {@link Long#MAX_VALUE} if decoding fails
     */
    private long testWindows1252() {
        try {
            long suspicious = rewardCommonChars(WINDOWS1252);
            for (byte b : buffer) {
                if (isPenalisedWindows1252Byte(b)) {
                    suspicious += UNDEFINED_PENALTY;
                }
            }
            return suspicious;
        } catch (CharacterCodingException e) {
            return Long.MAX_VALUE;
        }
    }

    /** @return true for the byte values that are penalised when scoring Windows-1252 */
    private static boolean isPenalisedWindows1252Byte(byte b) {
        return b == (byte) 0x81
            || b == (byte) 0x8D
            || b == (byte) 0x8F
            || b == (byte) 0x90
            || b == (byte) 0x9D;
    }
}

