/*
 * Decompiled with CFR 0.152.
 */
package org.tribuo.util.tokens.impl;

import org.tribuo.util.tokens.Token;
import org.tribuo.util.tokens.Tokenizer;

/**
 * A {@link Tokenizer} that walks the input one Unicode code point at a time and
 * asks a {@link SplitFunction} what to do with each of them.
 * <p>
 * The {@link SplitType} returned for a code point decides whether it extends the
 * token being built, ends it, starts a new one, or is discarded; see the
 * constants of {@link SplitType} for the exact handling. Token offsets are
 * {@code char} indices into the text passed to {@link #reset(CharSequence)}.
 * <p>
 * The class keeps mutable scanning state in its fields; instances are reused
 * across inputs by calling {@link #reset(CharSequence)}.
 */
public abstract class SplitFunctionTokenizer implements Tokenizer {
    private static final String NOT_READY_MESSAGE = "SplitFunctionTokenizer is not ready.";

    /** Decides how each code point is handled; may be assigned by subclasses. */
    protected SplitFunction splitFunction;

    /** The text being tokenized, or {@code null} before the first reset. */
    private String cs;
    /** Start offset of the token currently held in {@link #tokenSb}. */
    private int start;
    /** Offset of the next code point to examine. */
    private int p;
    /** Text of the token currently being built; may carry over between advance() calls. */
    private final StringBuilder tokenSb = new StringBuilder();
    /** Type that will be given to the token currently held in {@link #tokenSb}. */
    private Token.TokenType currentType = Token.TokenType.WORD;
    /** The token exposed through the getters. */
    private Token currentToken;
    /** A second token produced by a single split, handed out by the following advance(). */
    private Token nextToken;
    /** True once advance() has produced a token since the last reset. */
    private boolean ready;

    /**
     * Creates a tokenizer without a split function; a subclass using this
     * constructor has to assign {@link #splitFunction} before tokenizing.
     */
    protected SplitFunctionTokenizer() {
    }

    /**
     * Creates a tokenizer that uses the supplied split function.
     *
     * @param splitFunction the function consulted for every code point.
     */
    public SplitFunctionTokenizer(SplitFunction splitFunction) {
        this.splitFunction = splitFunction;
    }

    /**
     * Starts tokenizing a new piece of text and discards every trace of the
     * previous one, including a token that was queued but not yet returned.
     *
     * @param cs the text to tokenize.
     */
    @Override
    public void reset(CharSequence cs) {
        this.cs = cs.toString();
        this.start = 0;
        this.p = 0;
        this.tokenSb.setLength(0);
        this.currentType = Token.TokenType.WORD;
        // Without clearing these, the first advance() on the new text could
        // hand out a token that belongs to the old text.
        this.currentToken = null;
        this.nextToken = null;
        this.ready = false;
    }

    /**
     * Moves to the next token.
     *
     * @return {@code true} if a token is available through the getters,
     *         {@code false} if the input produced no further token. When
     *         {@code false} is returned the previously returned token, if any,
     *         is left in place.
     * @throws IllegalStateException if {@link #reset(CharSequence)} has not been called,
     *         or the split function returns an unsupported split type.
     */
    @Override
    public boolean advance() {
        if (cs == null) {
            throw new IllegalStateException("SplitFunctionTokenizer has not been reset.");
        }
        // A SPLIT_BEFORE_AND_AFTER that also closed a token queued its own token here.
        if (nextToken != null) {
            currentToken = nextToken;
            nextToken = null;
            ready = true;
            return true;
        }
        // The buffer can still hold a code point that a SPLIT_BEFORE placed at
        // the start of the next token, so an exhausted input alone is not the end.
        if (p >= cs.length() && tokenSb.length() == 0) {
            return false;
        }

        Token found = null;
        while (found == null && p < cs.length()) {
            int codepoint = cs.codePointAt(p);
            int width = Character.charCount(codepoint);
            SplitResult splitResult = splitFunction.apply(codepoint, p, cs);
            Token.TokenType tokenType = splitResult.tokenType;

            switch (splitResult.splitType) {
                case NO_SPLIT:
                    if (tokenSb.length() == 0) {
                        start = p;
                    }
                    tokenSb.appendCodePoint(codepoint);
                    p += width;
                    currentType = tokenType;
                    break;
                case SPLIT_AT:
                    if (tokenSb.length() > 0) {
                        found = drainBuffer(p, currentType);
                    }
                    // The splitting code point itself is dropped.
                    p += width;
                    start = p;
                    break;
                case SPLIT_BEFORE:
                    if (tokenSb.length() > 0) {
                        found = drainBuffer(p, currentType);
                    }
                    // The code point opens the next token; it stays buffered
                    // (and typed) until a later split or the end of input.
                    start = p;
                    tokenSb.appendCodePoint(codepoint);
                    currentType = tokenType;
                    p += width;
                    break;
                case SPLIT_AFTER:
                    if (tokenSb.length() == 0) {
                        start = p;
                    }
                    tokenSb.appendCodePoint(codepoint);
                    p += width;
                    found = drainBuffer(p, tokenType);
                    start = p;
                    break;
                case SPLIT_BEFORE_AND_AFTER: {
                    Token preceding = tokenSb.length() > 0 ? drainBuffer(p, currentType) : null;
                    start = p;
                    tokenSb.appendCodePoint(codepoint);
                    p += width;
                    Token single = drainBuffer(p, tokenType);
                    start = p;
                    if (preceding == null) {
                        found = single;
                    } else {
                        found = preceding;
                        nextToken = single;
                    }
                    break;
                }
                default:
                    throw new IllegalStateException("Unsupported split type " + splitResult.splitType);
            }
        }

        // End of input: whatever is still buffered forms the last token.
        if (found == null && tokenSb.length() > 0) {
            found = drainBuffer(p, currentType);
        }
        if (found == null) {
            return false;
        }
        currentToken = found;
        ready = true;
        return true;
    }

    /**
     * Turns the buffered text into a token spanning {@code start} to {@code end}
     * and empties the buffer.
     */
    private Token drainBuffer(int end, Token.TokenType type) {
        Token token = new Token(tokenSb.toString(), start, end, type);
        tokenSb.setLength(0);
        return token;
    }

    /**
     * Returns the current token, refusing access until advance() has produced one.
     */
    private Token requireCurrentToken() {
        if (!ready) {
            throw new IllegalStateException(NOT_READY_MESSAGE);
        }
        return currentToken;
    }

    /**
     * @return the text of the current token.
     * @throws IllegalStateException if no token has been produced since the last reset.
     */
    @Override
    public String getText() {
        return requireCurrentToken().text;
    }

    /**
     * @return the start offset of the current token.
     * @throws IllegalStateException if no token has been produced since the last reset.
     */
    @Override
    public int getStart() {
        return requireCurrentToken().start;
    }

    /**
     * @return the end offset of the current token.
     * @throws IllegalStateException if no token has been produced since the last reset.
     */
    @Override
    public int getEnd() {
        return requireCurrentToken().end;
    }

    /**
     * @return the type of the current token.
     * @throws IllegalStateException if no token has been produced since the last reset.
     */
    @Override
    public Token.TokenType getType() {
        return requireCurrentToken().type;
    }

    /**
     * Not supported by this abstract class; subclasses must provide their own implementation.
     *
     * @throws UnsupportedOperationException always.
     */
    @Override
    public Tokenizer clone() throws CloneNotSupportedException {
        throw new UnsupportedOperationException("abstract class SplitFunctionTokenizer does not implement clone method.  Subclasses must implement this method.");
    }

    /**
     * Decides what the tokenizer does with a single code point.
     */
    @FunctionalInterface
    public interface SplitFunction {
        /**
         * Classifies one code point of the input.
         *
         * @param codepoint the code point being examined.
         * @param index the offset of the code point within {@code cs}.
         * @param cs the complete text being tokenized.
         * @return how to split at this code point and which token type applies.
         */
        SplitResult apply(int codepoint, int index, CharSequence cs);
    }

    /**
     * Every supported pairing of a {@link SplitType} with a token type.
     */
    public enum SplitResult {
        NO_SPLIT_WORD(SplitType.NO_SPLIT, Token.TokenType.WORD),
        NO_SPLIT_NGRAM(SplitType.NO_SPLIT, Token.TokenType.NGRAM),
        NO_SPLIT_PUNCTUATION(SplitType.NO_SPLIT, Token.TokenType.PUNCTUATION),
        NO_SPLIT_WHITESPACE(SplitType.NO_SPLIT, Token.TokenType.WHITESPACE),
        NO_SPLIT_PREFIX(SplitType.NO_SPLIT, Token.TokenType.PREFIX),
        NO_SPLIT_SUFFIX(SplitType.NO_SPLIT, Token.TokenType.SUFFIX),
        NO_SPLIT_INFIX(SplitType.NO_SPLIT, Token.TokenType.INFIX),
        NO_SPLIT_UNKNOWN(SplitType.NO_SPLIT, Token.TokenType.UNKNOWN),
        SPLIT_AT(SplitType.SPLIT_AT, Token.TokenType.WORD),
        SPLIT_BEFORE(SplitType.SPLIT_BEFORE, Token.TokenType.WORD),
        SPLIT_AFTER_WORD(SplitType.SPLIT_AFTER, Token.TokenType.WORD),
        SPLIT_AFTER_NGRAM(SplitType.SPLIT_AFTER, Token.TokenType.NGRAM),
        SPLIT_AFTER_PUNCTUATION(SplitType.SPLIT_AFTER, Token.TokenType.PUNCTUATION),
        SPLIT_AFTER_WHITESPACE(SplitType.SPLIT_AFTER, Token.TokenType.WHITESPACE),
        SPLIT_AFTER_PREFIX(SplitType.SPLIT_AFTER, Token.TokenType.PREFIX),
        SPLIT_AFTER_SUFFIX(SplitType.SPLIT_AFTER, Token.TokenType.SUFFIX),
        SPLIT_AFTER_INFIX(SplitType.SPLIT_AFTER, Token.TokenType.INFIX),
        SPLIT_AFTER_UNKNOWN(SplitType.SPLIT_AFTER, Token.TokenType.UNKNOWN),
        SPLIT_BEFORE_AND_AFTER_WORD(SplitType.SPLIT_BEFORE_AND_AFTER, Token.TokenType.WORD),
        SPLIT_BEFORE_AND_AFTER_NGRAM(SplitType.SPLIT_BEFORE_AND_AFTER, Token.TokenType.NGRAM),
        SPLIT_BEFORE_AND_AFTER_PUNCTUATION(SplitType.SPLIT_BEFORE_AND_AFTER, Token.TokenType.PUNCTUATION),
        SPLIT_BEFORE_AND_AFTER_WHITESPACE(SplitType.SPLIT_BEFORE_AND_AFTER, Token.TokenType.WHITESPACE),
        SPLIT_BEFORE_AND_AFTER_PREFIX(SplitType.SPLIT_BEFORE_AND_AFTER, Token.TokenType.PREFIX),
        SPLIT_BEFORE_AND_AFTER_SUFFIX(SplitType.SPLIT_BEFORE_AND_AFTER, Token.TokenType.SUFFIX),
        SPLIT_BEFORE_AND_AFTER_INFIX(SplitType.SPLIT_BEFORE_AND_AFTER, Token.TokenType.INFIX),
        SPLIT_BEFORE_AND_AFTER_UNKNOWN(SplitType.SPLIT_BEFORE_AND_AFTER, Token.TokenType.UNKNOWN);

        /** How the tokenizer splits at the code point. */
        public final SplitType splitType;
        /** The token type associated with the code point. */
        public final Token.TokenType tokenType;

        SplitResult(SplitType splitType, Token.TokenType tokenType) {
            this.splitType = splitType;
            this.tokenType = tokenType;
        }
    }

    /**
     * The ways a code point can affect token boundaries.
     */
    public enum SplitType {
        /** The code point is appended to the token being built. */
        NO_SPLIT,
        /** The token being built ends before the code point, which is discarded. */
        SPLIT_AT,
        /** The token being built ends before the code point, which starts the next token. */
        SPLIT_BEFORE,
        /** The code point is appended to the token being built, which then ends. */
        SPLIT_AFTER,
        /** The token being built ends before the code point, which becomes a token of its own. */
        SPLIT_BEFORE_AND_AFTER
    }
}

