package org.opensearch.neuralsearch.processor.chunker;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.opensearch.action.admin.indices.analyze.AnalyzeAction;
import org.opensearch.action.admin.indices.analyze.TransportAnalyzeAction;
import org.opensearch.index.IndexService;
import org.opensearch.index.analysis.AnalysisRegistry;

/* loaded from: input_file:org/opensearch/neuralsearch/processor/chunker/FixedTokenLengthChunker.class */
/**
 * {@link Chunker} implementation that splits a string into chunks containing a fixed number of
 * tokens, optionally letting consecutive chunks overlap.
 *
 * <p>Tokens are produced by running the configured tokenizer through
 * {@link TransportAnalyzeAction#analyze}; chunk boundaries are taken from the tokens' start
 * offsets, so every chunk is a substring of the original input.
 */
public final class FixedTokenLengthChunker implements Chunker {
    /** Name under which this algorithm is referenced. */
    public static final String ALGORITHM_NAME = "fixed_token_length";
    /** Parameter key holding the {@link AnalysisRegistry} instance. */
    public static final String ANALYSIS_REGISTRY_FIELD = "analysis_registry";
    /** Parameter key for the number of tokens per chunk. */
    public static final String TOKEN_LIMIT_FIELD = "token_limit";
    /** Parameter key for the fraction of {@code token_limit} shared by consecutive chunks. */
    public static final String OVERLAP_RATE_FIELD = "overlap_rate";
    /** Runtime parameter key read in {@link #chunk} and forwarded to the analyze call. */
    public static final String MAX_TOKEN_COUNT_FIELD = "max_token_count";
    /** Parameter key for the tokenizer name. */
    public static final String TOKENIZER_FIELD = "tokenizer";

    private static final int DEFAULT_TOKEN_LIMIT = 384;
    private static final double DEFAULT_OVERLAP_RATE = 0.0d;
    private static final double OVERLAP_RATE_LOWER_BOUND = 0.0d;
    private static final double OVERLAP_RATE_UPPER_BOUND = 0.5d;
    private static final String DEFAULT_TOKENIZER = "standard";
    /** Tokenizer names accepted by {@link #parseParameters}. */
    private static final Set<String> WORD_TOKENIZERS = Set.of(
        DEFAULT_TOKENIZER,
        "letter",
        "lowercase",
        "whitespace",
        "uax_url_email",
        "classic",
        "thai"
    );

    private int tokenLimit;
    private String tokenizer;
    private double overlapRate;
    private final AnalysisRegistry analysisRegistry;

    /**
     * Creates a chunker from the given parameter map.
     *
     * @param map chunker parameters; validated by {@link #parseParameters}, and additionally
     *            read for the {@value #ANALYSIS_REGISTRY_FIELD} entry
     * @throws IllegalArgumentException if the overlap rate or tokenizer is not acceptable
     */
    public FixedTokenLengthChunker(Map<String, Object> map) {
        parseParameters(map);
        this.analysisRegistry = (AnalysisRegistry) map.get(ANALYSIS_REGISTRY_FIELD);
    }

    /**
     * Reads and validates {@value #TOKEN_LIMIT_FIELD}, {@value #OVERLAP_RATE_FIELD} and
     * {@value #TOKENIZER_FIELD}, falling back to the defaults (384, 0.0 and "standard") through
     * the {@code ChunkerParameterParser} helpers.
     *
     * @param map chunker parameters
     * @throws IllegalArgumentException if the overlap rate is outside [0.0, 0.5] (including NaN)
     *                                  or the tokenizer is not one of the supported word tokenizers
     */
    @Override
    public void parseParameters(Map<String, Object> map) {
        this.tokenLimit = ChunkerParameterParser.parsePositiveIntegerWithDefault(map, TOKEN_LIMIT_FIELD, Integer.valueOf(DEFAULT_TOKEN_LIMIT));
        this.overlapRate = ChunkerParameterParser.parseDoubleWithDefault(map, OVERLAP_RATE_FIELD, DEFAULT_OVERLAP_RATE);
        this.tokenizer = ChunkerParameterParser.parseStringWithDefault(map, TOKENIZER_FIELD, DEFAULT_TOKENIZER);

        // Negated "in range" test: every ordered comparison with NaN is false, so a plain
        // "rate < lower || rate > upper" check would let NaN slip through unnoticed.
        boolean overlapRateInRange = this.overlapRate >= OVERLAP_RATE_LOWER_BOUND && this.overlapRate <= OVERLAP_RATE_UPPER_BOUND;
        if (!overlapRateInRange) {
            throw new IllegalArgumentException(
                String.format(
                    Locale.ROOT,
                    "Parameter [%s] must be between %s and %s",
                    OVERLAP_RATE_FIELD,
                    OVERLAP_RATE_LOWER_BOUND,
                    OVERLAP_RATE_UPPER_BOUND
                )
            );
        }
        if (!WORD_TOKENIZERS.contains(this.tokenizer)) {
            throw new IllegalArgumentException(
                String.format(
                    Locale.ROOT,
                    "Tokenizer [%s] is not supported for [%s] algorithm. Supported tokenizers are %s",
                    this.tokenizer,
                    ALGORITHM_NAME,
                    WORD_TOKENIZERS
                )
            );
        }
    }

    /**
     * Splits {@code str} into chunks of {@code tokenLimit} tokens each.
     *
     * <p>Each chunk starts at the start offset of its first token (the very first chunk starts at
     * index 0 of the input) and ends just before the start offset of the token
     * {@code tokenLimit} positions later. Successive chunks advance by
     * {@code tokenLimit - floor(tokenLimit * overlapRate)} tokens. The remainder of the input is
     * emitted as one final chunk when fewer than {@code tokenLimit + 1} tokens remain, or when
     * {@link Chunker#checkRunTimeMaxChunkLimit} reports that the chunk limit has been reached.
     *
     * @param str input text to chunk
     * @param map runtime parameters; read for {@value #MAX_TOKEN_COUNT_FIELD},
     *            {@code Chunker.MAX_CHUNK_LIMIT_FIELD} and {@code Chunker.CHUNK_STRING_COUNT_FIELD}
     * @return the chunks in input order; empty when tokenization yields no tokens
     * @throws IllegalStateException if tokenization fails
     */
    @Override
    public List<String> chunk(String str, Map<String, Object> map) {
        int maxTokenCount = ChunkerParameterParser.parseInteger(map, MAX_TOKEN_COUNT_FIELD);
        int runtimeMaxChunkLimit = ChunkerParameterParser.parseInteger(map, Chunker.MAX_CHUNK_LIMIT_FIELD);
        int chunkStringCount = ChunkerParameterParser.parseInteger(map, Chunker.CHUNK_STRING_COUNT_FIELD);

        List<AnalyzeAction.AnalyzeToken> tokens = tokenize(str, this.tokenizer, maxTokenCount);
        int tokenCount = tokens.size();
        List<String> chunks = new ArrayList<>();

        int overlapTokenCount = (int) Math.floor(this.tokenLimit * this.overlapRate);
        // overlapRate <= 0.5 and tokenLimit is parsed as a positive integer, so the stride is
        // always at least 1 and the loop is guaranteed to advance.
        int stride = this.tokenLimit - overlapTokenCount;

        int start = 0;
        while (start < tokenCount) {
            // The first chunk begins at the very start of the input so that any text preceding
            // the first token is not dropped.
            int startOffset = start == 0 ? 0 : tokens.get(start).getStartOffset();
            boolean lastChunk = Chunker.checkRunTimeMaxChunkLimit(chunks.size(), runtimeMaxChunkLimit, chunkStringCount)
                || start + this.tokenLimit >= tokenCount;
            if (lastChunk) {
                // Either the chunk limit is reached or the remaining tokens fit in one chunk:
                // emit everything that is left and stop.
                chunks.add(str.substring(startOffset));
                break;
            }
            int endOffset = tokens.get(start + this.tokenLimit).getStartOffset();
            chunks.add(str.substring(startOffset, endOffset));
            start += stride;
        }
        return chunks;
    }

    /**
     * Runs the named tokenizer over {@code content} via {@link TransportAnalyzeAction#analyze}.
     *
     * @param content       text to tokenize
     * @param tokenizerName tokenizer to set on the analyze request
     * @param maxTokenCount value passed as the last argument of the analyze call
     *                      (presumably an upper bound on produced tokens; confirm against OpenSearch)
     * @return the tokens reported by the analyze response
     * @throws IllegalStateException wrapping any exception raised by the analyze call
     */
    private List<AnalyzeAction.AnalyzeToken> tokenize(String content, String tokenizerName, int maxTokenCount) {
        AnalyzeAction.Request analyzeRequest = new AnalyzeAction.Request();
        analyzeRequest.text(new String[] { content });
        analyzeRequest.tokenizer(tokenizerName);
        try {
            AnalyzeAction.Response analyzeResponse = TransportAnalyzeAction.analyze(
                analyzeRequest,
                this.analysisRegistry,
                (IndexService) null,
                maxTokenCount
            );
            return analyzeResponse.getTokens();
        } catch (Exception e) {
            throw new IllegalStateException(
                String.format(Locale.ROOT, "analyzer %s throws exception: %s", tokenizerName, e.getMessage()),
                e
            );
        }
    }
}
