/*
 * Decompiled with CFR 0.152.
 */
package org.carrot2.text.preprocessing;

import com.carrotsearch.hppc.ByteArrayList;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.ShortArrayList;
import com.carrotsearch.hppc.cursors.IntCursor;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.stream.Stream;
import org.carrot2.clustering.Document;
import org.carrot2.language.Tokenizer;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.util.CharArrayUtils;
import org.carrot2.util.MutableCharArray;
import org.carrot2.util.StringUtils;

/**
 * Tokenizes the non-empty fields of a stream of documents and stores the result on a
 * {@link PreprocessingContext} as four parallel arrays (document index, field index, token
 * image, token type) plus the array of field names.
 *
 * <p>Layout of the produced token sequence:
 *
 * <ul>
 *   <li>before every document except the first, a document separator entry is emitted
 *       (regardless of whether the neighbouring documents produced any tokens);
 *   <li>between two fields of the same document that both produced tokens, a field separator
 *       entry is emitted;
 *   <li>a single terminator entry closes the sequence.
 * </ul>
 *
 * Separator and terminator entries carry a {@code null} image and a field index of {@code -1}.
 *
 * <p>The working buffers are instance fields that are re-created on every
 * {@link #tokenize(PreprocessingContext, Stream)} call, so concurrent calls on the same instance
 * would interfere with each other.
 */
final class InputTokenizer {
    /** Value returned by the tokenizer's {@code nextToken()} when the input is exhausted. */
    private static final short END_OF_INPUT = -1;

    /** Document index stored for entries that do not belong to any document. */
    private static final int NO_DOCUMENT = -1;

    /** Field index stored for separator and terminator entries. */
    private static final byte NO_FIELD = -1;

    // NOTE(review): the three type codes below presumably mirror the separator/terminator flag
    // constants declared on Tokenizer (0x0200, 0x0400, 0x0800) -- confirm and reference those
    // constants directly if so.

    /** Token type code of the entry emitted between documents. */
    private static final short TYPE_DOCUMENT_SEPARATOR = 512;

    /** Token type code of the entry emitted between fields of one document. */
    private static final short TYPE_FIELD_SEPARATOR = 1024;

    /** Token type code of the entry that terminates the whole token sequence. */
    private static final short TYPE_TERMINATOR = 2048;

    /** Token images; {@code null} for separators and the terminator. */
    private ArrayList<char[]> images;

    /** Token type codes, parallel to {@link #images}. */
    private ShortArrayList tokenTypes;

    /** Document indices, parallel to {@link #images}. */
    private IntArrayList documentIndices;

    /** Field indices, parallel to {@link #images}. */
    private ByteArrayList fieldIndices;

    InputTokenizer() {
    }

    /**
     * Tokenizes all documents from {@code docStream} in encounter order and saves the results to
     * {@code context} ({@code documentCount}, {@code allTokens.*} and {@code allFields.name}).
     *
     * <p>Field indices are assigned in order of first appearance of each field name among
     * fields with a non-empty value.
     *
     * @param context the preprocessing context that supplies the {@link Tokenizer}, interns
     *     token images and receives the results
     * @param docStream the documents to tokenize; consumed by this call
     * @throws RuntimeException if a field would be assigned an index above
     *     {@link Byte#MAX_VALUE}, or wrapping an {@link IOException} raised by the tokenizer
     */
    public void tokenize(PreprocessingContext context, Stream<? extends Document> docStream) {
        this.images = new ArrayList<>();
        this.tokenTypes = new ShortArrayList();
        this.documentIndices = new IntArrayList();
        this.fieldIndices = new ByteArrayList();
        try {
            Tokenizer ts = context.languageComponents.get(Tokenizer.class);
            // Reused for every token; the tokenizer fills it in via setTermBuffer().
            MutableCharArray wrapper = new MutableCharArray(CharArrayUtils.EMPTY_ARRAY);
            HashMap<String, Integer> fieldIndexes = new HashMap<>();
            // Scratch list, cleared and refilled for each document.
            ArrayList<FieldValue> fields = new ArrayList<>();
            // Mutable holder so the lambda below can count documents.
            IntCursor docCount = new IntCursor();

            docStream.forEachOrdered(doc -> {
                int documentIndex = docCount.value;
                if (documentIndex > 0) {
                    addDocumentSeparator();
                }

                fields.clear();
                doc.visitFields((fieldName, fieldValue) -> {
                    if (!StringUtils.isNullOrEmpty(fieldValue)) {
                        fields.add(new FieldValue(fieldName, fieldValue));
                    }
                });

                boolean hadTokens = false;
                for (FieldValue fv : fields) {
                    // The next free index equals the number of fields registered so far.
                    int fieldIndex =
                        fieldIndexes.computeIfAbsent(fv.field, k -> fieldIndexes.size());
                    if (fieldIndex > Byte.MAX_VALUE) {
                        // Field indices are stored as bytes.
                        throw new RuntimeException("Too many fields (>" + fieldIndex + ")");
                    }
                    // fv.value is never null or empty here: only such values were collected.
                    if (appendFieldTokens(
                            context, ts, wrapper, documentIndex, (byte) fieldIndex, fv.value,
                            hadTokens)) {
                        hadTokens = true;
                    }
                }
                ++docCount.value;
            });
            addTerminator();

            String[] fieldNames = new String[fieldIndexes.size()];
            fieldIndexes.forEach((field, index) -> fieldNames[index] = field);

            context.documentCount = docCount.value;
            context.allTokens.documentIndex = this.documentIndices.toArray();
            context.allTokens.fieldIndex = this.fieldIndices.toArray();
            context.allTokens.image = this.images.toArray(new char[this.images.size()][]);
            context.allTokens.type = this.tokenTypes.toArray();
            context.allFields.name = fieldNames;
        } finally {
            // Release the buffers even when tokenization fails part-way through.
            this.images = null;
            this.fieldIndices = null;
            this.tokenTypes = null;
            this.documentIndices = null;
        }
    }

    /**
     * Tokenizes a single field value and appends its tokens to the buffers.
     *
     * @param separatorNeeded whether an earlier field of the same document already produced
     *     tokens, in which case a field separator is emitted before this field's first token
     * @return {@code true} if the value produced at least one token, {@code false} if the
     *     tokenizer reported end of input immediately (nothing is appended in that case)
     * @throws RuntimeException wrapping an {@link IOException} raised by the tokenizer
     */
    private boolean appendFieldTokens(
            PreprocessingContext context,
            Tokenizer ts,
            MutableCharArray wrapper,
            int documentIndex,
            byte fieldIndex,
            String value,
            boolean separatorNeeded) {
        try {
            ts.reset(new StringReader(value));
            short tokenType = ts.nextToken();
            if (tokenType == END_OF_INPUT) {
                return false;
            }
            if (separatorNeeded) {
                addFieldSeparator(documentIndex);
            }
            do {
                ts.setTermBuffer(wrapper);
                add(documentIndex, fieldIndex, context.intern(wrapper), tokenType);
            } while ((tokenType = ts.nextToken()) != END_OF_INPUT);
            return true;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Appends the entry that terminates the whole token sequence. */
    void addTerminator() {
        add(NO_DOCUMENT, NO_FIELD, null, TYPE_TERMINATOR);
    }

    /** Appends the entry that separates two consecutive documents. */
    void addDocumentSeparator() {
        add(NO_DOCUMENT, NO_FIELD, null, TYPE_DOCUMENT_SEPARATOR);
    }

    /**
     * Appends the entry that separates two fields of the same document.
     *
     * @param documentIndex index of the document the separated fields belong to
     */
    void addFieldSeparator(int documentIndex) {
        add(documentIndex, NO_FIELD, null, TYPE_FIELD_SEPARATOR);
    }

    /**
     * Appends one entry to all four parallel buffers. Must only be called while a
     * {@link #tokenize(PreprocessingContext, Stream)} call is in progress, because the buffers
     * are {@code null} outside of it.
     *
     * @param documentIndex index of the owning document, or {@code -1} for none
     * @param fieldIndex index of the owning field, or {@code -1} for none
     * @param image token image, or {@code null} for separators and the terminator
     * @param tokenTypeCode token type code
     */
    void add(int documentIndex, byte fieldIndex, char[] image, short tokenTypeCode) {
        this.documentIndices.add(documentIndex);
        this.fieldIndices.add(fieldIndex);
        this.images.add(image);
        this.tokenTypes.add(tokenTypeCode);
    }

    /** A field name paired with its (non-empty) value, collected from one document. */
    private static class FieldValue {
        final String field;
        final String value;

        public FieldValue(String fieldName, String fieldValue) {
            this.field = fieldName;
            this.value = fieldValue;
        }
    }
}

