package org.grobid.core.utilities.matching;

import com.google.common.base.Joiner;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.grobid.core.utilities.Pair;
import org.grobid.shaded.org.apache.lucene.analysis.Analyzer;
import org.grobid.shaded.org.apache.lucene.analysis.TokenStream;
import org.grobid.shaded.org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.grobid.shaded.org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.grobid.shaded.org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.grobid.shaded.org.apache.lucene.util.Version;

/* loaded from: input_file:org/grobid/core/utilities/matching/LuceneUtil.class */
/**
 * Static helpers for tokenizing and normalizing text with a Lucene {@link Analyzer}.
 *
 * <p>This class only holds static methods and cannot be instantiated.
 */
public class LuceneUtil {
    private LuceneUtil() {
    }

    /**
     * Creates a new {@link StandardAnalyzer} configured for {@code Version.LUCENE_45}.
     *
     * @return a fresh analyzer instance
     */
    public static StandardAnalyzer createStandardAnalyzer() {
        return new StandardAnalyzer(Version.LUCENE_45);
    }

    /**
     * Tokenizes {@code str} with {@code analyzer} and joins the resulting terms with a single space.
     *
     * @param analyzer analyzer used to produce the terms
     * @param str      text to normalize; must not be {@code null}
     * @return the space-separated terms emitted by the analyzer
     * @throws RuntimeException if the analyzer fails with an {@link IOException}
     */
    public static String normalizeString(Analyzer analyzer, String str) {
        return Joiner.on(' ').join(tokenizeString(analyzer, str));
    }

    /**
     * Joins already-produced tokens with a single space.
     *
     * <p>NOTE(review): {@code analyzer} is accepted but never used here; the tokens are joined
     * exactly as given. The parameter is kept so existing callers continue to compile.
     *
     * @param analyzer unused
     * @param list     tokens to join
     * @return the tokens separated by single spaces
     */
    public static String normalizeTokens(Analyzer analyzer, List<String> list) {
        return Joiner.on(' ').join(list);
    }

    /**
     * Runs {@code reader} through {@code analyzer} and collects the text of every emitted term.
     *
     * @param analyzer analyzer used to build the token stream
     * @param reader   source of the text to tokenize
     * @return the terms in the order the analyzer emitted them
     * @throws IOException if the token stream fails while being consumed
     */
    private static List<String> readerToTokens(Analyzer analyzer, Reader reader) throws IOException {
        List<String> terms = new ArrayList<>();
        // try-with-resources guarantees close() even when reset/incrementToken/end throws,
        // so a failed run does not leave the stream open.
        try (TokenStream tokenStream = analyzer.tokenStream("", reader)) {
            tokenStream.reset();
            CharTermAttribute termAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            while (tokenStream.incrementToken()) {
                terms.add(termAttribute.toString());
            }
            tokenStream.end();
        }
        return terms;
    }

    /**
     * Tokenizes {@code str} with {@code analyzer}.
     *
     * @param analyzer analyzer used to produce the terms
     * @param str      text to tokenize; must not be {@code null}
     * @return the terms in the order the analyzer emitted them
     * @throws RuntimeException wrapping the {@link IOException} if tokenization fails
     */
    public static List<String> tokenizeString(Analyzer analyzer, String str) {
        try {
            return readerToTokens(analyzer, new StringReader(str));
        } catch (IOException e) {
            throw new RuntimeException("Error during tokenization", e);
        }
    }

    /**
     * Tokenizes {@code str} with {@code analyzer}, pairing each term with the token type
     * reported by the stream's {@link TypeAttribute}.
     *
     * @param analyzer analyzer used to produce the tokens
     * @param str      text to tokenize; must not be {@code null}
     * @return one {@code (term, type)} pair per emitted token, in emission order
     * @throws RuntimeException wrapping the {@link IOException} if tokenization fails
     */
    public static List<Pair<String, String>> tokenizeWithTokenTypes(Analyzer analyzer, String str) {
        List<Pair<String, String>> typedTokens = new ArrayList<>();
        // The stream is closed on every exit path, including failures.
        try (TokenStream tokenStream = analyzer.tokenStream("", new StringReader(str))) {
            tokenStream.reset();
            CharTermAttribute termAttribute = tokenStream.addAttribute(CharTermAttribute.class);
            TypeAttribute typeAttribute = tokenStream.addAttribute(TypeAttribute.class);
            while (tokenStream.incrementToken()) {
                typedTokens.add(new Pair<>(termAttribute.toString(), typeAttribute.type()));
            }
            tokenStream.end();
            return typedTokens;
        } catch (IOException e) {
            throw new RuntimeException("Error during tokenization", e);
        }
    }
}
