package org.grobid.core.analyzers;

import java.util.ArrayList;
import java.util.List;
import org.grobid.core.lang.Language;
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.utilities.LayoutTokensUtil;
import org.grobid.core.utilities.UnicodeUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wipo.nlp.textboundaries.ReTokenizer;
import org.wipo.nlp.textboundaries.ReTokenizerFactory;

/* loaded from: input_file:org/grobid/core/analyzers/GrobidAnalyzer.class */
/**
 * Language-aware tokenizer facade.
 *
 * <p>Dispatches to a language-specific {@link ReTokenizer} for Japanese, Chinese and
 * Korean text, post-processes the default tokenization for Arabic, and delegates to
 * {@link GrobidDefaultAnalyzer} for every other (or unknown) language.
 *
 * <p>The class is a lazily created singleton; obtain it through {@link #getInstance()}.
 * The language-specific tokenizers are themselves created lazily on first use and that
 * lazy creation is not synchronized.
 */
public class GrobidAnalyzer implements Analyzer {
    private static final Logger LOGGER = LoggerFactory.getLogger(GrobidAnalyzer.class);
    private static volatile GrobidAnalyzer instance;
    private ReTokenizer jaAnalyzer = null;
    private ReTokenizer krAnalyzer = null;
    private ReTokenizer zhAnalyzer = null;

    /**
     * Returns the shared analyzer, creating it on first call.
     *
     * @return the singleton instance, never {@code null}
     */
    public static GrobidAnalyzer getInstance() {
        if (instance == null) {
            getNewInstance();
        }
        return instance;
    }

    /**
     * Creates the singleton if it does not exist yet. The null check is repeated under
     * the class lock so that threads racing through {@link #getInstance()} cannot each
     * create (and publish) a different instance.
     */
    private static synchronized void getNewInstance() {
        if (instance == null) {
            LOGGER.debug("Get new instance of GrobidAnalyzer");
            instance = new GrobidAnalyzer();
        }
    }

    private GrobidAnalyzer() {
    }

    /**
     * @return the fixed name of this analyzer, {@code "GrobidAnalyzer"}
     */
    @Override
    public String getName() {
        return "GrobidAnalyzer";
    }

    /**
     * Tokenizes {@code str} without language information; equivalent to
     * {@code tokenize(str, null)}.
     *
     * @param str the text to tokenize, may be {@code null}
     * @return the tokens, never {@code null}
     */
    @Override
    public List<String> tokenize(String str) {
        return tokenize(str, null);
    }

    /**
     * Tokenizes {@code str} with the tokenizer matching {@code language}.
     *
     * <p>If the selected tokenizer throws, the error is logged and the tokens produced
     * so far (an empty list when nothing was produced) are returned instead of
     * propagating the exception.
     *
     * @param str      the text to tokenize; {@code null} or empty yields an empty list
     * @param language the language of the text; {@code null}, or a language whose
     *                 {@code getLang()} is {@code null}, selects the default analyzer
     * @return the tokens, never {@code null}
     */
    @Override
    public List<String> tokenize(String str, Language language) {
        List<String> tokens = new ArrayList<>();
        if (str == null || str.length() == 0) {
            return tokens;
        }
        try {
            if (language == null || language.getLang() == null) {
                tokens = GrobidDefaultAnalyzer.getInstance().tokenize(str);
            } else if (language.isJapaneses()) {
                if (this.jaAnalyzer == null) {
                    this.jaAnalyzer = ReTokenizerFactory.create("ja_g");
                }
                tokens = this.jaAnalyzer.tokensAsList(str);
            } else if (language.isChinese()) {
                if (this.zhAnalyzer == null) {
                    this.zhAnalyzer = ReTokenizerFactory.create("zh_g");
                }
                tokens = this.zhAnalyzer.tokensAsList(str);
            } else if (language.isKorean()) {
                if (this.krAnalyzer == null) {
                    this.krAnalyzer = ReTokenizerFactory.create("kr_g");
                }
                tokens = this.krAnalyzer.tokensAsList(str);
            } else if (language.isArabic()) {
                tokens = GrobidDefaultAnalyzer.getInstance().tokenize(str);
                // Rewrite each token in place, character by character.
                for (int i = 0; i < tokens.size(); i++) {
                    tokens.set(i, mapArabicCharacters(tokens.get(i)));
                }
            } else {
                tokens = GrobidDefaultAnalyzer.getInstance().tokenize(str);
            }
        } catch (Exception e) {
            // Best effort: a failing tokenizer must not abort the caller.
            LOGGER.error("Invalid tokenizer", e);
        }
        return tokens;
    }

    /**
     * Passes every character of {@code token} through
     * {@code ArabicChars.arabicCharacters} and concatenates the results.
     */
    private static String mapArabicCharacters(String token) {
        StringBuilder mapped = new StringBuilder();
        for (int i = 0; i < token.length(); i++) {
            mapped.append(ArabicChars.arabicCharacters(token.charAt(i)));
        }
        return mapped.toString();
    }

    /**
     * Retokenizes {@code list} without language information; equivalent to
     * {@code retokenize(list, null)}.
     *
     * @param list the existing tokens, may be {@code null}
     * @return the retokenized tokens, never {@code null}
     */
    @Override
    public List<String> retokenize(List<String> list) {
        return retokenize(list, null);
    }

    /**
     * Retokenizes already tokenized text.
     *
     * <p>Only the default analyzer performs retokenization. For Japanese, Chinese,
     * Korean and Arabic no retokenization is applied by this class, so a copy of the
     * input tokens is returned unchanged (rather than {@code null}).
     *
     * @param list     the existing tokens; {@code null} or empty yields an empty list
     * @param language the language of the text; {@code null}, or a language whose
     *                 {@code getLang()} is {@code null}, selects the default analyzer
     * @return the resulting tokens, never {@code null}
     */
    public List<String> retokenize(List<String> list, Language language) {
        if (list == null || list.isEmpty()) {
            return new ArrayList<>();
        }
        if (language == null || language.getLang() == null) {
            return GrobidDefaultAnalyzer.getInstance().retokenize(list);
        }
        if (language.isJapaneses() || language.isChinese() || language.isKorean() || language.isArabic()) {
            // No retokenizer for these languages: hand the tokens back as they are.
            return new ArrayList<>(list);
        }
        return GrobidDefaultAnalyzer.getInstance().retokenize(list);
    }

    /**
     * Tokenizes {@code str} into layout tokens without language information;
     * equivalent to {@code tokenizeWithLayoutToken(str, null)}.
     *
     * @param str the text to tokenize
     * @return the layout tokens built from the tokenized text
     */
    @Override
    public List<LayoutToken> tokenizeWithLayoutToken(String str) {
        return tokenizeWithLayoutToken(str, null);
    }

    /**
     * Normalizes {@code str} with {@code UnicodeUtil.normaliseText}, tokenizes the
     * result for {@code language} and converts the tokens to layout tokens.
     *
     * @param str      the text to tokenize
     * @param language the language of the text, may be {@code null}
     * @return the layout tokens built from the tokenized text
     */
    public List<LayoutToken> tokenizeWithLayoutToken(String str, Language language) {
        List<String> tokens = tokenize(UnicodeUtil.normaliseText(str), language);
        return LayoutTokensUtil.getLayoutTokensForTokenizedText(tokens);
    }
}
