package org.grobid.core.features;

import java.util.StringTokenizer;
import org.grobid.core.utilities.TextUtilities;

/* loaded from: input_file:org/grobid/core/features/FeaturesVectorChemicalEntity.class */
/**
 * Feature vector for a single token of the chemical-entity model.
 *
 * <p>An instance is a plain bag of public fields. It is normally populated by
 * {@link #addFeaturesChemicalEntities(String, int, int, boolean, boolean)} and
 * serialised to one space-separated line by {@link #printVector()}.
 */
public class FeaturesVectorChemicalEntity {
    /** Bin count handed to {@code FeatureFactory.linearScaling} for the relative position feature. */
    private static final int NB_BINS = 12;

    /** Digit class of the token: {@code ALLDIGIT}, {@code CONTAINDIGIT} or {@code NODIGIT}. */
    public String digit;
    /** Token text (first tab-separated field of the input line). */
    public String string = null;
    /** Label (second tab-separated field of the input line), or {@code null} when absent. */
    public String label = null;
    /** Capitalisation class of the token: {@code ALLCAPS}, {@code INITCAP} or {@code NOCAPS}. */
    public String capitalisation = null;
    /** True when the token is exactly one character long. */
    public boolean singleChar = false;
    /** True when {@code FeatureFactory.test_names} accepts the token. */
    public boolean properName = false;
    /** True when {@code FeatureFactory.test_common} accepts the token. */
    public boolean commonName = false;
    /** Not read or written anywhere in this class; kept because it is a public field. */
    public boolean firstName = false;
    /** Punctuation class of the token, {@code NOPUNCT} when none applies. */
    public String punctType = null;
    /** Flag supplied by the caller of the factory method (parameter {@code z}). */
    public boolean isKnownChemicalToken = false;
    /** Flag supplied by the caller of the factory method (parameter {@code z2}). */
    public boolean isKnownChemicalNameToken = false;
    /** Result of {@code FeatureFactory.linearScaling}; {@code -1} until set. */
    public int relativeDocumentPosition = -1;

    /**
     * Serialises this vector as one line of space-separated features terminated by a newline.
     *
     * <p>Feature order: token, lower-cased token, prefixes of length 1 to 4, suffixes of
     * length 1 to 4, capitalisation, digit class, single-char flag, proper-name flag,
     * common-name flag, known-chemical-token flag, known-chemical-name-token flag,
     * punctuation class, token length, relative document position, label.
     *
     * @return the feature line, or {@code null} when {@link #string} is {@code null} or empty
     */
    public String printVector() {
        if (this.string == null || this.string.isEmpty()) {
            return null;
        }
        StringBuilder line = new StringBuilder();
        line.append(this.string);
        line.append(' ').append(this.string.toLowerCase());
        for (int len = 1; len <= 4; len++) {
            line.append(' ').append(TextUtilities.prefix(this.string, len));
        }
        for (int len = 1; len <= 4; len++) {
            line.append(' ').append(TextUtilities.suffix(this.string, len));
        }
        // Constant-first comparison: digit has no default value, so it may still be null here.
        // An all-digit token is always reported as NOCAPS whatever capitalisation holds.
        if ("ALLDIGIT".equals(this.digit)) {
            line.append(" NOCAPS");
        } else {
            line.append(' ').append(this.capitalisation);
        }
        line.append(' ').append(this.digit);
        appendFlag(line, this.singleChar);
        appendFlag(line, this.properName);
        appendFlag(line, this.commonName);
        appendFlag(line, this.isKnownChemicalToken);
        appendFlag(line, this.isKnownChemicalNameToken);
        line.append(' ').append(this.punctType);
        line.append(' ').append(this.string.length());
        line.append(' ').append(this.relativeDocumentPosition);
        if (this.label != null) {
            line.append(' ').append(this.label).append('\n');
        } else {
            // A missing label is written as the placeholder "0".
            line.append(" 0\n");
        }
        return line.toString();
    }

    /** Appends {@code " 1"} for a true flag and {@code " 0"} for a false one. */
    private static void appendFlag(StringBuilder line, boolean flag) {
        line.append(flag ? " 1" : " 0");
    }

    /**
     * Builds the feature vector for one input line.
     *
     * <p>The line is split on tab characters: the first field is the token, the optional
     * second field is the label. When the line contains no field at all, the returned
     * vector keeps its default field values (and {@link #printVector()} returns
     * {@code null} for it).
     *
     * @param str input line, tab-separated; must not be {@code null}
     * @param i   forwarded as the second argument of {@code FeatureFactory.linearScaling}
     *            (presumably the total document length; confirm against callers)
     * @param i2  forwarded as the first argument of {@code FeatureFactory.linearScaling}
     *            (presumably the current position in the document; confirm against callers)
     * @param z   value for {@link #isKnownChemicalToken}
     * @param z2  value for {@link #isKnownChemicalNameToken}
     * @return a new, populated feature vector; never {@code null}
     */
    public static FeaturesVectorChemicalEntity addFeaturesChemicalEntities(String str, int i, int i2, boolean z, boolean z2) {
        FeatureFactory featureFactory = FeatureFactory.getInstance();
        FeaturesVectorChemicalEntity features = new FeaturesVectorChemicalEntity();
        StringTokenizer fields = new StringTokenizer(str, "\t");
        if (!fields.hasMoreTokens()) {
            return features;
        }

        String token = fields.nextToken();
        features.string = token;
        features.label = fields.hasMoreTokens() ? fields.nextToken() : null;
        features.singleChar = token.length() == 1;

        if (featureFactory.test_all_capital(token)) {
            features.capitalisation = "ALLCAPS";
        } else if (featureFactory.test_first_capital(token)) {
            features.capitalisation = "INITCAP";
        } else {
            features.capitalisation = "NOCAPS";
        }

        if (featureFactory.test_number(token)) {
            features.digit = "ALLDIGIT";
        } else if (FeatureFactory.test_digit(token)) {
            features.digit = "CONTAINDIGIT";
        } else {
            features.digit = "NODIGIT";
        }

        if (featureFactory.test_common(token)) {
            features.commonName = true;
        }
        if (featureFactory.test_names(token)) {
            features.properName = true;
        }

        features.punctType = classifyPunctuation(featureFactory, token);

        features.relativeDocumentPosition = featureFactory.linearScaling(i2, i, NB_BINS);
        features.isKnownChemicalToken = z;
        features.isKnownChemicalNameToken = z2;
        return features;
    }

    /**
     * Returns the punctuation class of a token.
     *
     * <p>The generic {@code PUNCT} class is assigned first when the factory's punctuation
     * pattern matches, then overridden by a more specific class when the token is exactly
     * one of the recognised symbols. {@code NOPUNCT} is returned when nothing applies.
     */
    private static String classifyPunctuation(FeatureFactory featureFactory, String token) {
        String punctType = null;
        if (featureFactory.isPunct.matcher(token).find()) {
            punctType = "PUNCT";
        }
        if (token.equals(TextUtilities.START_BRACKET) || token.equals("[")) {
            punctType = "OPENBRACKET";
        } else if (token.equals(TextUtilities.END_BRACKET) || token.equals("]")) {
            punctType = "ENDBRACKET";
        } else if (token.equals(".")) {
            punctType = "DOT";
        } else if (token.equals(TextUtilities.COMMA)) {
            punctType = "COMMA";
        } else if (token.equals("-")) {
            punctType = "HYPHEN";
        } else if (token.equals(TextUtilities.DOUBLE_QUOTE) || token.equals(TextUtilities.QUOTE) || token.equals("`")) {
            punctType = "QUOTE";
        }
        return punctType != null ? punctType : "NOPUNCT";
    }
}
