package org.grobid.core.features;

import java.util.List;
import org.grobid.core.exceptions.GrobidException;
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.utilities.OffsetPosition;
import org.grobid.core.utilities.TextUtilities;
import org.grobid.core.utilities.UnicodeUtil;

/* loaded from: input_file:org/grobid/core/features/FeaturesVectorCitation.class */
/**
 * Feature vector for a single token of a citation string.
 *
 * <p>An instance holds the features computed for one token; {@link #printVector()} serialises
 * them as one space-separated line. {@link #addFeaturesCitation} builds and serialises the
 * vectors for a whole token sequence.
 */
public class FeaturesVectorCitation {
    /** Bin count passed to {@code FeatureFactory.linearScaling} for the relative position. */
    private static final int NB_BINS = 12;

    public String digit;
    public String string = null;
    public String label = null;
    public String blockStatus = null;
    public String lineStatus = null;
    public String fontStatus = null;
    public String fontSize = null;
    public boolean bold = false;
    public boolean italic = false;
    public String capitalisation = null;
    public boolean singleChar = false;
    public boolean properName = false;
    public boolean commonName = false;
    public boolean firstName = false;
    public boolean lastName = false;
    public boolean year = false;
    public boolean month = false;
    public boolean http = false;
    public String punctType = null;
    public boolean containPunct = false;
    public int relativePosition = -1;
    public boolean isKnownJournalTitle = false;
    public boolean isKnownAbbrevJournalTitle = false;
    public boolean isKnownConferenceTitle = false;
    public boolean isKnownPublisher = false;
    public boolean isKnownLocation = false;
    public boolean isKnownCollaboration = false;
    public boolean isKnownIdentifier = false;

    /**
     * Serialises this vector as a single line of space-separated features.
     *
     * <p>The line contains, in order: the token, its lower-cased form, its prefixes of length
     * 1 to 4, its suffixes of length 1 to 4, the line status, the capitalisation (forced to
     * {@code NOCAPS} for all-digit tokens), the digit class, fourteen {@code 1}/{@code 0} flags,
     * the punctuation type, the relative position and finally the label ({@code 0} when no
     * label is set), terminated by a newline.
     *
     * @return the feature line, or {@code null} when {@link #string} is {@code null} or empty
     */
    public String printVector() {
        if (this.string == null || this.string.length() == 0) {
            return null;
        }
        StringBuilder sb = new StringBuilder();
        sb.append(this.string);
        sb.append(" ").append(this.string.toLowerCase());
        for (int length = 1; length <= 4; length++) {
            sb.append(" ").append(TextUtilities.prefix(this.string, length));
        }
        for (int length = 1; length <= 4; length++) {
            sb.append(" ").append(TextUtilities.suffix(this.string, length));
        }
        sb.append(" ").append(this.lineStatus);
        // Constant-first comparison: digit is a public field without initialiser, so it may
        // legitimately be null here; it is then printed as "null" like the other unset fields.
        if ("ALLDIGIT".equals(this.digit)) {
            sb.append(" NOCAPS");
        } else {
            sb.append(" ").append(this.capitalisation);
        }
        sb.append(" ").append(this.digit);
        appendFlag(sb, this.singleChar);
        appendFlag(sb, this.properName);
        appendFlag(sb, this.commonName);
        appendFlag(sb, this.firstName);
        appendFlag(sb, this.lastName);
        appendFlag(sb, this.isKnownLocation);
        appendFlag(sb, this.year);
        appendFlag(sb, this.month);
        appendFlag(sb, this.http);
        appendFlag(sb, this.isKnownCollaboration);
        // Full and abbreviated journal titles share a single output column.
        appendFlag(sb, this.isKnownJournalTitle || this.isKnownAbbrevJournalTitle);
        appendFlag(sb, this.isKnownConferenceTitle);
        appendFlag(sb, this.isKnownPublisher);
        appendFlag(sb, this.isKnownIdentifier);
        sb.append(" ").append(this.punctType);
        sb.append(" ").append(this.relativePosition);
        if (this.label != null) {
            sb.append(" ").append(this.label).append("\n");
        } else {
            sb.append(" 0\n");
        }
        return sb.toString();
    }

    /** Appends a boolean feature as {@code " 1"} (true) or {@code " 0"} (false). */
    private static void appendFlag(StringBuilder sb, boolean flag) {
        sb.append(flag ? " 1" : " 0");
    }

    /**
     * Builds the feature lines for every token of a citation.
     *
     * <p>Tokens whose text is {@code " "} or {@code "\n"}, whose normalised text is blank, or
     * for which {@code TextUtilities.filterLine} returns {@code true} produce no output line.
     * Positions in the gazetteer lists are compared against the index of the token in
     * {@code list}.
     *
     * @param list   the tokens of the citation
     * @param list2  labels aligned by index with {@code list}; may be {@code null} or shorter
     *               than {@code list}, in which case the missing labels are left unset
     * @param list3  positions setting {@code isKnownJournalTitle}
     * @param list4  positions setting {@code isKnownAbbrevJournalTitle}
     * @param list5  positions setting {@code isKnownConferenceTitle}
     * @param list6  positions setting {@code isKnownPublisher}
     * @param list7  positions setting {@code isKnownLocation}
     * @param list8  positions setting {@code isKnownCollaboration}
     * @param list9  positions setting {@code isKnownIdentifier}
     * @param list10 positions setting {@code http}
     * @return the concatenation of the feature lines of all retained tokens
     * @throws GrobidException if any of {@code list3} to {@code list10} is {@code null}
     */
    public static String addFeaturesCitation(List<LayoutToken> list, List<String> list2, List<OffsetPosition> list3, List<OffsetPosition> list4, List<OffsetPosition> list5, List<OffsetPosition> list6, List<OffsetPosition> list7, List<OffsetPosition> list8, List<OffsetPosition> list9, List<OffsetPosition> list10) throws Exception {
        if (list3 == null || list4 == null || list5 == null || list6 == null || list7 == null || list8 == null || list9 == null || list10 == null) {
            throw new GrobidException("At least one list of gazetter matches positions is null.");
        }
        FeatureFactory featureFactory = FeatureFactory.getInstance();
        StringBuilder sb = new StringBuilder();

        // One forward-only cursor per gazetteer list; each remembers how far it has scanned.
        PositionCursor journalCursor = new PositionCursor(list3);
        PositionCursor abbrevJournalCursor = new PositionCursor(list4);
        PositionCursor conferenceCursor = new PositionCursor(list5);
        PositionCursor publisherCursor = new PositionCursor(list6);
        PositionCursor locationCursor = new PositionCursor(list7);
        PositionCursor collaborationCursor = new PositionCursor(list8);
        PositionCursor identifierCursor = new PositionCursor(list9);
        PositionCursor urlCursor = new PositionCursor(list10);

        int size = list.size();
        for (int n = 0; n < size; n++) {
            LayoutToken token = list.get(n);
            String tokenLabel = null;
            if (list2 != null && n < list2.size()) {
                tokenLabel = list2.get(n);
            }

            String rawText = token.getText();
            if (rawText.equals(" ") || rawText.equals("\n")) {
                continue;
            }
            String text = UnicodeUtil.normaliseTextAndRemoveSpaces(rawText);
            if (text.trim().length() == 0) {
                continue;
            }

            boolean inJournal = journalCursor.covers(n);
            boolean inAbbrevJournal = abbrevJournalCursor.covers(n);
            boolean inConference = conferenceCursor.covers(n);
            boolean inPublisher = publisherCursor.covers(n);
            boolean inLocation = locationCursor.covers(n);
            boolean inCollaboration = collaborationCursor.covers(n);
            boolean inIdentifier = identifierCursor.covers(n);
            boolean inUrl = urlCursor.covers(n);

            if (TextUtilities.filterLine(text)) {
                continue;
            }

            FeaturesVectorCitation features = new FeaturesVectorCitation();
            features.string = text;
            features.relativePosition = featureFactory.linearScaling(n, size, NB_BINS);

            // A single-token citation is reported as LINESTART, not LINEEND.
            if (n == 0) {
                features.lineStatus = "LINESTART";
            } else if (n == size - 1) {
                features.lineStatus = "LINEEND";
            } else {
                features.lineStatus = "LINEIN";
            }

            // Generic punctuation first; the specific classes below take precedence.
            if (featureFactory.isPunct.matcher(text).find()) {
                features.punctType = "PUNCT";
            }
            if (text.equals(TextUtilities.START_BRACKET) || text.equals("[")) {
                features.punctType = "OPENBRACKET";
            } else if (text.equals(TextUtilities.END_BRACKET) || text.equals("]")) {
                features.punctType = "ENDBRACKET";
            } else if (text.equals(".")) {
                features.punctType = "DOT";
            } else if (text.equals(TextUtilities.COMMA)) {
                features.punctType = "COMMA";
            } else if (text.equals("-")) {
                features.punctType = "HYPHEN";
            } else if (text.equals(TextUtilities.DOUBLE_QUOTE) || text.equals(TextUtilities.QUOTE) || text.equals("`")) {
                features.punctType = "QUOTE";
            }
            if (features.punctType == null) {
                features.punctType = "NOPUNCT";
            }

            features.singleChar = text.length() == 1;

            // ALLCAP overrides INITCAP when both tests succeed.
            if (Character.isUpperCase(text.charAt(0))) {
                features.capitalisation = "INITCAP";
            }
            if (featureFactory.test_all_capital(text)) {
                features.capitalisation = "ALLCAP";
            }
            if (features.capitalisation == null) {
                features.capitalisation = "NOCAPS";
            }

            if (FeatureFactory.test_digit(text)) {
                features.digit = "CONTAINSDIGITS";
            }
            features.commonName = featureFactory.test_common(text);
            features.properName = featureFactory.test_names(text);
            features.month = featureFactory.test_month(text);
            features.lastName = featureFactory.test_last_names(text);
            features.firstName = featureFactory.test_first_names(text);
            // ALLDIGIT overrides CONTAINSDIGITS when the isDigit pattern matches.
            if (featureFactory.isDigit.matcher(text).find()) {
                features.digit = "ALLDIGIT";
            }
            if (features.digit == null) {
                features.digit = "NODIGIT";
            }
            features.year = featureFactory.year.matcher(text).find();

            features.isKnownJournalTitle = inJournal;
            features.isKnownAbbrevJournalTitle = inAbbrevJournal;
            features.isKnownConferenceTitle = inConference;
            features.isKnownPublisher = inPublisher;
            features.isKnownLocation = inLocation;
            features.isKnownCollaboration = inCollaboration;
            features.isKnownIdentifier = inIdentifier;
            features.http = inUrl;

            features.label = tokenLabel;
            sb.append(features.printVector());
        }
        return sb.toString();
    }

    /**
     * Forward-only cursor over a list of positions, used to test successive token indices.
     *
     * <p>Callers must query token indices in non-decreasing order: entries the cursor has moved
     * past are never examined again.
     */
    private static final class PositionCursor {
        private final List<OffsetPosition> positions;
        private int index = 0;

        PositionCursor(List<OffsetPosition> positions) {
            this.positions = positions;
        }

        /**
         * Tells whether some position at or after the cursor spans {@code tokenIndex}
         * (bounds inclusive), scanning no further than the first position starting after it.
         */
        boolean covers(int tokenIndex) {
            for (int k = index; k < positions.size(); k++) {
                OffsetPosition position = positions.get(k);
                if (position.start <= tokenIndex && position.end >= tokenIndex) {
                    index = k;
                    return true;
                }
                if (position.start > tokenIndex) {
                    index = k;
                    return false;
                }
            }
            // Every remaining position ends before tokenIndex, so none can match a later
            // (larger) index either: park the cursor at the end to avoid rescanning.
            index = positions.size();
            return false;
        }
    }
}
