package org.grobid.core.document;

import com.google.common.base.Joiner;
import com.google.common.collect.Sets;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.StringTokenizer;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import nu.xom.Attribute;
import nu.xom.Element;
import nu.xom.Node;
import nu.xom.Text;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.fontbox.ttf.HeaderTable;
import org.apache.pdfbox.contentstream.operator.OperatorName;
import org.apache.xalan.templates.Constants;
import org.apache.xerces.impl.xs.SchemaSymbols;
import org.grobid.core.GrobidModels;
import org.grobid.core.data.BibDataSet;
import org.grobid.core.data.BiblioItem;
import org.grobid.core.data.Date;
import org.grobid.core.data.Equation;
import org.grobid.core.data.Figure;
import org.grobid.core.data.Keyword;
import org.grobid.core.data.Table;
import org.grobid.core.document.xml.XmlBuilderUtils;
import org.grobid.core.engines.Engine;
import org.grobid.core.engines.FullTextParser;
import org.grobid.core.engines.config.GrobidAnalysisConfig;
import org.grobid.core.engines.label.SegmentationLabels;
import org.grobid.core.engines.label.TaggingLabel;
import org.grobid.core.engines.label.TaggingLabels;
import org.grobid.core.exceptions.GrobidException;
import org.grobid.core.lang.Language;
import org.grobid.core.layout.BoundingBox;
import org.grobid.core.layout.GraphicObject;
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.layout.LayoutTokenization;
import org.grobid.core.tokenization.TaggingTokenCluster;
import org.grobid.core.tokenization.TaggingTokenClusteror;
import org.grobid.core.utilities.GrobidProperties;
import org.grobid.core.utilities.KeyGen;
import org.grobid.core.utilities.LanguageUtilities;
import org.grobid.core.utilities.LayoutTokensUtil;
import org.grobid.core.utilities.TextUtilities;
import org.grobid.core.utilities.matching.EntityMatcherException;
import org.grobid.core.utilities.matching.ReferenceMarkerMatcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/grobid/core/document/TEIFormatter.class */
public class TEIFormatter {
    // Document being serialized; assigned once in the constructor and read by toTEIHeader
    // (e.g. for the xml:lang attribute of the teiHeader element).
    private Document doc;
    // Parser handed in through the constructor; its use is not visible in this part of the file.
    private FullTextParser fullTextParser;
    // NOTE(review): presumably tracks whether output is currently inside a paragraph - confirm against its users.
    private Boolean inParagraph = false;
    // NOTE(review): starts as null; purpose not visible in this part of the file.
    private ArrayList<String> elements = null;
    // NOTE(review): presumably positions of the italic/bold flags in some feature vector - confirm before relying on it.
    private static final int ITALIC_POS = 16;
    private static final int BOLD_POS = 15;
    private static final Logger LOGGER = LoggerFactory.getLogger((Class<?>) TEIFormatter.class);
    // Labels identifying in-text markers: citation, figure, table and equation call-outs.
    public static final Set<TaggingLabel> MARKER_LABELS = Sets.newHashSet(TaggingLabels.CITATION_MARKER, TaggingLabels.FIGURE_MARKER, TaggingLabels.TABLE_MARKER, TaggingLabels.EQUATION_MARKER);
    // A single bracketed or parenthesized number with at most one trailing word character, e.g. "[12]" or "(3a)".
    private static Pattern numberRef = Pattern.compile("(\\[|\\()\\d+\\w?(\\)|\\])");
    // A bracketed or parenthesized comma-separated list whose entries may be hyphenated ranges, e.g. "[1, 2-4, 7]".
    private static Pattern numberRefCompact = Pattern.compile("(\\[|\\()((\\d)+(\\w)?(\\-\\d+\\w?)?,\\s?)+(\\d+\\w?)(\\-\\d+\\w?)?(\\)|\\])");
    // A single bracketed or parenthesized numeric range joined by a hyphen or a dash variant, e.g. "[3-7]".
    // NOTE(review): the last dash alternative appears to repeat an earlier one - verify the code points.
    private static Pattern numberRefCompact2 = Pattern.compile("(\\[|\\()(\\d+)(-|‒|–|—|―|–)(\\d+)(\\)|\\])");
    // Leading run of digits (group 1) followed by the remainder of the text (group 2).
    private static Pattern startNum = Pattern.compile("^(\\d+)(.*)");

    /* loaded from: input_file:org/grobid/core/document/TEIFormatter$SchemaDeclaration.class */
    /**
     * Kind of schema reference that toTEIHeader writes at the top of the generated TEI.
     * The constant order is part of the compiled enum; do not reorder.
     */
    public enum SchemaDeclaration {
        // No schema-specific declaration; only the plain TEI root element is written.
        DEFAULT,
        // A DOCTYPE declaration pointing at schemas/dtd/Grobid.dtd under the GROBID home path.
        DTD,
        // The TEI root element carries an xsi:schemaLocation pointing at schemas/xsd/Grobid.xsd.
        XSD,
        // An xml-model processing instruction referencing schemas/rng/Grobid.rng (RELAX NG).
        RNG,
        // An xml-model processing instruction referencing schemas/rng/Grobid.rnc (RELAX NG compact syntax).
        RNC
    }

    /**
     * Creates a formatter bound to the given document and full-text parser.
     *
     * <p>Both references are stored as-is; no validation or copying is performed,
     * so {@code null} arguments are accepted exactly as before.
     *
     * @param document       the document whose content will be serialized to TEI
     * @param fullTextParser the parser instance kept for later use by this formatter
     */
    public TEIFormatter(Document document, FullTextParser fullTextParser) {
        // The former "= null" stores were dead: each field was overwritten on the next lines.
        this.doc = document;
        this.fullTextParser = fullTextParser;
    }

    /**
     * Convenience overload that builds the TEI header with an XSD schema declaration.
     *
     * <p>All arguments are forwarded unchanged to the five-argument {@code toTEIHeader},
     * with {@link SchemaDeclaration#XSD} supplied as the schema declaration.
     *
     * @param biblioItem           header metadata to serialize; the delegate substitutes an empty
     *                             {@code BiblioItem} when this is {@code null}
     * @param str                  text the delegate writes as the availability paragraph when a
     *                             publication date is known but the publisher is {@code null};
     *                             may be {@code null}, in which case an empty licence element is written
     * @param list                 bibliographic data sets the delegate passes on when rendering a
     *                             labeled abstract
     * @param grobidAnalysisConfig configuration read by the delegate (e.g. whether to emit the XSL
     *                             stylesheet instruction and whether to generate xml:id attributes)
     * @return the {@code StringBuilder} produced by the delegate
     */
    public StringBuilder toTEIHeader(BiblioItem biblioItem, String str, List<BibDataSet> list, GrobidAnalysisConfig grobidAnalysisConfig) {
        return toTEIHeader(biblioItem, SchemaDeclaration.XSD, str, list, grobidAnalysisConfig);
    }

    public StringBuilder toTEIHeader(BiblioItem biblioItem, SchemaDeclaration schemaDeclaration, String str, List<BibDataSet> list, GrobidAnalysisConfig grobidAnalysisConfig) {
        Language runLanguageId;
        StringBuilder sb = new StringBuilder();
        sb.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
        if (grobidAnalysisConfig.isWithXslStylesheet()) {
            sb.append("<?xml-stylesheet type=\"text/xsl\" href=\"../jsp/xmlverbatimwrapper.xsl\"?> \n");
        }
        if (schemaDeclaration == SchemaDeclaration.DTD) {
            sb.append("<!DOCTYPE TEI SYSTEM \"" + GrobidProperties.get_GROBID_HOME_PATH() + "/schemas/dtd/Grobid.dtd\">\n");
        } else if (schemaDeclaration == SchemaDeclaration.XSD) {
            sb.append("<TEI xml:space=\"preserve\" xmlns=\"http://www.tei-c.org/ns/1.0\" \nxmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" \nxsi:schemaLocation=\"http://www.tei-c.org/ns/1.0 " + GrobidProperties.get_GROBID_HOME_PATH() + "/schemas/xsd/Grobid.xsd\"\n xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n");
        } else if (schemaDeclaration == SchemaDeclaration.RNG) {
            sb.append("<?xml-model href=\"file://" + GrobidProperties.get_GROBID_HOME_PATH() + "/schemas/rng/Grobid.rng\" schematypens=\"http://relaxng.org/ns/structure/1.0\"?>\n");
        } else if (schemaDeclaration == SchemaDeclaration.RNC) {
            sb.append("<?xml-model href=\"file://" + GrobidProperties.get_GROBID_HOME_PATH() + "/schemas/rng/Grobid.rnc\" type=\"application/relax-ng-compact-syntax\"?>\n");
        }
        if (schemaDeclaration != SchemaDeclaration.XSD) {
            sb.append("<TEI xml:space=\"preserve\" xmlns=\"http://www.tei-c.org/ns/1.0\">\n");
        }
        if (this.doc.getLanguage() != null) {
            sb.append("\t<teiHeader xml:lang=\"" + this.doc.getLanguage() + "\">");
        } else {
            sb.append("\t<teiHeader>");
        }
        sb.append("\n\t\t<fileDesc>\n\t\t\t<titleStmt>\n\t\t\t\t<title level=\"a\" type=\"main\"");
        if (grobidAnalysisConfig.isGenerateTeiIds()) {
            sb.append(" xml:id=\"_" + KeyGen.getKey().substring(0, 7) + "\"");
        }
        sb.append(TextUtilities.GREATER_THAN);
        if (biblioItem == null) {
            biblioItem = new BiblioItem();
        }
        if (biblioItem.getTitle() != null) {
            sb.append(TextUtilities.HTMLEncode(biblioItem.getTitle()));
        }
        sb.append("</title>\n\t\t\t</titleStmt>\n");
        if (biblioItem.getPublisher() == null && biblioItem.getPublicationDate() == null && biblioItem.getNormalizedPublicationDate() == null) {
            sb.append("\t\t\t<publicationStmt>\n");
            sb.append("\t\t\t\t<publisher/>\n");
            sb.append("\t\t\t\t<availability status=\"unknown\"><licence/></availability>\n");
            sb.append("\t\t\t</publicationStmt>\n");
        } else {
            sb.append("\t\t\t<publicationStmt>\n");
            if (biblioItem.getPublisher() != null) {
                sb.append("\t\t\t\t<publisher>" + TextUtilities.HTMLEncode(biblioItem.getPublisher()) + "</publisher>\n");
                sb.append("\t\t\t\t<availability status=\"unknown\">");
                sb.append("<p>Copyright ");
                sb.append(TextUtilities.HTMLEncode(biblioItem.getPublisher()) + "</p>\n");
                sb.append("\t\t\t\t</availability>\n");
            } else {
                sb.append("\t\t\t\t<publisher/>\n");
                if (str == null) {
                    sb.append("\t\t\t\t<availability status=\"unknown\"><licence/></availability>");
                } else {
                    sb.append("\t\t\t\t<availability status=\"unknown\"><p>" + str + "</p></availability>");
                }
                sb.append("\n");
            }
            if (biblioItem.getNormalizedPublicationDate() != null) {
                Date normalizedPublicationDate = biblioItem.getNormalizedPublicationDate();
                int year = normalizedPublicationDate.getYear();
                int month = normalizedPublicationDate.getMonth();
                int day = normalizedPublicationDate.getDay();
                String str2 = "";
                if (year != -1) {
                    str2 = year <= 9 ? str2 + "000" + year : year <= 99 ? str2 + "00" + year : year <= 999 ? str2 + "0" + year : str2 + year;
                    if (month != -1) {
                        str2 = month <= 9 ? str2 + "-0" + month : str2 + "-" + month;
                        if (day != -1) {
                            str2 = day <= 9 ? str2 + "-0" + day : str2 + "-" + day;
                        }
                    }
                    sb.append("\t\t\t\t<date type=\"published\" when=\"");
                    sb.append(str2 + "\">");
                } else {
                    sb.append("\t\t\t\t<date>");
                }
                if (biblioItem.getPublicationDate() != null) {
                    sb.append(TextUtilities.HTMLEncode(biblioItem.getPublicationDate()));
                } else {
                    sb.append(str2);
                }
                sb.append("</date>\n");
            } else if (biblioItem.getYear() != null && biblioItem.getYear().length() > 0) {
                String str3 = "";
                if (biblioItem.getYear().length() == 1) {
                    str3 = str3 + "000" + biblioItem.getYear();
                } else if (biblioItem.getYear().length() == 2) {
                    str3 = str3 + "00" + biblioItem.getYear();
                } else if (biblioItem.getYear().length() == 3) {
                    str3 = str3 + "0" + biblioItem.getYear();
                } else if (biblioItem.getYear().length() == 4) {
                    str3 = str3 + biblioItem.getYear();
                }
                if (biblioItem.getMonth() != null && biblioItem.getMonth().length() > 0) {
                    str3 = biblioItem.getMonth().length() == 1 ? str3 + "-0" + biblioItem.getMonth() : str3 + "-" + biblioItem.getMonth();
                    if (biblioItem.getDay() != null && biblioItem.getDay().length() > 0) {
                        str3 = biblioItem.getDay().length() == 1 ? str3 + "-0" + biblioItem.getDay() : str3 + "-" + biblioItem.getDay();
                    }
                }
                sb.append("\t\t\t\t<date type=\"published\" when=\"");
                sb.append(str3 + "\">");
                if (biblioItem.getPublicationDate() != null) {
                    sb.append(TextUtilities.HTMLEncode(biblioItem.getPublicationDate()));
                } else {
                    sb.append(str3);
                }
                sb.append("</date>\n");
            } else if (biblioItem.getE_Year() != null) {
                String str4 = "";
                if (biblioItem.getE_Year().length() == 1) {
                    str4 = str4 + "000" + biblioItem.getE_Year();
                } else if (biblioItem.getE_Year().length() == 2) {
                    str4 = str4 + "00" + biblioItem.getE_Year();
                } else if (biblioItem.getE_Year().length() == 3) {
                    str4 = str4 + "0" + biblioItem.getE_Year();
                } else if (biblioItem.getE_Year().length() == 4) {
                    str4 = str4 + biblioItem.getE_Year();
                }
                if (biblioItem.getE_Month() != null) {
                    str4 = biblioItem.getE_Month().length() == 1 ? str4 + "-0" + biblioItem.getE_Month() : str4 + "-" + biblioItem.getE_Month();
                    if (biblioItem.getE_Day() != null) {
                        str4 = biblioItem.getE_Day().length() == 1 ? str4 + "-0" + biblioItem.getE_Day() : str4 + "-" + biblioItem.getE_Day();
                    }
                }
                sb.append("\t\t\t\t<date type=\"ePublished\" when=\"");
                sb.append(str4 + "\">");
                if (biblioItem.getPublicationDate() != null) {
                    sb.append(TextUtilities.HTMLEncode(biblioItem.getPublicationDate()));
                } else {
                    sb.append(str4);
                }
                sb.append("</date>\n");
            } else if (biblioItem.getPublicationDate() != null) {
                sb.append("\t\t\t\t<date type=\"published\">");
                sb.append(TextUtilities.HTMLEncode(biblioItem.getPublicationDate()) + "</date>");
            }
            sb.append("\t\t\t</publicationStmt>\n");
        }
        sb.append("\t\t\t<sourceDesc>\n\t\t\t\t<biblStruct>\n\t\t\t\t\t<analytic>\n");
        sb.append(biblioItem.toTEIAuthorBlock(6, grobidAnalysisConfig));
        String title = biblioItem.getTitle();
        String language = biblioItem.getLanguage();
        String englishTitle = biblioItem.getEnglishTitle();
        if (title != null) {
            sb.append("\t\t\t\t\t\t<title");
            sb.append(" level=\"a\" type=\"main\"");
            if (grobidAnalysisConfig.isGenerateTeiIds()) {
                sb.append(" xml:id=\"_" + KeyGen.getKey().substring(0, 7) + "\"");
            }
            if (englishTitle == null) {
                sb.append(TextUtilities.GREATER_THAN + TextUtilities.HTMLEncode(title) + "</title>\n");
            } else {
                sb.append(" xml:lang=\"" + language + "\">" + TextUtilities.HTMLEncode(title) + "</title>\n");
            }
        }
        boolean z = false;
        boolean isGenerateTeiIds = grobidAnalysisConfig.isGenerateTeiIds();
        if (englishTitle != null && (runLanguageId = LanguageUtilities.getInstance().runLanguageId(englishTitle)) != null && runLanguageId.getLang().equals(Language.EN)) {
            z = true;
            sb.append("\t\t\t\t\t\t<title");
            sb.append(" level=\"a\"");
            if (isGenerateTeiIds) {
                sb.append(" xml:id=\"_" + KeyGen.getKey().substring(0, 7) + "\"");
            }
            sb.append(" xml:lang=\"en\">").append(TextUtilities.HTMLEncode(englishTitle)).append("</title>\n");
        }
        sb.append("\t\t\t\t\t</analytic>\n");
        if (biblioItem.getJournal() != null || biblioItem.getJournalAbbrev() != null || biblioItem.getISSN() != null || biblioItem.getISSNe() != null || biblioItem.getPublisher() != null || biblioItem.getPublicationDate() != null || biblioItem.getVolumeBlock() != null || biblioItem.getItem() == 1 || biblioItem.getItem() == 6 || biblioItem.getItem() == 13 || biblioItem.getItem() == 5 || biblioItem.getItem() == 0 || biblioItem.getItem() == 14 || biblioItem.getItem() == 7) {
            sb.append("\t\t\t\t\t<monogr");
            sb.append(">\n");
            if (biblioItem.getJournal() != null) {
                sb.append("\t\t\t\t\t\t<title level=\"j\" type=\"main\"");
                if (isGenerateTeiIds) {
                    sb.append(" xml:id=\"_" + KeyGen.getKey().substring(0, 7) + "\"");
                }
                sb.append(TextUtilities.GREATER_THAN + TextUtilities.HTMLEncode(biblioItem.getJournal()) + "</title>\n");
            } else if (biblioItem.getBookTitle() != null) {
                sb.append("\t\t\t\t\t\t<title level=\"m\"");
                if (isGenerateTeiIds) {
                    sb.append(" xml:id=\"_" + KeyGen.getKey().substring(0, 7) + "\"");
                }
                sb.append(TextUtilities.GREATER_THAN + TextUtilities.HTMLEncode(biblioItem.getBookTitle()) + "</title>\n");
            }
            if (biblioItem.getJournalAbbrev() != null) {
                sb.append("\t\t\t\t\t\t<title level=\"j\" type=\"abbrev\">" + TextUtilities.HTMLEncode(biblioItem.getJournalAbbrev()) + "</title>\n");
            }
            if (biblioItem.getISSN() != null) {
                sb.append("\t\t\t\t\t\t<idno type=\"ISSN\">" + TextUtilities.HTMLEncode(biblioItem.getISSN()) + "</idno>\n");
            }
            if (biblioItem.getISSNe() != null && !biblioItem.getISSNe().equals(biblioItem.getISSN())) {
                sb.append("\t\t\t\t\t\t<idno type=\"eISSN\">" + TextUtilities.HTMLEncode(biblioItem.getISSNe()) + "</idno>\n");
            }
            String bookTitle = biblioItem.getBookTitle();
            boolean z2 = false;
            if (biblioItem.getEvent() != null) {
                biblioItem.getEvent();
            } else if (bookTitle != null) {
                String trim = bookTitle.trim();
                Iterator<String> it = BiblioItem.confPrefixes.iterator();
                while (true) {
                    if (!it.hasNext()) {
                        break;
                    }
                    String next = it.next();
                    if (trim.startsWith(next)) {
                        sb.append("\t\t\t\t\t\t<meeting>" + TextUtilities.HTMLEncode(trim.replace(next, "").trim()));
                        if (biblioItem.getLocation() != null || biblioItem.getTown() != null || biblioItem.getCountry() != null) {
                            sb.append(" <address>");
                            if (biblioItem.getTown() != null) {
                                sb.append("<settlement>" + biblioItem.getTown() + "</settlement>");
                            }
                            if (biblioItem.getCountry() != null) {
                                sb.append("<country>" + biblioItem.getCountry() + "</country>");
                            }
                            if (biblioItem.getLocation() != null && biblioItem.getTown() == null && biblioItem.getCountry() == null) {
                                sb.append("<addrLine>" + TextUtilities.HTMLEncode(biblioItem.getLocation()) + "</addrLine>");
                            }
                            sb.append("</address>\n");
                            z2 = true;
                        }
                        sb.append("\t\t\t\t\t\t</meeting>\n");
                    }
                }
            }
            if ((biblioItem.getLocation() != null || biblioItem.getTown() != null || biblioItem.getCountry() != null) && !z2) {
                sb.append("\t\t\t\t\t\t<meeting>");
                sb.append(" <address>");
                if (biblioItem.getTown() != null) {
                    sb.append(" <settlement>" + biblioItem.getTown() + "</settlement>");
                }
                if (biblioItem.getCountry() != null) {
                    sb.append(" <country>" + biblioItem.getCountry() + "</country>");
                }
                if (biblioItem.getLocation() != null && biblioItem.getTown() == null && biblioItem.getCountry() == null) {
                    sb.append("<addrLine>" + TextUtilities.HTMLEncode(biblioItem.getLocation()) + "</addrLine>");
                }
                sb.append("</address>\n");
                sb.append("\t\t\t\t\t\t</meeting>\n");
            }
            String pageRange = biblioItem.getPageRange();
            if ((biblioItem.getVolumeBlock() != null) | (biblioItem.getPublicationDate() != null) | (biblioItem.getNormalizedPublicationDate() != null) | (pageRange != null) | (biblioItem.getIssue() != null) | (biblioItem.getBeginPage() != -1) | (biblioItem.getPublisher() != null)) {
                sb.append("\t\t\t\t\t\t<imprint>\n");
                if (biblioItem.getPublisher() != null) {
                    sb.append("\t\t\t\t\t\t\t<publisher>" + TextUtilities.HTMLEncode(biblioItem.getPublisher()) + "</publisher>\n");
                }
                if (biblioItem.getVolumeBlock() != null) {
                    sb.append("\t\t\t\t\t\t\t<biblScope unit=\"volume\">" + TextUtilities.HTMLEncode(biblioItem.getVolumeBlock().replace(" ", "").trim()) + "</biblScope>\n");
                }
                if (biblioItem.getIssue() != null) {
                    sb.append("\t\t\t\t\t\t\t<biblScope unit=\"issue\">" + TextUtilities.HTMLEncode(biblioItem.getIssue()) + "</biblScope>\n");
                }
                if (pageRange != null) {
                    StringTokenizer stringTokenizer = new StringTokenizer(pageRange, "--");
                    if (stringTokenizer.countTokens() == 2) {
                        sb.append("\t\t\t\t\t\t\t<biblScope unit=\"page\"");
                        sb.append(" from=\"" + TextUtilities.HTMLEncode(stringTokenizer.nextToken()) + "\"");
                        sb.append(" to=\"" + TextUtilities.HTMLEncode(stringTokenizer.nextToken()) + "\"/>\n");
                    } else {
                        sb.append("\t\t\t\t\t\t\t<biblScope unit=\"page\">" + TextUtilities.HTMLEncode(pageRange) + "</biblScope>\n");
                    }
                } else if (biblioItem.getBeginPage() != -1) {
                    if (biblioItem.getEndPage() != -1) {
                        sb.append("\t\t\t\t\t\t\t<biblScope unit=\"page\"");
                        sb.append(" from=\"" + biblioItem.getBeginPage() + "\"");
                        sb.append(" to=\"" + biblioItem.getEndPage() + "\"/>\n");
                    } else {
                        sb.append("\t\t\t\t\t\t\t<biblScope unit=\"page\"");
                        sb.append(" from=\"" + biblioItem.getBeginPage() + "\"/>\n");
                    }
                }
                if (biblioItem.getNormalizedPublicationDate() != null) {
                    Date normalizedPublicationDate2 = biblioItem.getNormalizedPublicationDate();
                    int year2 = normalizedPublicationDate2.getYear();
                    int month2 = normalizedPublicationDate2.getMonth();
                    int day2 = normalizedPublicationDate2.getDay();
                    if (year2 != -1) {
                        String str5 = year2 <= 9 ? "000" + year2 : year2 <= 99 ? "00" + year2 : year2 <= 999 ? "0" + year2 : "" + year2;
                        if (month2 != -1) {
                            str5 = month2 <= 9 ? str5 + "-0" + month2 : str5 + "-" + month2;
                            if (day2 != -1) {
                                str5 = day2 <= 9 ? str5 + "-0" + day2 : str5 + "-" + day2;
                            }
                        }
                        if (biblioItem.getPublicationDate() != null) {
                            sb.append("\t\t\t\t\t\t\t<date type=\"published\" when=\"");
                            sb.append(str5 + "\">");
                            sb.append(TextUtilities.HTMLEncode(biblioItem.getPublicationDate()) + "</date>\n");
                        } else {
                            sb.append("\t\t\t\t\t\t\t<date type=\"published\" when=\"");
                            sb.append(str5 + "\" />\n");
                        }
                    } else if (biblioItem.getPublicationDate() != null) {
                        sb.append("\t\t\t\t\t\t\t<date type=\"published\">");
                        sb.append(TextUtilities.HTMLEncode(biblioItem.getPublicationDate()) + "</date>\n");
                    }
                } else if (biblioItem.getYear() != null) {
                    String str6 = "";
                    if (biblioItem.getYear().length() == 1) {
                        str6 = str6 + "000" + biblioItem.getYear();
                    } else if (biblioItem.getYear().length() == 2) {
                        str6 = str6 + "00" + biblioItem.getYear();
                    } else if (biblioItem.getYear().length() == 3) {
                        str6 = str6 + "0" + biblioItem.getYear();
                    } else if (biblioItem.getYear().length() == 4) {
                        str6 = str6 + biblioItem.getYear();
                    }
                    if (biblioItem.getMonth() != null) {
                        str6 = biblioItem.getMonth().length() == 1 ? str6 + "-0" + biblioItem.getMonth() : str6 + "-" + biblioItem.getMonth();
                        if (biblioItem.getDay() != null) {
                            str6 = biblioItem.getDay().length() == 1 ? str6 + "-0" + biblioItem.getDay() : str6 + "-" + biblioItem.getDay();
                        }
                    }
                    if (biblioItem.getPublicationDate() != null) {
                        sb.append("\t\t\t\t\t\t\t<date type=\"published\" when=\"");
                        sb.append(str6 + "\">");
                        sb.append(TextUtilities.HTMLEncode(biblioItem.getPublicationDate()) + "</date>\n");
                    } else {
                        sb.append("\t\t\t\t\t\t\t<date type=\"published\" when=\"");
                        sb.append(str6 + "\" />\n");
                    }
                } else if (biblioItem.getE_Year() != null) {
                    String str7 = "";
                    if (biblioItem.getE_Year().length() == 1) {
                        str7 = str7 + "000" + biblioItem.getE_Year();
                    } else if (biblioItem.getE_Year().length() == 2) {
                        str7 = str7 + "00" + biblioItem.getE_Year();
                    } else if (biblioItem.getE_Year().length() == 3) {
                        str7 = str7 + "0" + biblioItem.getE_Year();
                    } else if (biblioItem.getE_Year().length() == 4) {
                        str7 = str7 + biblioItem.getE_Year();
                    }
                    if (biblioItem.getE_Month() != null) {
                        str7 = biblioItem.getE_Month().length() == 1 ? str7 + "-0" + biblioItem.getE_Month() : str7 + "-" + biblioItem.getE_Month();
                        if (biblioItem.getE_Day() != null) {
                            str7 = biblioItem.getE_Day().length() == 1 ? str7 + "-0" + biblioItem.getE_Day() : str7 + "-" + biblioItem.getE_Day();
                        }
                    }
                    sb.append("\t\t\t\t\t\t\t<date type=\"ePublished\" when=\"");
                    sb.append(str7 + "\" />\n");
                } else if (biblioItem.getPublicationDate() != null) {
                    sb.append("\t\t\t\t\t\t\t<date type=\"published\">");
                    sb.append(TextUtilities.HTMLEncode(biblioItem.getPublicationDate()) + "</date>\n");
                }
                sb.append("\t\t\t\t\t\t</imprint>\n");
            }
            sb.append("\t\t\t\t\t</monogr>\n");
        } else {
            sb.append("\t\t\t\t\t<monogr>\n");
            sb.append("\t\t\t\t\t\t<imprint>\n");
            sb.append("\t\t\t\t\t\t\t<date/>\n");
            sb.append("\t\t\t\t\t\t</imprint>\n");
            sb.append("\t\t\t\t\t</monogr>\n");
        }
        if (!StringUtils.isEmpty(biblioItem.getDOI())) {
            String HTMLEncode = TextUtilities.HTMLEncode(biblioItem.getDOI());
            if (HTMLEncode.endsWith(".xml")) {
                HTMLEncode = HTMLEncode.replace(".xml", "");
            }
            sb.append("\t\t\t\t\t<idno type=\"DOI\">" + HTMLEncode + "</idno>\n");
        }
        if (!StringUtils.isEmpty(biblioItem.getArXivId())) {
            sb.append("\t\t\t\t\t<idno type=\"arXiv\">" + TextUtilities.HTMLEncode(biblioItem.getArXivId()) + "</idno>\n");
        }
        if (!StringUtils.isEmpty(biblioItem.getPMID())) {
            sb.append("\t\t\t\t\t<idno type=\"PMID\">" + TextUtilities.HTMLEncode(biblioItem.getPMID()) + "</idno>\n");
        }
        if (!StringUtils.isEmpty(biblioItem.getPMCID())) {
            sb.append("\t\t\t\t\t<idno type=\"PMCID\">" + TextUtilities.HTMLEncode(biblioItem.getPMCID()) + "</idno>\n");
        }
        if (!StringUtils.isEmpty(biblioItem.getPII())) {
            sb.append("\t\t\t\t\t<idno type=\"PII\">" + TextUtilities.HTMLEncode(biblioItem.getPII()) + "</idno>\n");
        }
        if (!StringUtils.isEmpty(biblioItem.getArk())) {
            sb.append("\t\t\t\t\t<idno type=\"ark\">" + TextUtilities.HTMLEncode(biblioItem.getArk()) + "</idno>\n");
        }
        if (!StringUtils.isEmpty(biblioItem.getIstexId())) {
            sb.append("\t\t\t\t\t<idno type=\"istexId\">" + TextUtilities.HTMLEncode(biblioItem.getIstexId()) + "</idno>\n");
        }
        if (!StringUtils.isEmpty(biblioItem.getOAURL())) {
            sb.append("\t\t\t\t\t<ptr type=\"open-access\" target=\"").append(TextUtilities.HTMLEncode(biblioItem.getOAURL())).append("\" />\n");
        }
        if (biblioItem.getSubmission() != null) {
            sb.append("\t\t\t\t\t<note type=\"submission\">" + TextUtilities.HTMLEncode(biblioItem.getSubmission()) + "</note>\n");
        }
        if (biblioItem.getDedication() != null) {
            sb.append("\t\t\t\t\t<note type=\"dedication\">" + TextUtilities.HTMLEncode(biblioItem.getDedication()) + "</note>\n");
        }
        if ((englishTitle != null) & (!z)) {
            sb.append("\t\t\t\t\t<note type=\"title\"");
            if (isGenerateTeiIds) {
                sb.append(" xml:id=\"_" + KeyGen.getKey().substring(0, 7) + "\"");
            }
            sb.append(TextUtilities.GREATER_THAN + TextUtilities.HTMLEncode(englishTitle) + "</note>\n");
        }
        if (biblioItem.getNote() != null) {
            sb.append("\t\t\t\t\t<note");
            if (isGenerateTeiIds) {
                sb.append(" xml:id=\"_" + KeyGen.getKey().substring(0, 7) + "\"");
            }
            sb.append(TextUtilities.GREATER_THAN + TextUtilities.HTMLEncode(biblioItem.getNote()) + "</note>\n");
        }
        sb.append("\t\t\t\t</biblStruct>\n");
        if (biblioItem.getURL() != null) {
            sb.append("\t\t\t\t<ref target=\"" + biblioItem.getURL() + "\" />\n");
        }
        sb.append("\t\t\t</sourceDesc>\n");
        sb.append("\t\t</fileDesc>\n");
        sb.append("\n\t\t<encodingDesc>\n");
        sb.append("\t\t\t<appInfo>\n");
        TimeZone timeZone = TimeZone.getTimeZone("UTC");
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mmZ");
        simpleDateFormat.setTimeZone(timeZone);
        sb.append("\t\t\t\t<application version=\"" + GrobidProperties.getVersion() + "\" ident=\"GROBID-SDO\" when=\"" + simpleDateFormat.format(new java.util.Date()) + "\">\n");
        sb.append("\t\t\t\t\t<desc>GROBID - A machine learning software for extracting information from scholarly documents</desc>\n");
        sb.append("\t\t\t\t\t<ref target=\"https://github.com/kermitt2/grobid-sdo\"/>\n");
        sb.append("\t\t\t\t</application>\n");
        sb.append("\t\t\t</appInfo>\n");
        sb.append("\t\t</encodingDesc>\n");
        boolean z3 = false;
        sb.append("\t\t<profileDesc>\n");
        if (biblioItem.getKeywords() != null && biblioItem.getKeywords().size() > 0) {
            z3 = true;
            sb.append("\t\t\t<textClass>\n");
            sb.append("\t\t\t\t<keywords>\n");
            List<Keyword> keywords = biblioItem.getKeywords();
            int i = 0;
            for (Keyword keyword : keywords) {
                if (keyword.getKeyword() != null && keyword.getKeyword().length() != 0) {
                    String trim2 = keyword.getKeyword().trim();
                    if (trim2.startsWith(TextUtilities.COLON)) {
                        trim2 = trim2.substring(1);
                    }
                    if (i == keywords.size() - 1 && trim2.endsWith(Constants.ATTRVAL_THIS)) {
                        trim2 = trim2.substring(0, trim2.length() - 1);
                    }
                    sb.append("\t\t\t\t\t<term");
                    if (isGenerateTeiIds) {
                        sb.append(" xml:id=\"_" + KeyGen.getKey().substring(0, 7) + "\"");
                    }
                    sb.append(TextUtilities.GREATER_THAN + TextUtilities.HTMLEncode(trim2) + "</term>\n");
                    i++;
                }
            }
            sb.append("\t\t\t\t</keywords>\n");
        } else if (biblioItem.getKeyword() != null) {
            biblioItem.getKeyword();
            z3 = true;
            sb.append("\t\t\t<textClass>\n");
            sb.append("\t\t\t\t<keywords");
            if (isGenerateTeiIds) {
                sb.append(" xml:id=\"_" + KeyGen.getKey().substring(0, 7) + "\"");
            }
            sb.append(TextUtilities.GREATER_THAN);
            sb.append(TextUtilities.HTMLEncode(biblioItem.getKeyword())).append("</keywords>\n");
        }
        if (biblioItem.getCategories() != null) {
            if (!z3) {
                z3 = true;
                sb.append("\t\t\t<textClass>\n");
            }
            List<String> categories = biblioItem.getCategories();
            sb.append("\t\t\t\t<keywords>");
            for (String str8 : categories) {
                sb.append("\t\t\t\t\t<term");
                if (isGenerateTeiIds) {
                    sb.append(" xml:id=\"_" + KeyGen.getKey().substring(0, 7) + "\"");
                }
                sb.append(TextUtilities.GREATER_THAN + TextUtilities.HTMLEncode(str8.trim()) + "</term>\n");
            }
            sb.append("\t\t\t\t</keywords>\n");
        }
        if (z3) {
            sb.append("\t\t\t</textClass>\n");
        }
        String str9 = biblioItem.getAbstract();
        Language runLanguageId2 = str9 != null ? LanguageUtilities.getInstance().runLanguageId(str9) : null;
        if (runLanguageId2 != null) {
            String lang = runLanguageId2.getLang();
            if (lang.equals(this.doc.getLanguage())) {
                sb.append("\t\t\t<abstract>\n");
            } else {
                sb.append("\t\t\t<abstract xml:lang=\"").append(lang).append("\">\n");
            }
        } else if (str9 == null || str9.length() == 0) {
            sb.append("\t\t\t<abstract/>\n");
        } else {
            sb.append("\t\t\t<abstract>\n");
        }
        if (str9 != null && str9.length() != 0) {
            if (biblioItem.getLabeledAbstract() == null || biblioItem.getLabeledAbstract().length() <= 0) {
                sb.append("\t\t\t\t<p");
                if (isGenerateTeiIds) {
                    sb.append(" xml:id=\"_" + KeyGen.getKey().substring(0, 7) + "\"");
                }
                sb.append(TextUtilities.GREATER_THAN).append(TextUtilities.HTMLEncode(str9)).append("</p>");
            } else {
                try {
                    sb.append(toTEITextPiece(new StringBuilder(), biblioItem.getLabeledAbstract(), biblioItem, list, false, new LayoutTokenization(biblioItem.getLayoutTokens(TaggingLabels.HEADER_ABSTRACT)), null, null, null, this.doc, grobidAnalysisConfig).toString());
                } catch (Exception e) {
                    throw new GrobidException("An exception occurred while serializing TEI.", e);
                }
            }
            sb.append("\n\t\t\t</abstract>\n");
        }
        sb.append("\t\t</profileDesc>\n");
        if ((biblioItem.getA_Year() != null) | (biblioItem.getS_Year() != null) | (biblioItem.getSubmissionDate() != null) | (biblioItem.getNormalizedSubmissionDate() != null)) {
            sb.append("\t\t<revisionDesc>\n");
        }
        if (biblioItem.getA_Year() != null) {
            String a_Year = biblioItem.getA_Year();
            if (biblioItem.getA_Month() != null) {
                a_Year = a_Year + "-" + biblioItem.getA_Month();
                if (biblioItem.getA_Day() != null) {
                    a_Year = a_Year + "-" + biblioItem.getA_Day();
                }
            }
            sb.append("\t\t\t\t<date type=\"accepted\" when=\"");
            sb.append(a_Year).append("\" />\n");
        }
        if (biblioItem.getNormalizedSubmissionDate() != null) {
            Date normalizedSubmissionDate = biblioItem.getNormalizedSubmissionDate();
            int year3 = normalizedSubmissionDate.getYear();
            int month3 = normalizedSubmissionDate.getMonth();
            int day3 = normalizedSubmissionDate.getDay();
            String str10 = "" + year3;
            if (month3 != -1) {
                str10 = str10 + "-" + month3;
                if (day3 != -1) {
                    str10 = str10 + "-" + day3;
                }
            }
            sb.append("\t\t\t\t<date type=\"submission\" when=\"");
            sb.append(str10).append("\" />\n");
        } else if (biblioItem.getS_Year() != null) {
            String s_Year = biblioItem.getS_Year();
            if (biblioItem.getS_Month() != null) {
                s_Year = s_Year + "-" + biblioItem.getS_Month();
                if (biblioItem.getS_Day() != null) {
                    s_Year = s_Year + "-" + biblioItem.getS_Day();
                }
            }
            sb.append("\t\t\t\t<date type=\"submission\" when=\"");
            sb.append(s_Year).append("\" />\n");
        } else if (biblioItem.getSubmissionDate() != null) {
            sb.append("\t\t\t<date type=\"submission\">").append(TextUtilities.HTMLEncode(biblioItem.getSubmissionDate())).append("</date>\n");
        }
        if ((biblioItem.getA_Year() != null) | (biblioItem.getS_Year() != null) | (biblioItem.getSubmissionDate() != null)) {
            sb.append("\t\t</revisionDesc>\n");
        }
        sb.append("\t</teiHeader>\n");
        if (this.doc.getLanguage() != null) {
            sb.append("\t<text xml:lang=\"").append(this.doc.getLanguage()).append("\">\n");
        } else {
            sb.append("\t<text>\n");
        }
        return sb;
    }

    /**
     * Appends the TEI {@code body} of the document to {@code sb}.
     *
     * <p>When there is no labeled text or no tokenization, an empty {@code <body/>} element is
     * written. Otherwise the labeled text is serialized through {@code toTEITextPiece}, the
     * foot and margin notes are added through {@code toTEINote}, and the body element is closed.
     *
     * @param sb builder receiving the TEI output
     * @param str labeled full-text result; may be {@code null}
     * @param biblioItem header item, forwarded to the text serialization
     * @param list bibliographical references, forwarded to the text serialization
     * @param layoutTokenization tokenization matching {@code str}; may be {@code null}
     * @param list2 figures, forwarded to the text serialization
     * @param list3 tables, forwarded to the text serialization
     * @param list4 equations, forwarded to the text serialization
     * @param document document the notes are read from
     * @param grobidAnalysisConfig serialization options
     * @return the builder holding the serialized body
     * @throws Exception if serializing the text or the notes fails
     */
    public StringBuilder toTEIBody(StringBuilder sb, String str, BiblioItem biblioItem, List<BibDataSet> list, LayoutTokenization layoutTokenization, List<Figure> list2, List<Table> list3, List<Equation> list4, Document document, GrobidAnalysisConfig grobidAnalysisConfig) throws Exception {
        boolean hasContent = str != null && layoutTokenization != null && layoutTokenization.getTokenization() != null;
        if (!hasContent) {
            return sb.append("\t\t<body/>\n");
        }

        sb.append("\t\t<body>\n");
        StringBuilder withText = toTEITextPiece(sb, str, biblioItem, list, true, layoutTokenization, list2, list3, list4, document, grobidAnalysisConfig);
        StringBuilder withNotes = toTEINote(withText, document, grobidAnalysisConfig);
        return withNotes.append("\t\t</body>\n");
    }

    /**
     * Appends the foot notes and then the margin notes of {@code document} to {@code sb}.
     *
     * <p>Each kind of note is only processed when the document reports a non-null part for the
     * corresponding segmentation label.
     *
     * @param sb builder receiving the TEI output
     * @param document document whose note segments are serialized
     * @param grobidAnalysisConfig serialization options
     * @return the builder returned by the last note serialization, or {@code sb} if none ran
     * @throws Exception if serializing a note fails
     */
    private StringBuilder toTEINote(StringBuilder sb, Document document, GrobidAnalysisConfig grobidAnalysisConfig) throws Exception {
        StringBuilder output = sb;

        SortedSet<DocumentPiece> footNotes = document.getDocumentPart(SegmentationLabels.FOOTNOTE);
        if (footNotes != null) {
            output = toTEINote("foot", footNotes, output, document, grobidAnalysisConfig);
        }

        SortedSet<DocumentPiece> marginNotes = document.getDocumentPart(SegmentationLabels.MARGINNOTE);
        if (marginNotes != null) {
            output = toTEINote("margin", marginNotes, output, document, grobidAnalysisConfig);
        }

        return output;
    }

    /**
     * Serializes every note segment of {@code sortedSet} as a TEI {@code note} element and
     * appends it to {@code sb}.
     *
     * <p>For each document piece the text is dehyphenized and flattened to a single line. Pieces
     * without tokens, pieces shorter than six characters and pieces whose text was already
     * emitted are skipped. When the {@code startNum} pattern matches, group 1 is parsed as the
     * note number and group 2 becomes the note text; the tokens spelling out that number are
     * dropped from the tokenization before it is passed to the full-text parser. Citation marker
     * clusters in the parsed result are turned into reference nodes, all other clusters into
     * text nodes. If the parser returns no labeled result, the normalized plain text is used.
     *
     * <p>Duplicate detection records both the text as extracted and the text with the leading
     * number removed. Recording only the latter (as was done before) meant that a numbered note
     * repeated verbatim was never recognised as a duplicate, because the lookup is done on the
     * text that still carries the number.
     *
     * @param str value of the {@code place} attribute of the generated notes
     * @param sortedSet document pieces holding the notes
     * @param sb builder receiving the TEI output
     * @param document document the pieces belong to
     * @param grobidAnalysisConfig serialization options (identifiers, coordinates)
     * @return {@code sb}
     * @throws Exception if the full-text parser or the clustering fails
     */
    private StringBuilder toTEINote(String str, SortedSet<DocumentPiece> sortedSet, StringBuilder sb, Document document, GrobidAnalysisConfig grobidAnalysisConfig) throws Exception {
        List<String> seenNotes = new ArrayList<>();
        for (DocumentPiece documentPiece : sortedSet) {
            List<LayoutToken> noteTokens = document.getDocumentPieceTokenization(documentPiece);
            if (noteTokens == null || noteTokens.isEmpty()) {
                continue;
            }
            String rawText = TextUtilities.dehyphenize(document.getDocumentPieceText(documentPiece)).replace("\n", " ").replace("  ", " ").trim();
            if (rawText.length() < 6 || seenNotes.contains(rawText)) {
                continue;
            }

            String noteText = rawText;
            int noteNumber = -1;
            Matcher matcher = startNum.matcher(rawText);
            if (matcher.find()) {
                String numberText = matcher.group(1);
                noteText = matcher.group(2);
                try {
                    noteNumber = Integer.parseInt(numberText);
                    if (noteNumber != -1) {
                        // Count the leading non-empty tokens that together spell the number.
                        String remaining = numberText;
                        int consumed = 0;
                        for (LayoutToken token : noteTokens) {
                            String tokenText = token.getText();
                            if (tokenText == null || tokenText.length() == 0) {
                                continue;
                            }
                            if (!remaining.startsWith(tokenText)) {
                                break;
                            }
                            consumed++;
                            remaining = remaining.substring(tokenText.length());
                            if (remaining.length() == 0) {
                                break;
                            }
                        }
                        if (consumed != 0) {
                            noteTokens = noteTokens.subList(consumed, noteTokens.size());
                        }
                    }
                } catch (NumberFormatException ignored) {
                    // Group 1 is not a parsable integer: the note is emitted without a number.
                    noteNumber = -1;
                }
            }

            // Remember both forms so that an exact repeat of a numbered note is detected too.
            seenNotes.add(rawText);
            if (!rawText.equals(noteText)) {
                seenNotes.add(noteText);
            }

            Element noteElement = XmlBuilderUtils.teiElement("note");
            noteElement.addAttribute(new Attribute("place", str));
            if (noteNumber != -1) {
                // NOTE(review): OperatorName.ENDPATH looks like a decompiler substitution for the
                // attribute name literal (presumably "n") - confirm and replace with the literal.
                noteElement.addAttribute(new Attribute(OperatorName.ENDPATH, "" + noteNumber));
            }
            if (grobidAnalysisConfig.isGenerateTeiIds()) {
                XmlBuilderUtils.addXmlId(noteElement, "_" + KeyGen.getKey().substring(0, 7));
            }

            Pair<String, List<LayoutToken>> processed = this.fullTextParser.processShort(noteTokens, document);
            String labeledResult = processed.getLeft();
            List<LayoutToken> labeledTokens = processed.getRight();
            if (labeledResult == null || labeledResult.length() <= 0) {
                noteElement.appendChild(LayoutTokensUtil.normalizeText(noteText.trim()));
            } else {
                for (TaggingTokenCluster cluster : new TaggingTokenClusteror(GrobidModels.FULLTEXT, labeledResult, labeledTokens).cluster()) {
                    if (cluster == null) {
                        continue;
                    }
                    if (cluster.getTaggingLabel().equals(TaggingLabels.CITATION_MARKER)) {
                        try {
                            List<Node> references = markReferencesTEILuceneBased(cluster.concatTokens(), document.getReferenceMarkerMatcher(), grobidAnalysisConfig.isGenerateTeiCoordinates("ref"), false);
                            if (references != null) {
                                for (Node reference : references) {
                                    noteElement.appendChild(reference);
                                }
                            }
                        } catch (Exception e) {
                            // Best effort: a marker that cannot be resolved must not abort the note.
                            LOGGER.warn("Problem when serializing TEI fragment for note", e);
                        }
                    } else {
                        noteElement.appendChild(XmlBuilderUtils.textNode(LayoutTokensUtil.normalizeDehyphenizeText(cluster.concatTokens())));
                    }
                }
            }

            sb.append("\t\t\t");
            sb.append(noteElement.toXML());
            sb.append("\n");
        }
        return sb;
    }

    /**
     * Appends the acknowledgement section as a {@code div} of type {@code acknowledgement}.
     *
     * <p>Nothing is written when either the labeled text or the tokens are {@code null}. The
     * labeled text is serialized into a separate builder, split on line breaks, and every line
     * that is not blank is dehyphenized and appended on its own line.
     *
     * @param sb builder receiving the TEI output
     * @param str labeled acknowledgement text; may be {@code null}
     * @param list tokens of the acknowledgement; may be {@code null}
     * @param list2 bibliographical references, forwarded to the text serialization
     * @param grobidAnalysisConfig serialization options
     * @return {@code sb}
     * @throws Exception if serializing the text fails
     */
    public StringBuilder toTEIAcknowledgement(StringBuilder sb, String str, List<LayoutToken> list, List<BibDataSet> list2, GrobidAnalysisConfig grobidAnalysisConfig) throws Exception {
        if (str == null || list == null) {
            return sb;
        }

        sb.append("\n\t\t\t<div type=\"acknowledgement\">\n");
        StringBuilder fragment = toTEITextPiece(new StringBuilder(), str, null, list2, false, new LayoutTokenization(list), null, null, null, this.doc, grobidAnalysisConfig);
        for (String line : fragment.toString().split("\n")) {
            if (line.trim().length() == 0) {
                continue;
            }
            sb.append(TextUtilities.dehyphenize(line) + "\n");
        }
        sb.append("\t\t\t</div>\n\n");
        return sb;
    }

    /**
     * Appends the annex as a {@code div} of type {@code annex}.
     *
     * <p>Nothing is written when either the labeled text or the tokens are {@code null}.
     *
     * @param sb builder receiving the TEI output
     * @param str labeled annex text; may be {@code null}
     * @param biblioItem header item, forwarded to the text serialization
     * @param list bibliographical references, forwarded to the text serialization
     * @param list2 tokens of the annex; may be {@code null}
     * @param document document the annex belongs to
     * @param grobidAnalysisConfig serialization options
     * @return the builder holding the serialized annex, or {@code sb} unchanged if skipped
     * @throws Exception if serializing the text fails
     */
    public StringBuilder toTEIAnnex(StringBuilder sb, String str, BiblioItem biblioItem, List<BibDataSet> list, List<LayoutToken> list2, Document document, GrobidAnalysisConfig grobidAnalysisConfig) throws Exception {
        boolean nothingToWrite = str == null || list2 == null;
        if (nothingToWrite) {
            return sb;
        }

        sb.append("\t\t\t<div type=\"annex\">\n");
        LayoutTokenization annexTokenization = new LayoutTokenization(list2);
        StringBuilder annex = toTEITextPiece(sb, str, biblioItem, list, true, annexTokenization, null, null, null, document, grobidAnalysisConfig);
        return annex.append("\t\t\t</div>\n");
    }

    /**
     * Converts a labeled full-text sequence into TEI and appends it to {@code sb}.
     *
     * <p>The labeled result is clustered together with the tokens of {@code layoutTokenization}
     * and each cluster is serialized according to its label:
     * <ul>
     *   <li>section: opens a new {@code div} holding a head element, with the section number
     *       (when one is recognised) moved into an attribute;</li>
     *   <li>equation / equation label: the equation of {@code list4} starting at the offset of
     *       the first token of the cluster is serialized into the current div;</li>
     *   <li>item: appended to the current list element, a new one being opened unless the
     *       previous cluster was an item or a marker;</li>
     *   <li>other: a {@code note} element;</li>
     *   <li>paragraph: text appended to the current {@code p}, a new one being opened when
     *       {@code isNewParagraph} says so;</li>
     *   <li>markers: reference nodes appended to the current paragraph, or to the current div
     *       when no paragraph is open;</li>
     *   <li>figure / table: only a separating space in the current paragraph.</li>
     * </ul>
     * Divs without child elements are dropped before serialization. After a textual clean-up
     * pass, the TEI of every figure of {@code list2} and every table of {@code list3} is appended.
     *
     * @param sb builder receiving the TEI output
     * @param str labeled full-text result
     * @param biblioItem not read by this method
     * @param list not read by this method
     * @param z forwarded to the citation marker resolution
     * @param layoutTokenization tokenization matching {@code str}
     * @param list2 figures; may be {@code null}
     * @param list3 tables; may be {@code null}
     * @param list4 equations; may be {@code null}
     * @param document document providing the reference marker matcher
     * @param grobidAnalysisConfig serialization options (identifiers, coordinates)
     * @return the builder holding the serialized text
     * @throws Exception if clustering or marker resolution fails
     */
    private StringBuilder toTEITextPiece(StringBuilder sb, String str, BiblioItem biblioItem, List<BibDataSet> list, boolean z, LayoutTokenization layoutTokenization, List<Figure> list2, List<Table> list3, List<Equation> list4, Document document, GrobidAnalysisConfig grobidAnalysisConfig) throws Exception {
        TaggingLabel previousLabel = null;
        int startPosition = sb.length();
        List<TaggingTokenCluster> clusters = new TaggingTokenClusteror(GrobidModels.FULLTEXT, str, layoutTokenization.getTokenization()).cluster();
        List<Element> divs = new ArrayList<>();
        Element currentDiv = XmlBuilderUtils.teiElement("div");
        if (grobidAnalysisConfig.isGenerateTeiIds()) {
            XmlBuilderUtils.addXmlId(currentDiv, "_" + KeyGen.getKey().substring(0, 7));
        }
        divs.add(currentDiv);
        Element currentParagraph = null;
        Element currentList = null;
        // Index of the last matched equation; earlier entries are not considered again.
        int equationCursor = 0;
        for (TaggingTokenCluster cluster : clusters) {
            if (cluster == null) {
                continue;
            }
            TaggingLabel label = cluster.getTaggingLabel();
            Engine.getCntManager().i(label);
            if (label.equals(TaggingLabels.SECTION)) {
                String headText = LayoutTokensUtil.normalizeDehyphenizeText(cluster.concatTokens());
                currentDiv = XmlBuilderUtils.teiElement("div");
                // NOTE(review): HeaderTable.TAG and OperatorName.ENDPATH look like decompiler
                // substitutions for string literals (presumably "head" and "n") - confirm.
                Element head = XmlBuilderUtils.teiElement(HeaderTable.TAG);
                org.grobid.core.utilities.Pair<String, String> sectionNumber = getSectionNumber(headText);
                if (sectionNumber != null) {
                    head.addAttribute(new Attribute(OperatorName.ENDPATH, sectionNumber.b));
                    head.appendChild(sectionNumber.a);
                } else {
                    head.appendChild(headText);
                }
                if (grobidAnalysisConfig.isGenerateTeiIds()) {
                    XmlBuilderUtils.addXmlId(head, "_" + KeyGen.getKey().substring(0, 7));
                }
                currentDiv.appendChild(head);
                divs.add(currentDiv);
            } else if (label.equals(TaggingLabels.EQUATION) || label.equals(TaggingLabels.EQUATION_LABEL)) {
                int startOffset = -1;
                if (cluster.concatTokens() != null && cluster.concatTokens().size() > 0) {
                    startOffset = cluster.concatTokens().get(0).getOffset();
                }
                if (startOffset != -1 && list4 != null) {
                    Equation matched = null;
                    for (int k = equationCursor; k < list4.size(); k++) {
                        Equation candidate = list4.get(k);
                        if (candidate.getStart() == startOffset) {
                            matched = candidate;
                            equationCursor = k;
                            break;
                        }
                    }
                    if (matched != null) {
                        Element equationElement = matched.toTEIElement(grobidAnalysisConfig);
                        if (equationElement != null) {
                            currentDiv.appendChild(equationElement);
                        }
                    }
                }
            } else if (label.equals(TaggingLabels.ITEM)) {
                Element item = XmlBuilderUtils.teiElement("item", LayoutTokensUtil.normalizeText(cluster.concatTokens()));
                if (!MARKER_LABELS.contains(previousLabel) && previousLabel != TaggingLabels.ITEM) {
                    // NOTE(review): SchemaSymbols.ATTVAL_LIST is presumably the literal "list" - confirm.
                    currentList = XmlBuilderUtils.teiElement(SchemaSymbols.ATTVAL_LIST);
                    currentDiv.appendChild(currentList);
                }
                if (currentList != null) {
                    currentList.appendChild(item);
                }
            } else if (label.equals(TaggingLabels.OTHER)) {
                Element note = XmlBuilderUtils.teiElement("note", LayoutTokensUtil.normalizeDehyphenizeText(cluster.concatTokens()));
                // NOTE(review): Constants.ATTRVAL_OTHER is presumably the literal "other" - confirm.
                note.addAttribute(new Attribute("type", Constants.ATTRVAL_OTHER));
                if (grobidAnalysisConfig.isGenerateTeiIds()) {
                    XmlBuilderUtils.addXmlId(note, "_" + KeyGen.getKey().substring(0, 7));
                }
                currentDiv.appendChild(note);
            } else if (label.equals(TaggingLabels.PARAGRAPH)) {
                String paragraphText = LayoutTokensUtil.normalizeDehyphenizeText(cluster.concatTokens());
                // isNewParagraph returns true whenever no paragraph is open, so
                // currentParagraph is never null after this test.
                if (isNewParagraph(previousLabel, currentParagraph)) {
                    currentParagraph = XmlBuilderUtils.teiElement("p");
                    if (grobidAnalysisConfig.isGenerateTeiIds()) {
                        XmlBuilderUtils.addXmlId(currentParagraph, "_" + KeyGen.getKey().substring(0, 7));
                    }
                    currentDiv.appendChild(currentParagraph);
                }
                currentParagraph.appendChild(paragraphText);
            } else if (MARKER_LABELS.contains(label)) {
                List<LayoutToken> markerTokens = LayoutTokensUtil.dehyphenize(cluster.concatTokens());
                String markerText = LayoutTokensUtil.toText(markerTokens);
                Element parent = currentParagraph != null ? currentParagraph : currentDiv;
                parent.appendChild(new Text(" "));
                boolean withCoordinates = grobidAnalysisConfig.isGenerateTeiCoordinates("ref");
                List<Node> references;
                if (label.equals(TaggingLabels.CITATION_MARKER)) {
                    references = markReferencesTEILuceneBased(markerTokens, document.getReferenceMarkerMatcher(), withCoordinates, z);
                } else if (label.equals(TaggingLabels.FIGURE_MARKER)) {
                    references = markReferencesFigureTEI(markerText, markerTokens, list2, withCoordinates);
                } else if (label.equals(TaggingLabels.TABLE_MARKER)) {
                    references = markReferencesTableTEI(markerText, markerTokens, list3, withCoordinates);
                } else if (label.equals(TaggingLabels.EQUATION_MARKER)) {
                    references = markReferencesEquationTEI(markerText, markerTokens, list4, withCoordinates);
                } else {
                    throw new IllegalStateException("Unsupported marker type: " + label);
                }
                if (references != null) {
                    for (Node reference : references) {
                        parent.appendChild(reference);
                    }
                }
            } else if ((label.equals(TaggingLabels.FIGURE) || label.equals(TaggingLabels.TABLE)) && currentParagraph != null) {
                currentParagraph.appendChild(new Text(" "));
            }
            previousLabel = cluster.getTaggingLabel();
        }

        // Drop the divs that ended up without any child element (iterating backwards so that
        // removals do not shift the indices still to be visited).
        for (int index = divs.size() - 1; index >= 0; index--) {
            Element div = divs.get(index);
            if (div.getChildElements() == null || div.getChildElements().size() == 0) {
                divs.remove(index);
            }
        }
        if (divs.size() != 0) {
            sb.append(XmlBuilderUtils.toXml(divs));
        } else {
            sb.append(XmlBuilderUtils.toXml(currentDiv));
        }

        StringBuilder buffer = TextUtilities.replaceAll(TextUtilities.replaceAll(sb, "</head><head", "</head>\n\t\t\t</div>\n\t\t\t<div>\n\t\t\t\t<head"), "</p>\t\t\t\t<p>", " ");
        int closeIndex = buffer.indexOf("</p0>", startPosition - 1);
        while (closeIndex != -1) {
            int openIndex = buffer.indexOf("<p>", closeIndex + 1);
            // indexOf reports "not found" as -1. The previous test against 1 let a failed search
            // through, which could overwrite the first character of the buffer with 'q'.
            // NOTE(review): the offsets +4/+5 skip one character after "<p>" - confirm intended.
            if (openIndex != -1 && buffer.length() > openIndex + 5 && Character.isUpperCase(buffer.charAt(openIndex + 4)) && Character.isLowerCase(buffer.charAt(openIndex + 5))) {
                buffer.setCharAt(openIndex + 1, 'q');
            }
            closeIndex = buffer.indexOf("</p0>", closeIndex + 1);
        }
        StringBuilder result = TextUtilities.replaceAll(TextUtilities.replaceAll(TextUtilities.replaceAll(buffer, "</p0>(\\n\\t)*<q>", " "), "</p0>", "</p>"), "<q>", "<p>");

        if (list2 != null) {
            for (Figure figure : list2) {
                String figureTei = figure.toTEI(grobidAnalysisConfig, document, this);
                if (figureTei != null) {
                    result.append(figureTei).append("\n");
                }
            }
        }
        if (list3 != null) {
            for (Table table : list3) {
                String tableTei = table.toTEI(grobidAnalysisConfig, document, this);
                if (tableTei != null) {
                    result.append(tableTei).append("\n");
                }
            }
        }
        return result;
    }

    /**
     * Tells whether a paragraph cluster must open a new {@code p} element.
     *
     * <p>The open paragraph is continued only when one exists and the previous cluster was a
     * marker, a figure or a table; in every other case a new paragraph is started.
     *
     * @param taggingLabel label of the previous cluster; may be {@code null}
     * @param element currently open paragraph element; may be {@code null}
     * @return {@code true} if a new paragraph element must be created
     */
    private boolean isNewParagraph(TaggingLabel taggingLabel, Element element) {
        boolean followsInlineContent = MARKER_LABELS.contains(taggingLabel)
                || taggingLabel == TaggingLabels.FIGURE
                || taggingLabel == TaggingLabels.TABLE;
        if (!followsInlineContent) {
            return true;
        }
        return element == null;
    }

    /**
     * Collects the graphic objects whose start position lies in {@code [i, i2]}.
     *
     * <p>The scan stops at the first object starting after {@code i2}, so objects located
     * behind such an object in {@code list} are not examined.
     *
     * @param list graphic objects to scan
     * @param i lower bound of the start position (inclusive)
     * @param i2 upper bound of the start position (inclusive)
     * @return the matching objects, in list order; never {@code null}
     */
    private List<GraphicObject> getGraphicObject(List<GraphicObject> list, int i, int i2) {
        List<GraphicObject> inRange = new ArrayList<>();
        for (GraphicObject candidate : list) {
            if (candidate.getStartPosition() > i2) {
                break;
            }
            if (candidate.getStartPosition() >= i) {
                inRange.add(candidate);
            }
        }
        return inRange;
    }

    /**
     * Splits a section title into its text and its numbering.
     *
     * <p>The three header numbering patterns of {@code BasicStructureBuilder} are tried in
     * order and the first one that matches provides the numbering. Every occurrence of the
     * matched numbering is removed from the title.
     *
     * @param str section title
     * @return a pair holding the trimmed title without the numbering and the numbering without
     *         spaces, or {@code null} if no pattern matches
     */
    private org.grobid.core.utilities.Pair<String, String> getSectionNumber(String str) {
        Matcher[] candidates = {
            BasicStructureBuilder.headerNumbering1.matcher(str),
            BasicStructureBuilder.headerNumbering2.matcher(str),
            BasicStructureBuilder.headerNumbering3.matcher(str)
        };
        for (Matcher candidate : candidates) {
            if (candidate.find()) {
                String numbering = candidate.group(0);
                if (numbering == null) {
                    return null;
                }
                String title = str.replace(numbering, "").trim();
                return new org.grobid.core.utilities.Pair<>(title, numbering.replace(" ", ""));
            }
        }
        return null;
    }

    /**
     * Appends the bibliography as a {@code div} of type {@code references}.
     *
     * <p>An empty {@code <listBibl/>} is written when there are no references. Otherwise each
     * reference is serialized with its position in the list, after its raw text has been stored
     * on the bibliographical item. An entry without a bibliographical item only produces a line
     * break; previously such an entry raised a {@code NullPointerException} because the raw
     * text was stored before the item was checked for {@code null}.
     *
     * @param sb builder receiving the TEI output
     * @param list references to serialize; may be {@code null} or empty
     * @param grobidAnalysisConfig serialization options
     * @return {@code sb}
     * @throws Exception if serializing a reference fails
     */
    public StringBuilder toTEIReferences(StringBuilder sb, List<BibDataSet> list, GrobidAnalysisConfig grobidAnalysisConfig) throws Exception {
        sb.append("\t\t\t<div type=\"references\">\n\n");
        if (list == null || list.isEmpty()) {
            sb.append("\t\t\t\t<listBibl/>\n");
        } else {
            sb.append("\t\t\t\t<listBibl>\n");
            int position = 0;
            for (BibDataSet bibDataSet : list) {
                BiblioItem resBib = bibDataSet.getResBib();
                if (resBib != null) {
                    resBib.setReference(bibDataSet.getRawBib());
                    sb.append("\n" + resBib.toTEI(position, 0, grobidAnalysisConfig));
                } else {
                    sb.append("\n");
                }
                // The position advances for every entry so that numbering follows list order.
                position++;
            }
            sb.append("\n\t\t\t\t</listBibl>\n");
        }
        sb.append("\t\t\t</div>\n");
        return sb;
    }

    /**
     * Builds a {@code coords} attribute from a list of bounding boxes.
     *
     * @param list bounding boxes, joined with {@code ;}; may be {@code null}
     * @param z whether coordinates are wanted at all
     * @return the attribute as {@code coords="..."}, or an empty string when coordinates are
     *         not wanted or there is no bounding box
     */
    public static String getCoordsAttribute(List<BoundingBox> list, boolean z) {
        if (z && list != null && !list.isEmpty()) {
            String joined = Joiner.on(";").join(list);
            return "coords=\"" + joined + "\"";
        }
        return "";
    }

    /**
     * Turns the tokens of a citation callout into TEI nodes, resolving them against the
     * bibliography through {@code referenceMarkerMatcher}.
     *
     * <p>The text is returned as a single text node when it is blank, already looks like a
     * {@code ref} element, or when no matcher is available. Otherwise every match becomes a
     * {@code ref} element of type {@code bibr}, with a {@code target} attribute when a
     * bibliographical entry was found. A match without an entry is kept as a {@code ref} only
     * when {@code z2} is set; otherwise it is emitted as plain text. A trailing space of the
     * callout is preserved as a final text node.
     *
     * @param list tokens of the callout; may be {@code null}
     * @param referenceMarkerMatcher matcher resolving callouts; may be {@code null}
     * @param z whether to add a {@code coords} attribute
     * @param z2 whether unresolved callouts are still emitted as {@code ref} elements
     * @return the generated nodes, or {@code null} if there is no token
     * @throws EntityMatcherException if the matcher fails
     */
    public List<Node> markReferencesTEILuceneBased(List<LayoutToken> list, ReferenceMarkerMatcher referenceMarkerMatcher, boolean z, boolean z2) throws EntityMatcherException {
        if (list == null || list.size() == 0) {
            return null;
        }

        String calloutText = LayoutTokensUtil.toText(list);
        boolean passThrough = calloutText == null
                || calloutText.trim().length() == 0
                || calloutText.endsWith("</ref>")
                || calloutText.startsWith("<ref")
                || referenceMarkerMatcher == null;
        if (passThrough) {
            return Collections.<Node>singletonList(new Text(calloutText));
        }

        boolean trailingSpace = calloutText.replace("\n", " ").endsWith(" ");
        List<Node> nodes = new ArrayList<>();
        List<ReferenceMarkerMatcher.MatchResult> matches = referenceMarkerMatcher.match(list);
        if (matches != null) {
            for (ReferenceMarkerMatcher.MatchResult matchResult : matches) {
                String markerText = LayoutTokensUtil.normalizeText(matchResult.getText());
                String coords = null;
                if (z && matchResult.getTokens() != null) {
                    coords = LayoutTokensUtil.getCoordsString(matchResult.getTokens());
                }

                Element ref = XmlBuilderUtils.teiElement("ref");
                ref.addAttribute(new Attribute("type", "bibr"));
                if (coords != null) {
                    ref.addAttribute(new Attribute("coords", coords));
                }
                ref.appendChild(markerText);

                boolean resolved = matchResult.getBibDataSet() != null;
                if (resolved) {
                    ref.addAttribute(new Attribute("target", "#b" + matchResult.getBibDataSet().getResBib().getOrdinal()));
                }

                if (resolved || z2) {
                    nodes.add(ref);
                } else {
                    nodes.add(XmlBuilderUtils.textNode(matchResult.getText()));
                }
            }
        }
        if (trailingSpace) {
            nodes.add(new Text(" "));
        }
        return nodes;
    }

    /**
     * Turns a figure callout into a TEI {@code ref} element of type {@code figure}.
     *
     * <p>The target is the first figure whose cleaned label is contained, ignoring case, in
     * the callout text; without such a figure the element carries no {@code target}. A trailing
     * space of the callout is preserved as a separate text node.
     *
     * @param str callout text; may be {@code null}
     * @param list tokens of the callout, used for the coordinates; may be {@code null}
     * @param list2 known figures; may be {@code null}
     * @param z whether to add a {@code coords} attribute
     * @return the generated nodes, or {@code null} if the callout text is blank
     */
    public List<Node> markReferencesFigureTEI(String str, List<LayoutToken> list, List<Figure> list2, boolean z) {
        if (str == null || str.trim().isEmpty()) {
            return null;
        }

        List<Node> nodes = new ArrayList<>();
        String loweredCallout = str.toLowerCase();
        String targetId = null;
        if (list2 != null) {
            for (Figure figure : list2) {
                if (figure.getLabel() == null || figure.getLabel().length() <= 0) {
                    continue;
                }
                String cleanedLabel = TextUtilities.cleanField(figure.getLabel(), false);
                if (cleanedLabel.length() > 0 && loweredCallout.contains(cleanedLabel.toLowerCase())) {
                    targetId = figure.getId();
                    break;
                }
            }
        }

        String singleLine = str.replace("\n", " ");
        boolean trailingSpace = singleLine.endsWith(" ");
        String coords = (z && list != null) ? LayoutTokensUtil.getCoordsString(list) : null;

        Element ref = XmlBuilderUtils.teiElement("ref");
        ref.addAttribute(new Attribute("type", "figure"));
        if (coords != null) {
            ref.addAttribute(new Attribute("coords", coords));
        }
        ref.appendChild(singleLine.trim());
        if (targetId != null) {
            ref.addAttribute(new Attribute("target", "#fig_" + targetId));
        }

        nodes.add(ref);
        if (trailingSpace) {
            nodes.add(new Text(" "));
        }
        return nodes;
    }

    /**
     * Turns a table callout into a TEI {@code ref} element of type {@code table}.
     *
     * <p>The target is the first table whose cleaned label is contained, ignoring case, in the
     * callout text; without such a table the element carries no {@code target}. A trailing
     * space of the callout is preserved as a separate text node.
     *
     * @param str callout text; may be {@code null}
     * @param list tokens of the callout, used for the coordinates; may be {@code null}
     * @param list2 known tables; may be {@code null}
     * @param z whether to add a {@code coords} attribute
     * @return the generated nodes, or {@code null} if the callout text is blank
     */
    public List<Node> markReferencesTableTEI(String str, List<LayoutToken> list, List<Table> list2, boolean z) {
        if (str == null || str.trim().isEmpty()) {
            return null;
        }

        List<Node> nodes = new ArrayList<>();
        String loweredCallout = str.toLowerCase();
        String targetId = null;
        if (list2 != null) {
            for (Table table : list2) {
                if (table.getLabel() == null || table.getLabel().length() <= 0) {
                    continue;
                }
                String cleanedLabel = TextUtilities.cleanField(table.getLabel(), false);
                if (cleanedLabel.length() > 0 && loweredCallout.contains(cleanedLabel.toLowerCase())) {
                    targetId = table.getId();
                    break;
                }
            }
        }

        String singleLine = str.replace("\n", " ");
        boolean trailingSpace = singleLine.endsWith(" ");
        String coords = (z && list != null) ? LayoutTokensUtil.getCoordsString(list) : null;

        Element ref = XmlBuilderUtils.teiElement("ref");
        ref.addAttribute(new Attribute("type", "table"));
        if (coords != null) {
            ref.addAttribute(new Attribute("coords", coords));
        }
        ref.appendChild(singleLine.trim());
        if (targetId != null) {
            ref.addAttribute(new Attribute("target", "#tab_" + targetId));
        }

        nodes.add(ref);
        if (trailingSpace) {
            nodes.add(new Text(" "));
        }
        return nodes;
    }

    /**
     * Turns an equation callout into a TEI {@code ref} element of type {@code formula}.
     *
     * <p>The target is the first equation whose cleaned label is contained, ignoring case, in
     * the callout text; without such an equation the element carries no {@code target}. A
     * trailing space of the callout is preserved as a separate text node.
     *
     * @param str callout text; may be {@code null}
     * @param list tokens of the callout, used for the coordinates; may be {@code null}
     * @param list2 known equations; may be {@code null}
     * @param z whether to add a {@code coords} attribute
     * @return the generated nodes, or {@code null} if the callout text is blank
     */
    public List<Node> markReferencesEquationTEI(String str, List<LayoutToken> list, List<Equation> list2, boolean z) {
        if (str == null || str.trim().isEmpty()) {
            return null;
        }

        List<Node> nodes = new ArrayList<>();
        String loweredCallout = str.toLowerCase();
        String targetId = null;
        if (list2 != null) {
            for (Equation equation : list2) {
                if (equation.getLabel() == null || equation.getLabel().length() <= 0) {
                    continue;
                }
                String cleanedLabel = TextUtilities.cleanField(equation.getLabel(), false);
                if (cleanedLabel.length() > 0 && loweredCallout.contains(cleanedLabel.toLowerCase())) {
                    targetId = equation.getId();
                    break;
                }
            }
        }

        String singleLine = str.replace("\n", " ");
        boolean trailingSpace = singleLine.endsWith(" ");
        String coords = (z && list != null) ? LayoutTokensUtil.getCoordsString(list) : null;

        Element ref = XmlBuilderUtils.teiElement("ref");
        ref.addAttribute(new Attribute("type", "formula"));
        if (coords != null) {
            ref.addAttribute(new Attribute("coords", coords));
        }
        ref.appendChild(singleLine.trim());
        if (targetId != null) {
            ref.addAttribute(new Attribute("target", "#formula_" + targetId));
        }

        nodes.add(ref);
        if (trailingSpace) {
            nodes.add(new Text(" "));
        }
        return nodes;
    }

    /**
     * Flattens a text to a single trimmed line: the trimmed input is dehyphenized, line breaks
     * become spaces and each pair of consecutive spaces is replaced by one space (single pass).
     *
     * @param str text to normalize; must not be {@code null}
     * @return the normalized text
     */
    private String normalizeText(String str) {
        String dehyphenized = TextUtilities.dehyphenize(str.trim());
        String singleLine = dehyphenized.replace("\n", " ");
        String collapsed = singleLine.replace("  ", " ");
        return collapsed.trim();
    }
}
