package org.grobid.core.engines;

import com.google.common.collect.Iterables;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.StringTokenizer;
import java.util.TreeSet;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.xalan.templates.Constants;
import org.apache.xpath.XPath;
import org.grobid.core.GrobidModels;
import org.grobid.core.data.BibDataSet;
import org.grobid.core.data.BiblioItem;
import org.grobid.core.data.Equation;
import org.grobid.core.data.Figure;
import org.grobid.core.data.Table;
import org.grobid.core.document.Document;
import org.grobid.core.document.DocumentPiece;
import org.grobid.core.document.DocumentPointer;
import org.grobid.core.document.DocumentSource;
import org.grobid.core.document.TEIFormatter;
import org.grobid.core.engines.citations.LabeledReferenceResult;
import org.grobid.core.engines.config.GrobidAnalysisConfig;
import org.grobid.core.engines.counters.CitationParserCounters;
import org.grobid.core.engines.label.SegmentationLabels;
import org.grobid.core.engines.label.TaggingLabel;
import org.grobid.core.engines.label.TaggingLabels;
import org.grobid.core.engines.tagging.GenericTaggerUtils;
import org.grobid.core.exceptions.GrobidException;
import org.grobid.core.exceptions.GrobidResourceException;
import org.grobid.core.features.FeatureFactory;
import org.grobid.core.features.FeaturesVectorFulltext;
import org.grobid.core.lang.Language;
import org.grobid.core.layout.Block;
import org.grobid.core.layout.GraphicObject;
import org.grobid.core.layout.GraphicObjectType;
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.layout.LayoutTokenization;
import org.grobid.core.tokenization.TaggingTokenCluster;
import org.grobid.core.tokenization.TaggingTokenClusteror;
import org.grobid.core.utilities.Consolidation;
import org.grobid.core.utilities.GrobidProperties;
import org.grobid.core.utilities.KeyGen;
import org.grobid.core.utilities.LanguageUtilities;
import org.grobid.core.utilities.LayoutTokensUtil;
import org.grobid.core.utilities.TextUtilities;
import org.grobid.core.utilities.matching.EntityMatcherException;
import org.grobid.core.utilities.matching.ReferenceMarkerMatcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/grobid/core/engines/FullTextParser.class */
public class FullTextParser extends AbstractParser {
    // Temporary directory; assigned in the constructor from GrobidProperties.getTempPath()
    // and checked (non-null, existing) before any document is processed.
    private File tmpPath;
    // NOTE(review): presumably bin counts used when discretising position / spacing /
    // density features; the visible code passes literal values rather than these names
    // (decompiled constants are inlined) - confirm against the original source.
    private static final int NBBINS_POSITION = 12;
    private static final int NBBINS_SPACE = 5;
    private static final int NBBINS_DENSITY = 5;
    // NOTE(review): usage is not visible in this part of the file - confirm purpose.
    private static final int LINESCALE = 10;
    // Access point to the other parsers used by this class (segmentation, header,
    // citation and reference-segmenter parsers are requested from it).
    private EngineParsers parsers;
    private static final Logger LOGGER = LoggerFactory.getLogger((Class<?>) FullTextParser.class);
    // Labels grouped as "inline" full-text labels: citation, table and figure markers
    // plus equation labels. Arrays.asList returns a fixed-size list.
    private static List<TaggingLabel> inlineFullTextLabels = Arrays.asList(TaggingLabels.CITATION_MARKER, TaggingLabels.TABLE_MARKER, TaggingLabels.FIGURE_MARKER, TaggingLabels.EQUATION_LABEL);

    /**
     * Creates a full-text parser bound to the {@link GrobidModels#FULLTEXT} model.
     *
     * @param engineParsers provider of the sibling parsers this parser delegates to
     */
    public FullTextParser(EngineParsers engineParsers) {
        super(GrobidModels.FULLTEXT);
        // tmpPath starts out as null (field default) until the temp path is read below.
        this.parsers = engineParsers;
        this.tmpPath = GrobidProperties.getTempPath();
    }

    /**
     * Builds a {@link DocumentSource} from a PDF file and runs the full-text processing on it.
     *
     * @param file                 the PDF file to process
     * @param grobidAnalysisConfig analysis settings; supplies the start page, end page and
     *                             the PDF asset path (assets are requested only when that
     *                             path is non-null)
     * @return the processed document
     * @throws Exception if building the document source or the processing itself fails
     */
    public Document processing(File file, GrobidAnalysisConfig grobidAnalysisConfig) throws Exception {
        DocumentSource pdfSource = DocumentSource.fromPdf(
                file,
                grobidAnalysisConfig.getStartPage(),
                grobidAnalysisConfig.getEndPage(),
                grobidAnalysisConfig.getPdfAssetPath() != null,
                true,
                false);
        return processing(pdfSource, grobidAnalysisConfig);
    }

    /**
     * Runs the full-text pipeline on a document source.
     *
     * <p>Steps, in order: segmentation, header extraction, labeling of the abstract,
     * reference-section parsing (with optional consolidation), labeling of the body,
     * extraction of figures / tables / equations, labeling of the annex, and finally
     * TEI generation through {@code toTEI}.
     *
     * @param documentSource       the source to process
     * @param grobidAnalysisConfig analysis settings (citation consolidation mode, etc.)
     * @return the document produced by the segmentation parser, after {@code toTEI} has run on it
     * @throws GrobidResourceException if the temp path is null or does not exist
     * @throws GrobidException         if any step fails; a {@code GrobidException} is rethrown
     *                                 unchanged, any other exception is wrapped
     */
    public Document processing(DocumentSource documentSource, GrobidAnalysisConfig grobidAnalysisConfig) {
        Pair<String, List<LayoutToken>> processShort;
        // Refuse to start without a usable temp directory.
        if (this.tmpPath == null) {
            throw new GrobidResourceException("Cannot process pdf file, because temp path is null.");
        }
        if (!this.tmpPath.exists()) {
            throw new GrobidResourceException("Cannot process pdf file, because temp path '" + this.tmpPath.getAbsolutePath() + "' does not exists.");
        }
        try {
            // Segment the document and fetch the pieces labeled as body.
            Document processing = this.parsers.getSegmentationParser().processing(documentSource, grobidAnalysisConfig);
            SortedSet<DocumentPiece> documentPart = processing.getDocumentPart(SegmentationLabels.BODY);
            BiblioItem biblioItem = new BiblioItem();
            // Header extraction: first the block-based variant when enabled in the properties.
            if (GrobidProperties.isHeaderUseHeuristics()) {
                this.parsers.getHeaderParser().processingHeaderBlock(grobidAnalysisConfig, processing, biblioItem);
            }
            if (StringUtils.isBlank(biblioItem.getTitle()) || StringUtils.isBlank(biblioItem.getAuthors()) || CollectionUtils.isEmpty(biblioItem.getFullAuthors())) {
                // Title or authors missing: start over with the section-based header processing.
                biblioItem = new BiblioItem();
                this.parsers.getHeaderParser().processingHeaderSection(grobidAnalysisConfig, processing, biblioItem);
            } else {
                // Header already usable: run the section-based processing on a scratch item
                // (with consolidateHeader set to 0) only to pick up its abstract, if any.
                BiblioItem biblioItem2 = new BiblioItem();
                this.parsers.getHeaderParser().processingHeaderSection(GrobidAnalysisConfig.builder(grobidAnalysisConfig).consolidateHeader(0).build(), processing, biblioItem2);
                if (StringUtils.isNotBlank(biblioItem2.getAbstract())) {
                    biblioItem.setAbstract(biblioItem2.getAbstract());
                    biblioItem.setLayoutTokensForLabel(biblioItem2.getLayoutTokens(TaggingLabels.HEADER_ABSTRACT), TaggingLabels.HEADER_ABSTRACT);
                }
            }
            // Label the abstract with the full-text model and store the post-processed result.
            if (StringUtils.isNotBlank(biblioItem.getAbstract())) {
                List<LayoutToken> layoutTokens = biblioItem.getLayoutTokens(TaggingLabels.HEADER_ABSTRACT);
                if (CollectionUtils.isNotEmpty(layoutTokens) && (processShort = processShort(BiblioItem.cleanAbstractLayoutTokens(layoutTokens), processing)) != null) {
                    biblioItem.setLabeledAbstract(postProcessLabeledAbstract(processShort.getLeft()));
                    biblioItem.setLayoutTokensForLabel(processShort.getRight(), TaggingLabels.HEADER_ABSTRACT);
                }
            }
            // Parse the reference section; consolidation is handled separately just below.
            List<BibDataSet> processingReferenceSection = this.parsers.getCitationParser().processingReferenceSection(processing, this.parsers.getReferenceSegmenterParser(), 0);
            if (grobidAnalysisConfig.getConsolidateCitations() != 0) {
                Consolidation consolidation = Consolidation.getInstance();
                if (consolidation.getCntManager() == null) {
                    consolidation.setCntManager(Engine.getCntManager());
                }
                try {
                    // The result map is keyed by the index of the reference in the list.
                    Map<Integer, BiblioItem> consolidate = consolidation.consolidate(processingReferenceSection);
                    for (int i = 0; i < processingReferenceSection.size(); i++) {
                        BiblioItem resBib = processingReferenceSection.get(i).getResBib();
                        BiblioItem biblioItem3 = consolidate.get(Integer.valueOf(i));
                        if (biblioItem3 != null) {
                            // Mode 1 applies BiblioItem.correct, mode 2 applies BiblioItem.injectDOI;
                            // any other non-zero mode leaves the parsed reference untouched.
                            if (grobidAnalysisConfig.getConsolidateCitations() == 1) {
                                BiblioItem.correct(resBib, biblioItem3);
                            } else if (grobidAnalysisConfig.getConsolidateCitations() == 2) {
                                BiblioItem.injectDOI(resBib, biblioItem3);
                            }
                        }
                    }
                } catch (Exception e) {
                    throw new GrobidException("An exception occured while running consolidation on bibliographical references.", e);
                }
            }
            // References are attached before featurizing the body: getBodyTextFeatured reads
            // them back through document.getBibDataSets().
            processing.setBibDataSets(processingReferenceSection);
            Pair<String, LayoutTokenization> bodyTextFeatured = getBodyTextFeatured(processing, documentPart);
            // str: labeled body output (stays null when the featured body text is empty).
            String str = null;
            LayoutTokenization layoutTokenization = null;
            List<Figure> list = null;
            List<Table> list2 = null;
            List<Equation> list3 = null;
            if (bodyTextFeatured != null) {
                String left = bodyTextFeatured.getLeft();
                layoutTokenization = bodyTextFeatured.getRight();
                if (left == null || left.trim().length() <= 0) {
                    LOGGER.debug("Fulltext model: The input to the CRF processing is empty");
                } else {
                    str = label(left);
                }
                // Figures: extract them, then label each non-empty caption with processShort.
                list = processFigures(str, layoutTokenization.getTokenization(), processing);
                for (Figure figure : list) {
                    if (figure.getCaptionLayoutTokens() != null && figure.getCaptionLayoutTokens().size() > 0) {
                        Pair<String, List<LayoutToken>> processShort2 = processShort(figure.getCaptionLayoutTokens(), processing);
                        figure.setLabeledCaption(processShort2.getLeft());
                        figure.setCaptionLayoutTokens(processShort2.getRight());
                    }
                }
                // Tables: same treatment as figures.
                list2 = processTables(str, layoutTokenization.getTokenization(), processing);
                for (Table table : list2) {
                    if (table.getCaptionLayoutTokens() != null && table.getCaptionLayoutTokens().size() > 0) {
                        Pair<String, List<LayoutToken>> processShort3 = processShort(table.getCaptionLayoutTokens(), processing);
                        table.setLabeledCaption(processShort3.getLeft());
                        table.setCaptionLayoutTokens(processShort3.getRight());
                    }
                }
                list3 = processEquations(str, layoutTokenization.getTokenization(), processing);
            } else {
                LOGGER.debug("Fulltext model: The featured body is empty");
            }
            // Annex: featurized and labeled like the body, without figure/table/equation extraction.
            Pair<String, LayoutTokenization> bodyTextFeatured2 = getBodyTextFeatured(processing, processing.getDocumentPart(SegmentationLabels.ANNEX));
            String str2 = null;
            List<LayoutToken> list4 = null;
            if (bodyTextFeatured2 != null) {
                String left2 = bodyTextFeatured2.getLeft();
                list4 = bodyTextFeatured2.getRight().getTokenization();
                if (StringUtils.isNotEmpty(StringUtils.trim(left2))) {
                    str2 = label(left2);
                }
            }
            // Hand everything collected above to the TEI generation step.
            toTEI(processing, str, str2, layoutTokenization, list4, biblioItem, list, list2, list3, grobidAnalysisConfig);
            return processing;
        } catch (GrobidException e2) {
            // Already a Grobid exception: propagate without re-wrapping.
            throw e2;
        } catch (Exception e3) {
            throw new GrobidException("An exception occurred while running Grobid.", e3);
        }
    }

    /**
     * Labels a short sequence of layout tokens (for instance an abstract or a caption)
     * with the full-text model.
     *
     * <p>The tokens are grouped into contiguous runs: a new run starts whenever a token's
     * offset differs from the previous token's offset plus the previous token's text length.
     * Each run becomes a {@link DocumentPiece}; the pieces are then featurized with
     * {@link #getBodyTextFeatured(Document, SortedSet)} and labeled.
     *
     * @param list     the tokens to label; may be null or empty
     * @param document the document the tokens belong to
     * @return {@code null} when {@code list} is null/empty or no features could be built;
     *         otherwise a pair of the labeled output (empty string when the featured text
     *         is blank) and the tokenization reported by the featurizer (an empty list when
     *         it reports none)
     */
    public Pair<String, List<LayoutToken>> processShortNew(List<LayoutToken> list, Document document) {
        if (CollectionUtils.isEmpty(list)) {
            return null;
        }
        // Properly typed collections: the raw TreeSet/Collection of the previous version
        // did not match the generic signatures used below.
        SortedSet<DocumentPiece> pieces = new TreeSet<>();
        int pieceStartDocIndex = -1;
        int previousOffset = -1;
        int pieceStartBlockPtr = -1;
        LayoutToken previousToken = null;
        for (LayoutToken token : list) {
            if (previousOffset == -1) {
                // No previous offset recorded yet: this token opens the current piece.
                pieceStartDocIndex = getDocIndexToken(document, token);
                pieceStartBlockPtr = token.getBlockPtr();
            } else if (token.getOffset() != previousOffset + previousToken.getText().length()) {
                // Gap in the offsets: close the current piece on the previous token and open a new one.
                pieces.add(new DocumentPiece(
                        new DocumentPointer(document, pieceStartBlockPtr, pieceStartDocIndex),
                        new DocumentPointer(document, previousToken.getBlockPtr(), getDocIndexToken(document, previousToken))));
                pieceStartDocIndex = getDocIndexToken(document, token);
                pieceStartBlockPtr = token.getBlockPtr();
            }
            previousOffset = token.getOffset();
            previousToken = token;
        }
        if (pieceStartDocIndex != -1) {
            // Close the piece that was still open when the tokens ran out.
            pieces.add(new DocumentPiece(
                    new DocumentPointer(document, pieceStartBlockPtr, pieceStartDocIndex),
                    new DocumentPointer(document, previousToken.getBlockPtr(), getDocIndexToken(document, previousToken))));
        }
        Pair<String, LayoutTokenization> featured = getBodyTextFeatured(document, pieces);
        if (featured == null) {
            return null;
        }
        String features = featured.getLeft();
        LayoutTokenization tokenization = featured.getRight();
        List<LayoutToken> resultTokens = new ArrayList<>();
        if (tokenization != null) {
            resultTokens = tokenization.getTokenization();
        }
        String labeled = StringUtils.isNotBlank(features) ? label(features) : "";
        return Pair.of(labeled, resultTokens);
    }

    /**
     * Labels a short sequence of layout tokens (for instance an abstract or a caption)
     * with the full-text model.
     *
     * <p>The tokens are first split into contiguous runs: a new run starts whenever a
     * token's offset differs from the end offset (offset + text length) of the token
     * before it. Each run is turned into a {@link DocumentPiece} spanning its first and
     * last token, and the pieces are featurized with
     * {@link #getBodyTextFeatured(Document, SortedSet)} and labeled.
     *
     * @param list     the tokens to label; may be null or empty
     * @param document the document the tokens belong to
     * @return {@code null} when {@code list} is null or empty; otherwise a pair whose left
     *         side is the labeled output (null when no non-blank features were produced)
     *         and whose right side is the tokenization reported by the featurizer (null
     *         when none is available)
     */
    public Pair<String, List<LayoutToken>> processShort(List<LayoutToken> list, Document document) {
        if (CollectionUtils.isEmpty(list)) {
            return null;
        }
        SortedSet<DocumentPiece> pieces = new TreeSet<>();
        List<List<LayoutToken>> runs = new ArrayList<>();
        List<LayoutToken> currentRun = new ArrayList<>();
        int expectedOffset = 0;
        for (LayoutToken token : list) {
            // A token that does not start where the previous one ended opens a new run.
            if (!currentRun.isEmpty() && expectedOffset != token.getOffset()) {
                runs.add(currentRun);
                currentRun = new ArrayList<>();
            }
            currentRun.add(token);
            expectedOffset = token.getOffset() + token.getText().length();
        }
        // list is non-empty here, so the last run always holds at least one token.
        runs.add(currentRun);
        for (List<LayoutToken> run : runs) {
            LayoutToken first = run.get(0);
            LayoutToken last = run.get(run.size() - 1);
            pieces.add(new DocumentPiece(
                    new DocumentPointer(document, first.getBlockPtr(), getDocIndexToken(document, first)),
                    new DocumentPointer(document, last.getBlockPtr(), getDocIndexToken(document, last))));
        }
        Pair<String, LayoutTokenization> featured = getBodyTextFeatured(document, pieces);
        String labeled = null;
        // Explicitly declared result holder (the previous version used an undeclared variable).
        List<LayoutToken> resultTokens = null;
        if (featured != null) {
            String features = featured.getLeft();
            LayoutTokenization tokenization = featured.getRight();
            if (tokenization != null) {
                resultTokens = tokenization.getTokenization();
            }
            if (features != null && features.trim().length() > 0) {
                labeled = label(features);
            }
        }
        return Pair.of(labeled, resultTokens);
    }

    /**
     * Rewrites figure and table labels found in a labeled abstract into paragraph labels.
     *
     * <p>The input is read line by line (separator {@code \n}); blank lines are dropped.
     * Each remaining line is split on tabs and its last column is taken as the label:
     * <ul>
     *   <li>a label equal to {@code "I-"} + figure/table label becomes the plain paragraph
     *       label when the previous line's original label ends with the paragraph label,
     *       and {@code "I-"} + paragraph label otherwise;</li>
     *   <li>a plain figure/table label becomes the plain paragraph label;</li>
     *   <li>any other label is kept.</li>
     * </ul>
     * Lines are re-joined with tabs and each is terminated by {@code \n}.
     *
     * @param str the labeled abstract, may be null
     * @return the rewritten labeled abstract, or null when {@code str} is null
     */
    protected static String postProcessLabeledAbstract(String str) {
        if (str == null) {
            return null;
        }
        StringBuilder output = new StringBuilder();
        // Label of the previous kept line as it was BEFORE any rewriting.
        String previousLabel = null;
        String[] rows = str.split("\n");
        for (int r = 0; r < rows.length; r++) {
            String row = rows[r];
            if (row.trim().isEmpty()) {
                continue;
            }
            String[] columns = row.split("\t");
            int labelIndex = columns.length - 1;
            String label = columns[labelIndex];
            boolean opensFigureOrTable = label.equals("I-" + TaggingLabels.FIGURE.getLabel())
                    || label.equals("I-" + TaggingLabels.TABLE.getLabel());
            if (opensFigureOrTable) {
                boolean continuesParagraph = previousLabel != null
                        && previousLabel.endsWith(TaggingLabels.PARAGRAPH.getLabel());
                if (continuesParagraph) {
                    columns[labelIndex] = TaggingLabels.PARAGRAPH.getLabel();
                } else {
                    columns[labelIndex] = "I-" + TaggingLabels.PARAGRAPH.getLabel();
                }
            } else if (label.equals(TaggingLabels.FIGURE.getLabel()) || label.equals(TaggingLabels.TABLE.getLabel())) {
                columns[labelIndex] = TaggingLabels.PARAGRAPH.getLabel();
            }
            // A non-blank row always yields at least one column.
            output.append(columns[0]);
            for (int c = 1; c < columns.length; c++) {
                output.append("\t").append(columns[c]);
            }
            output.append("\n");
            previousLabel = label;
        }
        return output.toString();
    }

    public static Pair<String, LayoutTokenization> getBodyTextFeatured(Document document, SortedSet<DocumentPiece> sortedSet) {
        String text;
        if (sortedSet == null || sortedSet.size() == 0) {
            return null;
        }
        FeatureFactory featureFactory = FeatureFactory.getInstance();
        StringBuilder sb = new StringBuilder();
        String str = null;
        int i = -1;
        List<Block> blocks = document.getBlocks();
        if (blocks == null || blocks.size() == 0) {
            return null;
        }
        FeaturesVectorFulltext featuresVectorFulltext = null;
        ReferenceMarkerMatcher referenceMarkerMatcher = null;
        String str2 = "UNKNOWN";
        List<BibDataSet> bibDataSets = document.getBibDataSets();
        if (bibDataSets != null) {
            try {
                referenceMarkerMatcher = document.getReferenceMarkerMatcher();
                int i2 = 0;
                int i3 = 0;
                for (BibDataSet bibDataSet : bibDataSets) {
                    if (bibDataSet != null && bibDataSet.getRefSymbol() != null) {
                        if (ReferenceMarkerMatcher.isNumberedCitationReference(bibDataSet.getRefSymbol())) {
                            i2++;
                        } else if (referenceMarkerMatcher.isAuthorCitationStyle(bibDataSet.getRefSymbol())) {
                            i3++;
                        }
                    }
                }
                if (i2 > bibDataSets.size() / 2) {
                    str2 = "NUMBER";
                } else if (i2 > bibDataSets.size() / 2) {
                    str2 = "AUTHOR";
                }
            } catch (EntityMatcherException e) {
                e.printStackTrace();
                LOGGER.info("Could not build the bibliographical matcher", (Throwable) e);
            }
        }
        int i4 = 0;
        int i5 = 0;
        double d = Double.NaN;
        boolean z = false;
        double d2 = 0.0d;
        int i6 = 0;
        ArrayList arrayList = new ArrayList();
        int fulltextLength = getFulltextLength(document, sortedSet, 0);
        for (DocumentPiece documentPiece : sortedSet) {
            DocumentPointer left = documentPiece.getLeft();
            DocumentPointer right = documentPiece.getRight();
            for (int blockPtr = left.getBlockPtr(); blockPtr <= right.getBlockPtr(); blockPtr++) {
                boolean z2 = false;
                boolean z3 = false;
                Block block = blocks.get(blockPtr);
                double height = block.getPage().getHeight();
                int number = block.getPage().getNumber();
                if (number != i6) {
                    i6 = number;
                    i4 = 0;
                    d2 = 0.0d;
                }
                boolean z4 = false;
                boolean z5 = false;
                double maxBlockSpacing = d2 > block.getY() ? document.getMaxBlockSpacing() / 5.0d : block.getY() - d2;
                String text2 = block.getText();
                if (!TextUtilities.filterLine(text2)) {
                    double d3 = 0.0d;
                    if (block.getHeight() != XPath.MATCH_SCORE_QNAME && block.getWidth() != XPath.MATCH_SCORE_QNAME && text2 != null && !text2.contains("@PAGE") && !text2.contains("@IMAGE")) {
                        d3 = text2.length() / (block.getHeight() * block.getWidth());
                    }
                    List<GraphicObject> connectedGraphics = Document.getConnectedGraphics(block, document);
                    if (connectedGraphics != null) {
                        for (GraphicObject graphicObject : connectedGraphics) {
                            if (graphicObject.getType() == GraphicObjectType.BITMAP) {
                                z2 = true;
                            }
                            if (graphicObject.getType() == GraphicObjectType.VECTOR) {
                                z3 = true;
                            }
                        }
                    }
                    List<LayoutToken> tokens = block.getTokens();
                    if (tokens != null) {
                        int tokenBlockPos = blockPtr == left.getBlockPtr() ? left.getTokenBlockPos() : 0;
                        int size = tokens.size();
                        if (blockPtr == right.getBlockPtr()) {
                            size = right.getTokenBlockPos() + 1;
                            if (size > tokens.size()) {
                                LOGGER.error("DocumentPointer for block " + blockPtr + " points to " + right.getTokenBlockPos() + " token, but block token size is " + tokens.size());
                                size = tokens.size();
                            }
                        }
                        while (tokenBlockPos < size && (blockPtr != right.getBlockPtr() || tokenBlockPos <= right.getTokenDocPos() - block.getStartToken())) {
                            LayoutToken layoutToken = tokens.get(tokenBlockPos);
                            arrayList.add(layoutToken);
                            FeaturesVectorFulltext featuresVectorFulltext2 = new FeaturesVectorFulltext();
                            featuresVectorFulltext2.token = layoutToken;
                            double y = layoutToken.getY();
                            String text3 = layoutToken.getText();
                            if (text3 == null || text3.length() == 0) {
                                tokenBlockPos++;
                            } else {
                                String replace = text3.replace(" ", "");
                                if (replace.length() == 0) {
                                    tokenBlockPos++;
                                    i4++;
                                    i5++;
                                } else if (replace.equals("\n")) {
                                    z4 = true;
                                    tokenBlockPos++;
                                    i4++;
                                    i5++;
                                } else {
                                    boolean z6 = false;
                                    String replaceAll = replace.replaceAll("[ \n]", "");
                                    if (TextUtilities.filterLine(replaceAll)) {
                                        tokenBlockPos++;
                                    } else {
                                        if (z4) {
                                            z6 = true;
                                            z4 = false;
                                            if (layoutToken != null && featuresVectorFulltext != null) {
                                                double d4 = d;
                                                d = layoutToken.getX();
                                                double length = layoutToken.width / replaceAll.length();
                                                if (!Double.isNaN(d4)) {
                                                    if (d4 - d > length) {
                                                        z = false;
                                                    } else if (d - d4 > length) {
                                                        z = true;
                                                    }
                                                }
                                            }
                                        }
                                        featuresVectorFulltext2.string = replaceAll;
                                        if (z3) {
                                            featuresVectorFulltext2.bitmapAround = true;
                                        }
                                        if (z2) {
                                            featuresVectorFulltext2.vectorAround = true;
                                        }
                                        if (z6) {
                                            featuresVectorFulltext2.lineStatus = "LINESTART";
                                            if (layoutToken != null) {
                                                d = layoutToken.getX();
                                            }
                                            if (featuresVectorFulltext != null && !featuresVectorFulltext.lineStatus.equals("LINESTART")) {
                                                featuresVectorFulltext.lineStatus = "LINEEND";
                                            }
                                        }
                                        if (featureFactory.isPunct.matcher(replaceAll).find()) {
                                            featuresVectorFulltext2.punctType = "PUNCT";
                                        }
                                        if (replaceAll.equals(TextUtilities.START_BRACKET) || replaceAll.equals("[")) {
                                            featuresVectorFulltext2.punctType = "OPENBRACKET";
                                        } else if (replaceAll.equals(TextUtilities.END_BRACKET) || replaceAll.equals("]")) {
                                            featuresVectorFulltext2.punctType = "ENDBRACKET";
                                        } else if (replaceAll.equals(Constants.ATTRVAL_THIS)) {
                                            featuresVectorFulltext2.punctType = "DOT";
                                        } else if (replaceAll.equals(TextUtilities.COMMA)) {
                                            featuresVectorFulltext2.punctType = "COMMA";
                                        } else if (replaceAll.equals("-")) {
                                            featuresVectorFulltext2.punctType = "HYPHEN";
                                        } else if (replaceAll.equals("\"") || replaceAll.equals("'") || replaceAll.equals("`")) {
                                            featuresVectorFulltext2.punctType = "QUOTE";
                                        }
                                        if (z) {
                                            featuresVectorFulltext2.alignmentStatus = "LINEINDENT";
                                        } else {
                                            featuresVectorFulltext2.alignmentStatus = "ALIGNEDLEFT";
                                        }
                                        if (tokenBlockPos == 0) {
                                            featuresVectorFulltext2.lineStatus = "LINESTART";
                                            if (featuresVectorFulltext != null && !featuresVectorFulltext.lineStatus.equals("LINESTART")) {
                                                featuresVectorFulltext.lineStatus = "LINEEND";
                                            }
                                            if (layoutToken != null) {
                                                d = layoutToken.getX();
                                            }
                                            featuresVectorFulltext2.blockStatus = "BLOCKSTART";
                                        } else if (tokenBlockPos == tokens.size() - 1) {
                                            featuresVectorFulltext2.lineStatus = "LINEEND";
                                            z4 = true;
                                            featuresVectorFulltext2.blockStatus = "BLOCKEND";
                                            z5 = true;
                                        } else {
                                            boolean z7 = false;
                                            boolean z8 = false;
                                            for (int i7 = 1; tokenBlockPos + i7 < tokens.size() && !z8; i7++) {
                                                LayoutToken layoutToken2 = tokens.get(tokenBlockPos + i7);
                                                if (layoutToken2 != null && (text = layoutToken2.getText()) != null) {
                                                    if (text.equals("\n")) {
                                                        z7 = true;
                                                        z8 = true;
                                                    } else if (text.length() != 0 && !text.startsWith("@IMAGE") && !text.startsWith("@PAGE") && !replaceAll.contains(".pbm") && !replaceAll.contains(".svg") && !replaceAll.contains(".png") && !replaceAll.contains(".jpg")) {
                                                        z8 = true;
                                                    }
                                                }
                                                if (tokenBlockPos + i7 == tokens.size() - 1) {
                                                    z5 = true;
                                                    z7 = true;
                                                }
                                            }
                                            if (!z7 && !z6) {
                                                featuresVectorFulltext2.lineStatus = "LINEIN";
                                            } else if (!z6) {
                                                featuresVectorFulltext2.lineStatus = "LINEEND";
                                                z4 = true;
                                            }
                                            if (!z5 && featuresVectorFulltext2.blockStatus == null) {
                                                featuresVectorFulltext2.blockStatus = "BLOCKIN";
                                            } else if (featuresVectorFulltext2.blockStatus == null) {
                                                featuresVectorFulltext2.blockStatus = "BLOCKEND";
                                            }
                                        }
                                        if (replaceAll.length() == 1) {
                                            featuresVectorFulltext2.singleChar = true;
                                        }
                                        if (Character.isUpperCase(replaceAll.charAt(0))) {
                                            featuresVectorFulltext2.capitalisation = "INITCAP";
                                        }
                                        if (featureFactory.test_all_capital(replaceAll)) {
                                            featuresVectorFulltext2.capitalisation = "ALLCAP";
                                        }
                                        if (FeatureFactory.test_digit(replaceAll)) {
                                            featuresVectorFulltext2.digit = "CONTAINSDIGITS";
                                        }
                                        if (featureFactory.isDigit.matcher(replaceAll).find()) {
                                            featuresVectorFulltext2.digit = "ALLDIGIT";
                                        }
                                        if (str == null) {
                                            str = layoutToken.getFont();
                                            featuresVectorFulltext2.fontStatus = "NEWFONT";
                                        } else if (str.equals(layoutToken.getFont())) {
                                            featuresVectorFulltext2.fontStatus = "SAMEFONT";
                                        } else {
                                            str = layoutToken.getFont();
                                            featuresVectorFulltext2.fontStatus = "NEWFONT";
                                        }
                                        int fontSize = (int) layoutToken.getFontSize();
                                        if (i == -1) {
                                            i = fontSize;
                                            featuresVectorFulltext2.fontSize = "HIGHERFONT";
                                        } else if (i == fontSize) {
                                            featuresVectorFulltext2.fontSize = "SAMEFONTSIZE";
                                        } else if (i < fontSize) {
                                            featuresVectorFulltext2.fontSize = "HIGHERFONT";
                                            i = fontSize;
                                        } else if (i > fontSize) {
                                            featuresVectorFulltext2.fontSize = "LOWERFONT";
                                            i = fontSize;
                                        }
                                        if (layoutToken.getBold()) {
                                            featuresVectorFulltext2.bold = true;
                                        }
                                        if (layoutToken.getItalic()) {
                                            featuresVectorFulltext2.italic = true;
                                        }
                                        if (featuresVectorFulltext2.capitalisation == null) {
                                            featuresVectorFulltext2.capitalisation = "NOCAPS";
                                        }
                                        if (featuresVectorFulltext2.digit == null) {
                                            featuresVectorFulltext2.digit = "NODIGIT";
                                        }
                                        if (featuresVectorFulltext2.punctType == null) {
                                            featuresVectorFulltext2.punctType = "NOPUNCT";
                                        }
                                        featuresVectorFulltext2.relativeDocumentPosition = featureFactory.linearScaling(i5, fulltextLength, 12);
                                        featuresVectorFulltext2.relativePagePositionChar = featureFactory.linearScaling(i4, 0, 12);
                                        int linearScaling = featureFactory.linearScaling(y, height, 12);
                                        if (linearScaling > 12) {
                                            linearScaling = 12;
                                        }
                                        featuresVectorFulltext2.relativePagePosition = linearScaling;
                                        if (maxBlockSpacing != XPath.MATCH_SCORE_QNAME) {
                                            featuresVectorFulltext2.spacingWithPreviousBlock = featureFactory.linearScaling(maxBlockSpacing - document.getMinBlockSpacing(), document.getMaxBlockSpacing() - document.getMinBlockSpacing(), 5);
                                        }
                                        if (d3 != -1.0d) {
                                            featuresVectorFulltext2.characterDensity = featureFactory.linearScaling(d3 - document.getMinCharacterDensity(), document.getMaxCharacterDensity() - document.getMinCharacterDensity(), 5);
                                        }
                                        featuresVectorFulltext2.calloutType = str2;
                                        if (referenceMarkerMatcher != null && (referenceMarkerMatcher.isKnownLabel(replaceAll) || referenceMarkerMatcher.isKnownFirstAuthor(replaceAll))) {
                                            featuresVectorFulltext2.calloutKnown = true;
                                        }
                                        if (featuresVectorFulltext != null) {
                                            if (featuresVectorFulltext2.blockStatus.equals("BLOCKSTART") && featuresVectorFulltext.blockStatus.equals("BLOCKIN")) {
                                                featuresVectorFulltext.blockStatus = "BLOCKEND";
                                                featuresVectorFulltext.lineStatus = "LINEEND";
                                            }
                                            sb.append(featuresVectorFulltext.printVector());
                                        }
                                        tokenBlockPos++;
                                        i4 += replaceAll.length();
                                        i5 += replaceAll.length();
                                        featuresVectorFulltext = featuresVectorFulltext2;
                                    }
                                }
                            }
                        }
                        d2 = block.getY() + block.getHeight();
                    }
                }
            }
        }
        if (featuresVectorFulltext != null) {
            sb.append(featuresVectorFulltext.printVector());
        }
        return Pair.of(sb.toString(), new LayoutTokenization(arrayList));
    }

    /**
     * Adds up the text length of every token covered by the given document pieces.
     *
     * <p>For each piece the token range runs from the left pointer's document position to the
     * right pointer's document position, both inclusive, and is cut short at the end of the
     * document's tokenization list.
     *
     * @param document  document whose tokenization list is indexed by the piece pointers
     * @param sortedSet pieces whose tokens are measured
     * @param i         starting value the lengths are added to
     * @return {@code i} plus the summed character count of all covered tokens
     */
    private static int getFulltextLength(Document document, SortedSet<DocumentPiece> sortedSet, int i) {
        int total = i;
        for (DocumentPiece piece : sortedSet) {
            int cursor = piece.getLeft().getTokenDocPos();
            int lastIncluded = piece.getRight().getTokenDocPos();
            while (cursor <= lastIncluded && cursor < document.getTokenizations().size()) {
                total += document.getTokenizations().get(cursor).getText().length();
                cursor++;
            }
        }
        return total;
    }

    /**
     * Finds the position of a layout token in the document's tokenization list.
     *
     * <p>The scan starts at the first token of the block referenced by the token's block pointer
     * and moves forward while the scanned token's offset is smaller than the target's offset.
     *
     * @param document    document providing the blocks and the tokenization list
     * @param layoutToken token to locate
     * @return index of the first scanned token whose offset is not smaller than the target's,
     *         or the tokenization list size when no such token exists
     */
    private static int getDocIndexToken(Document document, LayoutToken layoutToken) {
        int index = document.getBlocks().get(layoutToken.getBlockPtr()).getStartToken();
        List<LayoutToken> allTokens = document.getTokenizations();
        for (; index < allTokens.size(); index++) {
            boolean precedesTarget = allTokens.get(index).getOffset() < layoutToken.getOffset();
            if (!precedesTarget) {
                break;
            }
        }
        return index;
    }

    /**
     * Processes one PDF with the current models and writes the resulting pre-annotated
     * training files for the segmentation, reference segmenter, citation, reference author,
     * full text, figure, table and header related models.
     *
     * <p>Every output file name is derived from the PDF file name by replacing {@code ".pdf"}
     * with a model specific suffix. Files are written as UTF-8 and overwrite existing ones.
     * The document source opened from the PDF is always closed before this method returns or
     * throws.
     *
     * @param file PDF to process
     * @param str  directory receiving the feature and raw text files of the segmentation,
     *             full text, figure and table models
     * @param str2 directory receiving the TEI files, plus the reference segmenter and header
     *             feature files
     * @param i    identifier written into the {@code xml:id} attribute of the generated TEI
     *             headers; for the reference and full text TEI files {@code -1} produces an
     *             empty {@code teiHeader} instead
     * @return the document produced by the segmentation parser
     * @throws GrobidResourceException if the temporary path is not set or does not exist
     * @throws GrobidException         wrapping any exception raised while processing or writing
     */
    public Document createTraining(File file, String str, String str2, int i) {
        if (this.tmpPath == null) {
            throw new GrobidResourceException("Cannot process pdf file, because temp path is null.");
        }
        if (!this.tmpPath.exists()) {
            throw new GrobidResourceException("Cannot process pdf file, because temp path '" + this.tmpPath.getAbsolutePath() + "' does not exists.");
        }
        // Kept outside the try block so that the finally clause can release it on every path.
        DocumentSource documentSource = null;
        try {
            if (!file.exists()) {
                throw new GrobidResourceException("Cannot train for fulltext, becuase file '" + file.getAbsolutePath() + "' does not exists.");
            }
            String name = file.getName();
            documentSource = DocumentSource.fromPdf(file, -1, -1, false, true, true);
            Document document = new Document(documentSource);
            document.addTokenizedDocument(GrobidAnalysisConfig.defaultInstance());
            if (document.getBlocks() == null) {
                throw new Exception("PDF parsing resulted in empty content");
            }
            document.produceStatistics();

            // --- segmentation model: features, raw text and labelled TEI ---
            String allLinesFeatured = this.parsers.getSegmentationParser().getAllLinesFeatured(document);
            List<LayoutToken> tokenizationsFulltext = document.getTokenizationsFulltext();
            writeTrainingFile(str, name, ".training.segmentation", allLinesFeatured + "\n");
            StringBuilder rawText = new StringBuilder();
            for (LayoutToken token : tokenizationsFulltext) {
                rawText.append(token.getText());
            }
            FileUtils.writeStringToFile(new File(str + File.separator + name.replace(".pdf", ".training.segmentation.rawtxt")), rawText.toString(), StandardCharsets.UTF_8);
            if (StringUtils.isNotBlank(allLinesFeatured)) {
                StringBuffer segmentationTei = this.parsers.getSegmentationParser().trainingExtraction(this.parsers.getSegmentationParser().label(allLinesFeatured), tokenizationsFulltext, document);
                writeTrainingFile(str2, name, ".training.segmentation.tei.xml",
                        "<?xml version=\"1.0\" ?>\n<tei xml:space=\"preserve\">\n\t<teiHeader>\n\t\t<fileDesc xml:id=\"" + i + "\"/>\n\t</teiHeader>\n\t<text xml:lang=\"en\">\n",
                        segmentationTei.toString(),
                        "\n\t</text>\n</tei>\n");
            }

            Document processing = this.parsers.getSegmentationParser().processing(documentSource, GrobidAnalysisConfig.defaultInstance());

            // --- reference segmenter model ---
            String documentPartText = processing.getDocumentPartText(SegmentationLabels.REFERENCES);
            if (!documentPartText.isEmpty()) {
                Pair<String, String> segmenterData = this.parsers.getReferenceSegmenterParser().createTrainingData(processing, i);
                String segmenterTei = segmenterData.getLeft();
                String segmenterFeatures = segmenterData.getRight();
                if (segmenterTei != null) {
                    writeTrainingFile(str2, name, ".training.references.referenceSegmenter.tei.xml", segmenterTei + "\n");
                    writeTrainingFile(str2, name, ".training.references.referenceSegmenter", segmenterFeatures + "\n");
                    writeTrainingFile(str2, name, ".training.references.referenceSegmenter.rawtxt", documentPartText + "\n");
                }
                this.cntManager.i(CitationParserCounters.NOT_EMPTY_REFERENCES_BLOCKS);
            }

            // --- citation model and reference author model ---
            ReferenceSegmenterParser referenceSegmenterParser = this.parsers.getReferenceSegmenterParser();
            List<LabeledReferenceResult> extract = referenceSegmenterParser.extract(processing);
            processing.setBibDataSets(this.parsers.getCitationParser().processingReferenceSection(processing, referenceSegmenterParser, 0));
            if (extract == null) {
                this.cntManager.i(CitationParserCounters.NULL_SEGMENTED_REFERENCES_LIST);
            } else {
                this.cntManager.i(CitationParserCounters.SEGMENTED_REFERENCES, extract.size());
                List<String> referenceTexts = new ArrayList<>();
                for (LabeledReferenceResult reference : extract) {
                    referenceTexts.add(reference.getReferenceText());
                }
                StringBuilder citationTei = this.parsers.getCitationParser().trainingExtraction(referenceTexts);
                if (citationTei != null) {
                    citationTei.append("\n");
                    String teiRootOpen = "<?xml version=\"1.0\" ?>\n<TEI xml:space=\"preserve\" xmlns=\"http://www.tei-c.org/ns/1.0\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" \n xmlns:mml=\"http://www.w3.org/1998/Math/MathML\">\n";
                    String citationHeader;
                    if (i == -1) {
                        citationHeader = "\t<teiHeader/>\n\t<text>\n\t\t<front/>\n\t\t<body/>\n\t\t<back>\n";
                    } else {
                        citationHeader = "\t<teiHeader>\n\t\t<fileDesc xml:id=\"" + i + "\"/>\n\t</teiHeader>\n\t<text>\n\t\t<front/>\n\t\t<body/>\n\t\t<back>\n";
                    }
                    writeTrainingFile(str2, name, ".training.references.tei.xml",
                            teiRootOpen,
                            citationHeader,
                            "<listBibl>\n",
                            citationTei.toString(),
                            "\t\t</listBibl>\n\t</back>\n\t</text>\n</TEI>\n");

                    // One <author> element per reference for which both the citation parser
                    // and the author parser return non-blank content.
                    StringBuilder referenceAuthors = new StringBuilder();
                    for (LabeledReferenceResult reference : extract) {
                        String referenceText = reference.getReferenceText();
                        if (referenceText == null || referenceText.trim().length() == 0) {
                            continue;
                        }
                        String authors = this.parsers.getCitationParser().processing(referenceText, 0).getAuthors();
                        if (authors == null || authors.trim().length() == 0) {
                            continue;
                        }
                        StringBuilder authorTei = this.parsers.getAuthorParser().trainingExtraction(authors, false);
                        if (authorTei == null || authorTei.length() == 0) {
                            continue;
                        }
                        referenceAuthors.append("\n\t\t\t\t\t\t<author>");
                        referenceAuthors.append(authorTei.toString());
                        referenceAuthors.append("</author>\n");
                    }
                    writeTrainingFile(str2, name, ".training.references.authors.tei.xml",
                            teiRootOpen,
                            "\t<teiHeader>\n\t\t<fileDesc>\n\t\t\t<sourceDesc>\n\t\t\t\t<biblStruct>\n\t\t\t\t\t<analytic>\n\n",
                            referenceAuthors.toString(),
                            "\n\t\t\t\t\t</analytic>",
                            "\n\t\t\t\t</biblStruct>\n\t\t\t</sourceDesc>\n\t\t</fileDesc>",
                            "\n\t</teiHeader>\n</TEI>\n");
                }
            }

            // --- full text, figure and table models ---
            SortedSet<DocumentPiece> bodyParts = processing.getDocumentPart(SegmentationLabels.BODY);
            if (bodyParts != null) {
                Pair<String, LayoutTokenization> bodyFeatured = getBodyTextFeatured(processing, bodyParts);
                if (bodyFeatured != null) {
                    String bodyFeatures = bodyFeatured.getLeft();
                    List<LayoutToken> bodyTokens = bodyFeatured.getRight().getTokenization();
                    writeTrainingFile(str, name, ".training.fulltext", bodyFeatures + "\n");
                    String bodyLabels = label(bodyFeatures);
                    StringBuilder bodyTei = trainingExtraction(bodyLabels, bodyTokens);
                    String bodyTeiOpen;
                    if (i == -1) {
                        bodyTeiOpen = "<?xml version=\"1.0\" ?>\n<tei xml:space=\"preserve\">\n\t<teiHeader/>\n\t<text xml:lang=\"en\">\n";
                    } else {
                        bodyTeiOpen = "<?xml version=\"1.0\" ?>\n<tei xml:space=\"preserve\">\n\t<teiHeader>\n\t\t<fileDesc xml:id=\"" + i + "\"/>\n\t</teiHeader>\n\t<text xml:lang=\"en\">\n";
                    }
                    writeTrainingFile(str2, name, ".training.fulltext.tei.xml",
                            bodyTeiOpen,
                            bodyTei.toString(),
                            "\n\t</text>\n</tei>\n");

                    Pair<String, String> figureData = processTrainingDataFigures(bodyLabels, bodyTokens, file.getName());
                    if (figureData.getLeft().trim().length() > 0) {
                        writeTrainingFile(str, name, ".training.figure", figureData.getRight() + "\n\n");
                        writeTrainingFile(str2, name, ".training.figure.tei.xml", figureData.getLeft() + "\n");
                    }
                    Pair<String, String> tableData = processTrainingDataTables(bodyLabels, bodyTokens, file.getName());
                    if (tableData.getLeft().trim().length() > 0) {
                        writeTrainingFile(str, name, ".training.table", tableData.getRight() + "\n\n");
                        writeTrainingFile(str2, name, ".training.table.tei.xml", tableData.getLeft() + "\n");
                    }
                }
            }

            // --- header model and the models fed from header fields ---
            SortedSet<DocumentPiece> headerParts = processing.getDocumentPart(SegmentationLabels.HEADER);
            List<LayoutToken> tokenizations = processing.getTokenizations();
            if (headerParts != null) {
                List<LayoutToken> headerTokens = new ArrayList<>();
                for (DocumentPiece piece : headerParts) {
                    int firstPos = piece.getLeft().getTokenDocPos();
                    int endPos = piece.getRight().getTokenDocPos();
                    // The token at the right pointer itself is not collected here.
                    for (int pos = firstPos; pos < endPos; pos++) {
                        headerTokens.add(tokenizations.get(pos));
                    }
                }
                String headerFeatures = this.parsers.getHeaderParser().getSectionHeaderFeatured(processing, headerParts, true).getLeft();
                if (headerFeatures != null && headerFeatures.trim().length() > 0) {
                    String headerLabels = this.parsers.getHeaderParser().label(headerFeatures);
                    writeTrainingFile(str2, name, ".training.header", headerFeatures + "\n");
                    StringBuilder headerTei = this.parsers.getHeaderParser().trainingExtraction(headerLabels, true, headerTokens);
                    Language language = LanguageUtilities.getInstance().runLanguageId(headerTei.toString());
                    if (language != null) {
                        processing.setLanguage(language.getLang());
                    }
                    StringBuilder affiliationTei = this.parsers.getAffiliationAddressParser().trainingExtraction(headerLabels, headerTokens);

                    StringBuilder dateXml = null;
                    String dateText = collectHeaderTextForLabel(headerLabels, headerTokens, TaggingLabels.DATE_LABEL);
                    if (dateText.trim().length() > 1) {
                        List<String> dateInputs = new ArrayList<>();
                        dateInputs.add(dateText.trim());
                        dateXml = this.parsers.getDateParser().trainingExtraction(dateInputs);
                    }

                    String authorText = collectHeaderTextForLabel(headerLabels, headerTokens, TaggingLabels.AUTHOR_LABEL);
                    StringBuilder headerAuthorTei = null;
                    if (authorText.length() > 1) {
                        headerAuthorTei = this.parsers.getAuthorParser().trainingExtraction(authorText, true);
                    }

                    StringBuilder headerReferenceXml = null;
                    String referenceText = collectHeaderTextForLabel(headerLabels, headerTokens, TaggingLabels.REFERENCE_LABEL);
                    if (referenceText.length() > 1) {
                        List<String> referenceInputs = new ArrayList<>();
                        referenceInputs.add(referenceText.trim());
                        headerReferenceXml = this.parsers.getCitationParser().trainingExtraction(referenceInputs);
                    }

                    String languageAttribute = "";
                    if (language != null) {
                        languageAttribute = " xml:lang=\"" + language.getLang() + "\"";
                    }
                    writeTrainingFile(str2, name, ".training.header.tei.xml",
                            "<?xml version=\"1.0\" ?>\n<tei xml:space=\"preserve\">\n\t<teiHeader>\n\t\t<fileDesc xml:id=\"" + name.replace(".pdf", "") + "\"/>\n\t</teiHeader>\n\t<text",
                            languageAttribute,
                            ">\n\t\t<front>\n",
                            headerTei.toString(),
                            "\n\t\t</front>\n\t</text>\n</tei>\n");

                    if (affiliationTei != null && affiliationTei.length() > 0) {
                        writeTrainingFile(str2, name, ".training.header.affiliation.tei.xml",
                                "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
                                "\n<tei xml:space=\"preserve\" xmlns=\"http://www.tei-c.org/ns/1.0\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\">",
                                "\n\t<teiHeader>\n\t\t<fileDesc>\n\t\t\t<sourceDesc>",
                                "\n\t\t\t\t<biblStruct>\n\t\t\t\t\t<analytic>\n\t\t\t\t\t\t<author>\n\n",
                                affiliationTei.toString(),
                                "\n\t\t\t\t\t\t</author>\n\t\t\t\t\t</analytic>",
                                "\n\t\t\t\t</biblStruct>\n\t\t\t</sourceDesc>\n\t\t</fileDesc>",
                                "\n\t</teiHeader>\n</tei>\n");
                    }
                    if (dateXml != null && dateXml.length() > 0) {
                        writeTrainingFile(str2, name, ".training.header.date.xml",
                                "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
                                "<dates>\n",
                                dateXml.toString(),
                                "</dates>\n");
                    }
                    if (headerAuthorTei != null && headerAuthorTei.length() > 0) {
                        writeTrainingFile(str2, name, ".training.header.authors.tei.xml",
                                "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
                                "\n<tei xml:space=\"preserve\" xmlns=\"http://www.tei-c.org/ns/1.0\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\">",
                                "\n\t<teiHeader>\n\t\t<fileDesc>\n\t\t\t<sourceDesc>",
                                "\n\t\t\t\t<biblStruct>\n\t\t\t\t\t<analytic>\n\n\t\t\t\t\t\t<author>",
                                "\n\t\t\t\t\t\t\t<persName>\n",
                                headerAuthorTei.toString(),
                                "\t\t\t\t\t\t\t</persName>\n",
                                "\t\t\t\t\t\t</author>\n\n\t\t\t\t\t</analytic>",
                                "\n\t\t\t\t</biblStruct>\n\t\t\t</sourceDesc>\n\t\t</fileDesc>",
                                "\n\t</teiHeader>\n</tei>\n");
                    }
                    if (headerReferenceXml != null && headerReferenceXml.length() > 0) {
                        writeTrainingFile(str2, name, ".training.header.reference.xml",
                                "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
                                "<citations>\n",
                                headerReferenceXml.toString(),
                                "</citations>\n");
                    }
                }
            }
            return processing;
        } catch (Exception e) {
            throw new GrobidException("An exception occurred while running Grobid training data generation for full text.", e);
        } finally {
            // Runs on success and on failure, so the opened source is released either way.
            DocumentSource.close(documentSource, true, true, true);
        }
    }

    /**
     * Writes the given parts, in order, to one UTF-8 training file, replacing any existing file.
     * The writer is closed even when a write fails.
     *
     * @param directory output directory
     * @param pdfName   name of the source PDF; {@code ".pdf"} is replaced by {@code suffix}
     * @param suffix    model specific file name suffix
     * @param parts     content chunks written one after the other
     * @throws IOException if the file cannot be opened or written
     */
    private static void writeTrainingFile(String directory, String pdfName, String suffix, String... parts) throws IOException {
        File target = new File(directory + File.separator + pdfName.replace(".pdf", suffix));
        try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(target, false), StandardCharsets.UTF_8)) {
            for (String part : parts) {
                writer.write(part);
            }
        }
    }

    /**
     * Concatenates the text of the header tokens whose label line ends with the given label.
     *
     * <p>Label lines and tokens are walked in parallel. When the current token is a single
     * space, tab, newline or carriage return, the following tokens are appended to it until a
     * non-whitespace token has been consumed or the token list is exhausted, and the whole run
     * is attributed to the current label line.
     *
     * @param labeledHeader labelling result, one line per labelled token
     * @param headerTokens  layout tokens of the header
     * @param labelSuffix   label a line must end with for its text to be collected
     * @return the collected text, empty when no line carries the label
     */
    private static String collectHeaderTextForLabel(String labeledHeader, List<LayoutToken> headerTokens, String labelSuffix) {
        StringBuilder collected = new StringBuilder();
        StringTokenizer labelLines = new StringTokenizer(labeledHeader, "\n");
        int pos = 0;
        while (labelLines.hasMoreTokens() && pos < headerTokens.size()) {
            String labelLine = labelLines.nextToken();
            String current = headerTokens.get(pos).getText();
            StringBuilder chunk = new StringBuilder().append(current);
            while (current.equals(" ") || current.equals("\t") || current.equals("\n") || current.equals("\r")) {
                pos++;
                if (pos >= headerTokens.size()) {
                    // Trailing whitespace at the end of the header: nothing left to attach.
                    break;
                }
                current = headerTokens.get(pos).getText();
                chunk.append(current);
            }
            if (labelLine.endsWith(labelSuffix)) {
                collected.append(chunk);
            }
            pos++;
        }
        return collected.toString();
    }

    /**
     * Converts the labelled output of the full text model into the markup written to training files.
     *
     * <p>Every non-empty line of {@code str} is split on spaces and tabs. The first field is the token
     * text and the last field is its label; the fields in between are features, of which only
     * {@code LINESTART} is looked at here. The layout tokens in {@code list} are scanned in parallel
     * to find out whether a space or a line break preceded the token.
     *
     * @param str  labelled result, one token per line
     * @param list layout tokens corresponding to the labelled text
     * @return the buffer holding the generated markup
     * @throws GrobidException wrapping any exception raised during the conversion
     */
    private StringBuilder trainingExtraction(String str, List<LayoutToken> list) {
        StringBuilder sb = new StringBuilder();
        try {
            StringTokenizer lines = new StringTokenizer(str, "\n");
            String currentLabel = null;
            String encodedToken = null;
            String previousLabel = null;
            int tokenPos = 0;
            boolean firstLine = true;
            while (lines.hasMoreTokens()) {
                String line = lines.nextToken().trim();
                if (line.length() == 0) {
                    continue;
                }
                boolean addSpace = false;
                boolean newLine = false;
                StringTokenizer fields = new StringTokenizer(line, " \t");
                int fieldCount = fields.countTokens();
                int fieldIndex = 0;
                while (fields.hasMoreTokens()) {
                    String field = fields.nextToken().trim();
                    if (fieldIndex == 0) {
                        encodedToken = TextUtilities.HTMLEncode(field);
                        // Walk the layout tokens up to the one matching this token, remembering any
                        // space or line break met on the way.
                        int startPos = tokenPos;
                        boolean matched = false;
                        while (!matched && tokenPos < list.size()) {
                            String tokenText = list.get(tokenPos).t();
                            // The non-breaking space is spelled as an escape so it stays visible.
                            if (tokenText.equals(" ") || tokenText.equals("\u00A0")) {
                                addSpace = true;
                            } else if (tokenText.equals("\n")) {
                                newLine = true;
                            } else if (tokenText.equals(field)) {
                                matched = true;
                            }
                            tokenPos++;
                        }
                        // The scan ran to the end of the list after consuming more than two tokens:
                        // rewind so the following lines are aligned from the previous position.
                        if (tokenPos == list.size() && tokenPos - startPos > 2) {
                            tokenPos = startPos;
                        }
                    } else if (fieldIndex == fieldCount - 1) {
                        currentLabel = field;
                    } else if (field.equals("LINESTART")) {
                        newLine = true;
                    }
                    fieldIndex++;
                }
                if (newLine && !firstLine) {
                    sb.append("<lb/>");
                }
                // Labels with the "I-" prefix removed.
                String previousPlain = null;
                if (previousLabel != null) {
                    previousPlain = previousLabel.startsWith("I-") ? previousLabel.substring(2) : previousLabel;
                }
                String currentPlain = null;
                if (currentLabel != null) {
                    currentPlain = currentLabel.startsWith("I-") ? currentLabel.substring(2) : currentLabel;
                }
                boolean paragraphClosed = false;
                if (previousLabel != null) {
                    paragraphClosed = testClosingTag(sb, currentPlain, previousPlain, currentLabel);
                }
                // Try each field type in turn; the first writer that recognises the label wins.
                boolean written = writeField(sb, currentLabel, previousPlain, encodedToken, TaggingLabels.OTHER_LABEL, "<note type=\"other\">", addSpace, 3, false);
                if (!written) {
                    // When a paragraph has just been closed, the previous label is blanked so that a
                    // new opening tag is written.
                    written = paragraphClosed
                            ? writeFieldBeginEnd(sb, currentLabel, "", encodedToken, TaggingLabels.PARAGRAPH_LABEL, "<p>", addSpace, 3, false)
                            : writeFieldBeginEnd(sb, currentLabel, previousLabel, encodedToken, TaggingLabels.PARAGRAPH_LABEL, "<p>", addSpace, 3, false);
                }
                if (!written) {
                    written = writeField(sb, currentLabel, previousPlain, encodedToken, TaggingLabels.CITATION_MARKER_LABEL, "<ref type=\"biblio\">", addSpace, 3, false);
                }
                if (!written) {
                    written = writeField(sb, currentLabel, previousPlain, encodedToken, TaggingLabels.TABLE_MARKER_LABEL, "<ref type=\"table\">", addSpace, 3, false);
                }
                if (!written) {
                    written = writeField(sb, currentLabel, previousPlain, encodedToken, TaggingLabels.EQUATION_MARKER_LABEL, "<ref type=\"formula\">", addSpace, 3, false);
                }
                if (!written) {
                    written = writeField(sb, currentLabel, previousPlain, encodedToken, TaggingLabels.SECTION_LABEL, "<head>", addSpace, 3, false);
                }
                if (!written) {
                    written = writeField(sb, currentLabel, previousPlain, encodedToken, TaggingLabels.EQUATION_LAB, "<formula>", addSpace, 4, false);
                }
                if (!written) {
                    written = writeField(sb, currentLabel, previousPlain, encodedToken, TaggingLabels.EQUATION_ID_LABEL, TaggingLabels.LABEL_LABEL, addSpace, 4, false);
                }
                if (!written) {
                    written = writeField(sb, currentLabel, previousPlain, encodedToken, TaggingLabels.FIGURE_MARKER_LABEL, "<ref type=\"figure\">", addSpace, 3, false);
                }
                if (!written) {
                    written = writeField(sb, currentLabel, previousPlain, encodedToken, TaggingLabels.FIGURE_LABEL, TaggingLabels.FIGURE_LABEL, addSpace, 3, false);
                }
                if (!written) {
                    written = writeField(sb, currentLabel, previousPlain, encodedToken, TaggingLabels.TABLE_LABEL, "<figure type=\"table\">", addSpace, 3, false);
                }
                if (!written) {
                    writeFieldBeginEnd(sb, currentLabel, previousLabel, encodedToken, TaggingLabels.ITEM_LABEL, TaggingLabels.ITEM_LABEL, addSpace, 3, false);
                }
                previousLabel = currentLabel;
                // Last line of the input: close whatever element is still open.
                if (!lines.hasMoreTokens() && previousLabel != null) {
                    testClosingTag(sb, "", currentPlain, currentLabel);
                }
                firstLine = false;
            }
            return sb;
        } catch (Exception e) {
            // The cause is carried by the wrapping exception, so it is not printed here as well.
            throw new GrobidException("An exception occured while running Grobid.", e);
        }
    }

    /**
     * Appends one token to the buffer if its label matches the field handled by this call.
     *
     * <p>The token is written on its own when it carries the same label as the previous token.
     * Otherwise the opening tag is written first: inline (no indentation) for the four marker
     * fields, or preceded by {@code i} tab characters for the other fields. When the previous label
     * is a citation, figure or equation marker and the field is not itself a marker, only the token
     * is written.
     *
     * @param sb   buffer receiving the output
     * @param str  label of the current token; {@code null} makes the method return {@code false}
     * @param str2 label of the previous token, may be {@code null}
     * @param str3 text of the current token
     * @param str4 label of the field this call is responsible for
     * @param str5 opening tag to write when the field starts
     * @param z    whether a space is written before inline output
     * @param i    number of tab characters written before a non-inline opening tag
     * @param z2   whether an {@code xml:id} attribute built from a generated key is injected into the tag
     * @return {@code true} when {@code str} equals {@code str4} or {@code "I-" + str4}, i.e. when
     *         something was written
     */
    public static boolean writeField(StringBuilder sb, String str, String str2, String str3, String str4, String str5, boolean z, int i, boolean z2) {
        if (str == null) {
            return false;
        }
        if (!str.equals(str4) && !str.equals("I-" + str4)) {
            return false;
        }

        String openingTag = str5;
        if (z2) {
            String idFragment = KeyGen.getKey().substring(0, 7);
            int cut = openingTag.length() - 2;
            // Only tags whose second-to-last character is '>' receive the identifier.
            if (openingTag.charAt(cut) == '>') {
                openingTag = openingTag.substring(0, cut) + " xml:id=\"_" + idFragment + "\">";
            }
        }

        final String leading = z ? " " : "";
        if (str.equals(str2) || str.equals("I-" + str2)) {
            // Same field as the previous token: continue it.
            sb.append(leading).append(str3);
        } else if (str4.equals(TaggingLabels.CITATION_MARKER_LABEL)
                || str4.equals(TaggingLabels.FIGURE_MARKER_LABEL)
                || str4.equals(TaggingLabels.TABLE_MARKER_LABEL)
                || str4.equals(TaggingLabels.EQUATION_MARKER_LABEL)) {
            // Marker fields are opened inline, without indentation.
            sb.append(leading).append(openingTag).append(str3);
        } else if (str2 == null
                || !(str2.equals(TaggingLabels.CITATION_MARKER_LABEL)
                        || str2.equals(TaggingLabels.FIGURE_MARKER_LABEL)
                        || str2.equals(TaggingLabels.EQUATION_MARKER_LABEL))) {
            for (int tabs = i; tabs > 0; tabs--) {
                sb.append("\t");
            }
            sb.append(openingTag).append(str3);
        } else {
            sb.append(leading).append(str3);
        }
        return true;
    }

    /**
     * Appends one token to the buffer for a field whose start is signalled by the {@code "I-"} prefix.
     *
     * <p>Unlike {@link #writeField}, the previous label is compared with its prefix intact. The
     * opening tag (preceded by {@code i} tabs) is written unless the token continues the field or
     * the previous label ends with one of the four marker labels.
     *
     * @param sb   buffer receiving the output
     * @param str  label of the current token; {@code null} makes the method return {@code false}
     * @param str2 label of the previous token; {@code null} is treated as the empty string
     * @param str3 text of the current token
     * @param str4 label of the field this call is responsible for
     * @param str5 opening tag to write when the field starts
     * @param z    whether a space is written before the token when no tag is opened
     * @param i    number of tab characters written before the opening tag
     * @param z2   whether an {@code xml:id} attribute built from a generated key is injected into the tag
     * @return {@code true} when {@code str} equals {@code str4} or {@code "I-" + str4}
     */
    public static boolean writeFieldBeginEnd(StringBuilder sb, String str, String str2, String str3, String str4, String str5, boolean z, int i, boolean z2) {
        if (str == null) {
            return false;
        }
        if (!(str.equals(str4) || str.equals("I-" + str4))) {
            return false;
        }

        final String previous = (str2 != null) ? str2 : "";
        String openingTag = str5;
        if (z2) {
            String idFragment = KeyGen.getKey().substring(0, 7);
            int cut = openingTag.length() - 2;
            if (openingTag.charAt(cut) == '>') {
                openingTag = openingTag.substring(0, cut) + " xml:id=\"_" + idFragment + "\">";
            }
        }

        // The token is appended bare when it continues the field, or when the previous token was a
        // marker; every other case opens the field.
        boolean appendBare = previous.equals("I-" + str4)
                || (previous.equals(str4) && str.equals(str4))
                || previous.endsWith(TaggingLabels.CITATION_MARKER_LABEL)
                || previous.endsWith(TaggingLabels.FIGURE_MARKER_LABEL)
                || previous.endsWith(TaggingLabels.TABLE_MARKER_LABEL)
                || previous.endsWith(TaggingLabels.EQUATION_MARKER_LABEL);

        if (appendBare) {
            if (z) {
                sb.append(" ");
            }
            sb.append(str3);
        } else {
            for (int tabs = i; tabs > 0; tabs--) {
                sb.append("\t");
            }
            sb.append(openingTag).append(str3);
        }
        return true;
    }

    /**
     * Writes the closing tag of the previous field when the current token starts something new.
     *
     * <p>Nothing is closed when the current label is one of the four marker labels: the marker is
     * written inside the element that is still open.
     *
     * @param sb   buffer receiving the closing tag
     * @param str  current label without its {@code "I-"} prefix
     * @param str2 previous label without its {@code "I-"} prefix
     * @param str3 current label as produced by the model
     * @return {@code true} only when a paragraph was closed
     */
    private static boolean testClosingTag(StringBuilder sb, String str, String str2, String str3) {
        // A field ends when the label changes, or when a new paragraph or item begins.
        if (str.equals(str2) && !str3.equals("I-<paragraph>") && !str3.equals("I-<item>")) {
            return false;
        }
        boolean currentIsMarker = str.equals(TaggingLabels.CITATION_MARKER_LABEL)
                || str.equals(TaggingLabels.EQUATION_MARKER_LABEL)
                || str.equals(TaggingLabels.FIGURE_MARKER_LABEL)
                || str.equals(TaggingLabels.TABLE_MARKER_LABEL);
        if (currentIsMarker) {
            return false;
        }

        final String blockEnd = "\n\n";
        String closing;
        boolean paragraphClosed = false;
        if (str2.equals(TaggingLabels.OTHER_LABEL)) {
            closing = "</note>" + blockEnd;
        } else if (str2.equals(TaggingLabels.PARAGRAPH_LABEL)) {
            // The current label cannot be a marker at this point, so the paragraph always closes.
            closing = "</p>" + blockEnd;
            paragraphClosed = true;
        } else if (str2.equals(TaggingLabels.SECTION_LABEL) || str2.equals("<subsection>")) {
            closing = "</head>" + blockEnd;
        } else if (str2.equals(TaggingLabels.EQUATION_LAB)) {
            closing = "</formula>" + blockEnd;
        } else if (str2.equals(TaggingLabels.EQUATION_ID_LABEL)) {
            closing = "</label>" + blockEnd;
        } else if (str2.equals(TaggingLabels.TABLE_LABEL)) {
            // NOTE(review): trainingExtraction opens this field with <figure type="table"> while
            // </table> is written here - confirm which tag is intended.
            closing = "</table>" + blockEnd;
        } else if (str2.equals(TaggingLabels.FIGURE_LABEL)) {
            closing = "</figure>" + blockEnd;
        } else if (str2.equals(TaggingLabels.ITEM_LABEL)) {
            closing = "</item>" + blockEnd;
        } else if (str2.equals(TaggingLabels.CITATION_MARKER_LABEL)
                || str2.equals(TaggingLabels.FIGURE_MARKER_LABEL)
                || str2.equals(TaggingLabels.TABLE_MARKER_LABEL)
                || str2.equals(TaggingLabels.EQUATION_MARKER_LABEL)) {
            closing = "</ref>";
        } else {
            closing = null;
        }

        if (closing != null) {
            sb.append(closing);
        }
        return paragraphClosed;
    }

    /**
     * Builds a {@link Figure} for every figure cluster found in the labelled full text.
     *
     * <p>Each figure is produced by the figure parser from the cluster tokens, then completed with
     * the block pointers of its non-blank tokens, its layout tokens, the page of its first non-blank
     * token and an identifier equal to its index in the result list. The list is stored on the
     * document before graphic objects are assigned to the figures.
     *
     * @param str      labelled result of the full text model
     * @param list     layout tokens corresponding to the labelled text
     * @param document document receiving the figures
     * @return the figures, in cluster order
     */
    private List<Figure> processFigures(String str, List<LayoutToken> list, Document document) {
        List<Figure> figures = new ArrayList<>();
        for (TaggingTokenCluster cluster : Iterables.filter(new TaggingTokenClusteror(GrobidModels.FULLTEXT, str, list, true).cluster(), new TaggingTokenClusteror.LabelTypePredicate(TaggingLabels.FIGURE))) {
            List<LayoutToken> figureTokens = cluster.concatTokens();
            Figure figure = this.parsers.getFigureParser().processing(figureTokens, cluster.getFeatureBlock());

            TreeSet<Integer> blockPtrs = new TreeSet<>();
            LayoutToken firstContentToken = null;
            for (LayoutToken token : figureTokens) {
                String text = token.t();
                if (LayoutTokensUtil.spaceyToken(text) || LayoutTokensUtil.newLineToken(text)) {
                    continue;
                }
                blockPtrs.add(Integer.valueOf(token.getBlockPtr()));
                if (firstContentToken == null) {
                    firstContentToken = token;
                }
            }
            figure.setBlockPtrs(blockPtrs);
            figure.setLayoutTokens(figureTokens);
            // A cluster made only of spaces and line breaks has no page to report. The previous
            // search for the first non-blank token never terminated in that case.
            if (firstContentToken != null) {
                figure.setPage(firstContentToken.getPage());
            }

            figures.add(figure);
            figure.setId("" + (figures.size() - 1));
        }
        document.setFigures(figures);
        document.assignGraphicObjectsToFigures();
        return figures;
    }

    /**
     * Extracts the figure segments from the labelled full text and asks the figure parser to
     * produce training data for each of them.
     *
     * <p>The labelled result is read line by line (tab-separated columns, token text first, label
     * last) while the layout tokens are scanned in parallel to collect the tokens of each figure.
     *
     * @param str  labelled result of the full text model
     * @param list layout tokens corresponding to the labelled text
     * @param str2 value passed to the figure parser's {@code getTEIHeader}
     * @return a pair whose left value is the accumulated left results of {@code createTrainingData}
     *         wrapped in the TEI header and closing tags (empty if nothing was produced), and whose
     *         right value is the accumulated right results (presumably the raw feature data - TODO confirm)
     */
    private Pair<String, String> processTrainingDataFigures(String str, List<LayoutToken> list, String str2) {
        // sb: left (TEI) output, sb2: right output
        StringBuilder sb = new StringBuilder();
        StringBuilder sb2 = new StringBuilder();
        // index of the figure being produced, used to build its "Fig<n>" identifier
        int i = 0;
        StringTokenizer stringTokenizer = new StringTokenizer(str, "\n");
        // z: true while the lines being read belong to an open figure
        boolean z = false;
        // sb3: labelled lines (without their label column) of the open figure
        StringBuilder sb3 = new StringBuilder();
        // arrayList: layout tokens of the open figure
        ArrayList arrayList = new ArrayList();
        // i2: current position in the layout token list
        int i2 = 0;
        while (stringTokenizer.hasMoreTokens()) {
            String nextToken = stringTokenizer.nextToken();
            String[] split = nextToken.split("\t");
            String trim = split[0].trim();
            int i3 = i2;
            boolean z2 = false;
            // arrayList2: layout tokens consumed while looking for the token of this line
            ArrayList arrayList2 = new ArrayList();
            // advance in the layout tokens until one whose trimmed text equals the token of this line
            while (!z2 && i2 < list.size()) {
                String trim2 = list.get(i2).getText().trim();
                if (z) {
                    arrayList.add(list.get(i2));
                }
                arrayList2.add(list.get(i2));
                if (trim2.equals(trim)) {
                    z2 = true;
                }
                i2++;
            }
            // when the scan ran to the end of the list after consuming more than two tokens,
            // the line is skipped and the position is restored (else branch below)
            if (i2 != list.size() || i2 - i3 <= 2) {
                String str3 = split[split.length - 1];
                // NOTE(review): the result of this call is discarded
                GenericTaggerUtils.getPlainLabel(str3);
                if (str3.equals(TaggingLabels.FIGURE_LABEL) || (str3.equals("I-<figure>") && !z)) {
                    // the line belongs to a figure: open one if needed and record the line
                    if (!z) {
                        Iterator it = arrayList2.iterator();
                        while (it.hasNext()) {
                            arrayList.add((LayoutToken) it.next());
                        }
                        z = true;
                    }
                    sb3.append(nextToken.substring(0, nextToken.lastIndexOf("\t"))).append("\n");
                } else if (str3.equals("I-<figure>") || z) {
                    // the open figure ends here, either because a new figure starts or because
                    // the label is no longer a figure label
                    if (arrayList.size() > 0) {
                        // drop the tokens consumed for the current line: they were added to the
                        // open figure by the scan above but are not part of it
                        int size = arrayList2.size();
                        for (int i4 = 0; i4 < size; i4++) {
                            arrayList.remove(arrayList.size() - 1);
                        }
                    }
                    // also consume one following newline, carriage return or space token
                    if (i2 != list.size() && (list.get(i2).getText().equals("\n") || list.get(i2).getText().equals("\r") || list.get(i2).getText().equals(" "))) {
                        arrayList.add(list.get(i2));
                        i2++;
                    }
                    // strip leading newline and space tokens from the figure
                    while (arrayList.size() > 0 && (((LayoutToken) arrayList.get(0)).getText().equals("\n") || ((LayoutToken) arrayList.get(0)).getText().equals(" "))) {
                        arrayList.remove(0);
                    }
                    Pair<String, String> createTrainingData = this.parsers.getFigureParser().createTrainingData(arrayList, sb3.toString(), "Fig" + i);
                    arrayList = new ArrayList();
                    sb3 = new StringBuilder();
                    if (createTrainingData != null) {
                        // the TEI header is written once, before the first figure
                        if (sb.length() == 0) {
                            sb.append(this.parsers.getFigureParser().getTEIHeader(str2)).append("\n\n");
                        }
                        if (createTrainingData.getLeft() != null) {
                            sb.append(createTrainingData.getLeft()).append("\n\n");
                        }
                        if (createTrainingData.getRight() != null) {
                            sb2.append(createTrainingData.getRight()).append("\n\n");
                        }
                    }
                    if (str3.equals("I-<figure>")) {
                        // a new figure starts on this very line
                        Iterator it2 = arrayList2.iterator();
                        while (it2.hasNext()) {
                            arrayList.add((LayoutToken) it2.next());
                        }
                        sb3.append(nextToken.substring(0, nextToken.lastIndexOf("\t"))).append("\n");
                    } else {
                        z = false;
                    }
                    i++;
                } else {
                    z = false;
                }
            } else {
                i2 = i3;
            }
        }
        // close the TEI document only when something was written
        if (sb.length() != 0) {
            sb.append("\n    </text>\n</tei>\n");
        }
        return Pair.of(sb.toString(), sb2.toString());
    }

    /**
     * Builds a {@link Table} for every table cluster found in the labelled full text.
     *
     * <p>Each table is produced by the table parser from the cluster tokens, then completed with the
     * block pointers of its non-blank tokens, its layout tokens, the page of its first non-blank
     * token and an identifier equal to its index in the result list. The list is stored on the
     * document, which then post-processes its tables.
     *
     * @param str      labelled result of the full text model
     * @param list     layout tokens corresponding to the labelled text
     * @param document document receiving the tables
     * @return the tables, in cluster order
     */
    private List<Table> processTables(String str, List<LayoutToken> list, Document document) {
        List<Table> tables = new ArrayList<>();
        for (TaggingTokenCluster cluster : Iterables.filter(new TaggingTokenClusteror(GrobidModels.FULLTEXT, str, list, true).cluster(), new TaggingTokenClusteror.LabelTypePredicate(TaggingLabels.TABLE))) {
            List<LayoutToken> tableTokens = cluster.concatTokens();
            Table table = this.parsers.getTableParser().processing(tableTokens, cluster.getFeatureBlock());

            TreeSet<Integer> blockPtrs = new TreeSet<>();
            LayoutToken firstContentToken = null;
            for (LayoutToken token : tableTokens) {
                String text = token.t();
                if (LayoutTokensUtil.spaceyToken(text) || LayoutTokensUtil.newLineToken(text)) {
                    continue;
                }
                blockPtrs.add(Integer.valueOf(token.getBlockPtr()));
                if (firstContentToken == null) {
                    firstContentToken = token;
                }
            }
            table.setBlockPtrs(blockPtrs);
            table.setLayoutTokens(tableTokens);
            // A cluster made only of spaces and line breaks has no page to report. The previous
            // search for the first non-blank token never terminated in that case.
            if (firstContentToken != null) {
                table.setPage(firstContentToken.getPage());
            }

            tables.add(table);
            table.setId("" + (tables.size() - 1));
        }
        document.setTables(tables);
        document.postProcessTables();
        return tables;
    }

    /**
     * Extracts the table segments from the labelled full text and asks the table parser to produce
     * training data for each of them.
     *
     * <p>The labelled result is read line by line (tab-separated columns, token text first, label
     * last) while the layout tokens are scanned in parallel to collect the tokens of each table.
     *
     * @param str  labelled result of the full text model
     * @param list layout tokens corresponding to the labelled text
     * @param str2 value passed to the table parser's {@code getTEIHeader}
     * @return a pair whose left value is the accumulated left results of {@code createTrainingData}
     *         wrapped in the TEI header and closing tags (empty if nothing was produced), and whose
     *         right value is the accumulated right results (presumably the raw feature data - TODO confirm)
     */
    private Pair<String, String> processTrainingDataTables(String str, List<LayoutToken> list, String str2) {
        // sb: left (TEI) output, sb2: right output
        StringBuilder sb = new StringBuilder();
        StringBuilder sb2 = new StringBuilder();
        // index of the table being produced, used to build the identifier passed to the parser
        int i = 0;
        StringTokenizer stringTokenizer = new StringTokenizer(str, "\n");
        // z: true while the lines being read belong to an open table
        boolean z = false;
        // sb3: labelled lines (without their label column) of the open table
        StringBuilder sb3 = new StringBuilder();
        // arrayList: layout tokens of the open table
        ArrayList arrayList = new ArrayList();
        // i2: current position in the layout token list
        int i2 = 0;
        while (stringTokenizer.hasMoreTokens()) {
            String nextToken = stringTokenizer.nextToken();
            String[] split = nextToken.split("\t");
            String trim = split[0].trim();
            int i3 = i2;
            boolean z2 = false;
            // arrayList2: layout tokens consumed while looking for the token of this line
            ArrayList arrayList2 = new ArrayList();
            // advance in the layout tokens until one whose trimmed text equals the token of this line
            while (!z2 && i2 < list.size()) {
                String trim2 = list.get(i2).getText().trim();
                if (z) {
                    arrayList.add(list.get(i2));
                }
                arrayList2.add(list.get(i2));
                if (trim2.equals(trim)) {
                    z2 = true;
                }
                i2++;
            }
            // when the scan ran to the end of the list after consuming more than two tokens,
            // the line is skipped and the position is restored (else branch below)
            if (i2 != list.size() || i2 - i3 <= 2) {
                String str3 = split[split.length - 1];
                // NOTE(review): the result of this call is discarded
                GenericTaggerUtils.getPlainLabel(str3);
                if (str3.equals(TaggingLabels.TABLE_LABEL) || (str3.equals("I-<table>") && !z)) {
                    // the line belongs to a table: open one if needed and record the line
                    if (!z) {
                        Iterator it = arrayList2.iterator();
                        while (it.hasNext()) {
                            arrayList.add((LayoutToken) it.next());
                        }
                        z = true;
                    }
                    sb3.append(nextToken.substring(0, nextToken.lastIndexOf("\t"))).append("\n");
                } else if (str3.equals("I-<table>") || z) {
                    // the open table ends here, either because a new table starts or because
                    // the label is no longer a table label
                    if (arrayList.size() > 0) {
                        // drop the tokens consumed for the current line: they were added to the
                        // open table by the scan above but are not part of it
                        int size = arrayList2.size();
                        for (int i4 = 0; i4 < size; i4++) {
                            arrayList.remove(arrayList.size() - 1);
                        }
                    }
                    // also consume one following newline, carriage return or space token
                    if (i2 != list.size() && (list.get(i2).getText().equals("\n") || list.get(i2).getText().equals("\r") || list.get(i2).getText().equals(" "))) {
                        arrayList.add(list.get(i2));
                        i2++;
                    }
                    // strip leading newline and space tokens from the table
                    while (arrayList.size() > 0 && (((LayoutToken) arrayList.get(0)).getText().equals("\n") || ((LayoutToken) arrayList.get(0)).getText().equals(" "))) {
                        arrayList.remove(0);
                    }
                    // NOTE(review): the identifier prefix is "Fig", same as for figures - confirm it is intended
                    Pair<String, String> createTrainingData = this.parsers.getTableParser().createTrainingData(arrayList, sb3.toString(), "Fig" + i);
                    arrayList = new ArrayList();
                    sb3 = new StringBuilder();
                    if (createTrainingData != null) {
                        // the TEI header is written once, before the first table
                        if (sb.length() == 0) {
                            sb.append(this.parsers.getTableParser().getTEIHeader(str2)).append("\n\n");
                        }
                        if (createTrainingData.getLeft() != null) {
                            sb.append(createTrainingData.getLeft()).append("\n\n");
                        }
                        if (createTrainingData.getRight() != null) {
                            sb2.append(createTrainingData.getRight()).append("\n\n");
                        }
                    }
                    if (str3.equals("I-<table>")) {
                        // a new table starts on this very line
                        arrayList.addAll(arrayList2);
                        sb3.append(nextToken.substring(0, nextToken.lastIndexOf("\t"))).append("\n");
                    } else {
                        z = false;
                    }
                    i++;
                } else {
                    z = false;
                }
            } else {
                i2 = i3;
            }
        }
        // close the TEI document only when something was written
        if (sb.length() != 0) {
            sb.append("\n    </text>\n</tei>\n");
        }
        return Pair.of(sb.toString(), sb2.toString());
    }

    /**
     * Groups the equation and equation-label clusters of the labelled full text into {@link Equation}s.
     *
     * <p>Consecutive equation and label clusters are accumulated in one equation. The equation is
     * stored, and a new one started, when it already has both content and label, when new content
     * arrives while it already has content, or when a cluster of another type is met. Each stored
     * equation gets its index in the result list as identifier. The list is set on the document.
     *
     * @param str      labelled result of the full text model
     * @param list     layout tokens corresponding to the labelled text
     * @param document document receiving the equations
     * @return the equations, in order of appearance
     */
    private List<Equation> processEquations(String str, List<LayoutToken> list, Document document) {
        List<Equation> equations = new ArrayList<>();
        Equation current = null;
        for (TaggingTokenCluster cluster : new TaggingTokenClusteror(GrobidModels.FULLTEXT, str, list, true).cluster()) {
            if (cluster == null) {
                continue;
            }
            TaggingLabel label = cluster.getTaggingLabel();
            Engine.getCntManager().i(label);

            if (label == TaggingLabels.EQUATION || label == TaggingLabels.EQUATION_LABEL) {
                // The cluster tokens are computed once and reused for the text and the layout tokens.
                List<LayoutToken> clusterTokens = cluster.concatTokens();
                String text = LayoutTokensUtil.normalizeText(LayoutTokensUtil.toText(clusterTokens));
                if (current == null) {
                    current = new Equation();
                }
                // A complete equation (content and label) cannot take anything more.
                if (!current.getContent().isEmpty() && !current.getLabel().isEmpty()) {
                    equations.add(current);
                    current.setId("" + (equations.size() - 1));
                    current = new Equation();
                }
                if (label.equals(TaggingLabels.EQUATION)) {
                    // New content while content is already present starts another equation.
                    if (!current.getContent().isEmpty()) {
                        equations.add(current);
                        current.setId("" + (equations.size() - 1));
                        current = new Equation();
                    }
                    current.appendContent(text);
                    current.addLayoutTokens(clusterTokens);
                } else if (label.equals(TaggingLabels.EQUATION_LABEL)) {
                    current.appendLabel(text);
                    current.addLayoutTokens(clusterTokens);
                }
            } else if (current != null) {
                // Any other cluster ends the equation in progress.
                equations.add(current);
                current.setId("" + (equations.size() - 1));
                current = null;
            }
        }
        if (current != null) {
            equations.add(current);
            current.setId("" + (equations.size() - 1));
        }
        document.setEquations(equations);
        return equations;
    }

    /**
     * Serialises the document to TEI and stores the result on the document.
     *
     * <p>The output is assembled in this order: header and body, opening of the {@code <back>}
     * element, acknowledgement (when the acknowledgement part yields featured text), annex,
     * references, then the closing tags. Nothing is done when the document has no blocks.
     *
     * @param document             document to serialise, also receives the TEI string
     * @param str                  labelled body text
     * @param str2                 labelled annex text
     * @param layoutTokenization   tokenization of the body
     * @param list                 layout tokens of the annex
     * @param biblioItem           header metadata
     * @param list2                figures of the document
     * @param list3                tables of the document
     * @param list4                equations of the document
     * @param grobidAnalysisConfig analysis configuration passed to the formatter
     * @throws GrobidException wrapping any exception raised while formatting
     */
    private void toTEI(Document document, String str, String str2, LayoutTokenization layoutTokenization, List<LayoutToken> list, BiblioItem biblioItem, List<Figure> list2, List<Table> list3, List<Equation> list4, GrobidAnalysisConfig grobidAnalysisConfig) {
        if (document.getBlocks() != null) {
            final List<BibDataSet> references = document.getBibDataSets();
            final TEIFormatter formatter = new TEIFormatter(document, this);
            try {
                StringBuilder tei = formatter.toTEIBody(
                        formatter.toTEIHeader(biblioItem, null, references, grobidAnalysisConfig),
                        str, biblioItem, references, layoutTokenization, list2, list3, list4, document, grobidAnalysisConfig);
                tei.append("\t\t<back>\n");

                Pair<String, LayoutTokenization> acknowledgement = getBodyTextFeatured(document, document.getDocumentPart(SegmentationLabels.ACKNOWLEDGEMENT));
                if (acknowledgement != null) {
                    String featured = acknowledgement.getLeft();
                    List<LayoutToken> acknowledgementTokens = acknowledgement.getRight().getTokenization();
                    // The labeller is only run on non-empty featured text.
                    String labelled = (featured == null || featured.length() <= 0) ? null : label(featured);
                    tei = formatter.toTEIAcknowledgement(tei, labelled, acknowledgementTokens, references, grobidAnalysisConfig);
                }

                tei = formatter.toTEIReferences(
                        formatter.toTEIAnnex(tei, str2, biblioItem, references, list, document, grobidAnalysisConfig),
                        references, grobidAnalysisConfig);
                document.calculateTeiIdToBibDataSets();
                tei.append("\t\t</back>\n").append("\t</text>\n").append("</TEI>\n");
                document.setTei(tei.toString());
            } catch (Exception e) {
                throw new GrobidException("An exception occurred while running Grobid.", e);
            }
        }
    }

    /**
     * Collects the layout tokens of the clusters carrying one of the requested labels, grouped into
     * segments.
     *
     * <p>Clusters whose label belongs to {@code inlineFullTextLabels} never end a segment; any other
     * cluster ends the current segment if it is not empty.
     *
     * @param list  labels whose tokens are to be collected
     * @param str   labelled result of the full text model
     * @param list2 layout tokens corresponding to the labelled text
     * @return the non-empty segments, in order of appearance
     */
    public static List<LayoutTokenization> getDocumentFullTextTokens(List<TaggingLabel> list, String str, List<LayoutToken> list2) {
        List<TaggingTokenCluster> clusters = new TaggingTokenClusteror(GrobidModels.FULLTEXT, str, list2).cluster();
        List<LayoutTokenization> segments = new ArrayList<>();
        LayoutTokenization current = null;

        for (TaggingTokenCluster cluster : clusters) {
            if (cluster == null) {
                continue;
            }
            TaggingLabel label = cluster.getTaggingLabel();
            List<LayoutToken> clusterTokens = cluster.concatTokens();

            if (!inlineFullTextLabels.contains(label)) {
                // A non-inline cluster closes the segment in progress, if it holds anything.
                if (current != null && current.size() > 0) {
                    segments.add(current);
                    current = new LayoutTokenization();
                }
            } else if (current == null) {
                current = new LayoutTokenization();
            }

            if (!list.contains(label)) {
                continue;
            }
            if (current == null) {
                current = new LayoutTokenization();
            }
            current.addTokens(clusterTokens);
        }

        if (current != null && current.size() > 0) {
            segments.add(current);
        }
        return segments;
    }

    /**
     * Closes this parser by delegating to the superclass implementation.
     *
     * @throws IOException if the superclass {@code close()} throws it
     */
    @Override // org.grobid.core.engines.AbstractParser, java.io.Closeable, java.lang.AutoCloseable
    public void close() throws IOException {
        super.close();
    }
}
