package org.grobid.core.engines;

import com.google.common.base.Splitter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.SortedSet;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.grobid.core.GrobidModels;
import org.grobid.core.data.BiblioItem;
import org.grobid.core.data.Date;
import org.grobid.core.data.Keyword;
import org.grobid.core.data.Person;
import org.grobid.core.document.Document;
import org.grobid.core.document.DocumentPiece;
import org.grobid.core.document.DocumentPointer;
import org.grobid.core.document.DocumentSource;
import org.grobid.core.document.TEIFormatter;
import org.grobid.core.engines.config.GrobidAnalysisConfig;
import org.grobid.core.engines.label.SegmentationLabels;
import org.grobid.core.engines.label.TaggingLabels;
import org.grobid.core.exceptions.GrobidException;
import org.grobid.core.exceptions.GrobidExceptionStatus;
import org.grobid.core.features.FeatureFactory;
import org.grobid.core.features.FeaturesVectorHeader;
import org.grobid.core.lang.Language;
import org.grobid.core.layout.Block;
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.utilities.Consolidation;
import org.grobid.core.utilities.GrobidProperties;
import org.grobid.core.utilities.LanguageUtilities;
import org.grobid.core.utilities.LayoutTokensUtil;
import org.grobid.core.utilities.TextUtilities;
import org.grobid.core.utilities.counters.CntManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wipo.analyzers.wipokr.utils.EomiUtil;

/* loaded from: input_file:org/grobid/core/engines/HeaderParser.class */
public class HeaderParser extends AbstractParser {
    // Class-wide SLF4J logger.
    private static final Logger LOGGER = LoggerFactory.getLogger(HeaderParser.class);
    // Language identification service; the processing methods call runLanguageId on it.
    private LanguageUtilities languageUtilities;
    // Access point to the other parsers used here (segmentation, author, affiliation/address,
    // citation and date parsers).
    private EngineParsers parsers;

    /**
     * Creates a header parser bound to the {@link GrobidModels#HEADER} model.
     *
     * @param engineParsers registry used to reach the other parsers
     * @param cntManager    counter manager handed to the parent parser
     */
    public HeaderParser(EngineParsers engineParsers, CntManager cntManager) {
        super(GrobidModels.HEADER, cntManager);
        languageUtilities = LanguageUtilities.getInstance();
        parsers = engineParsers;
        // Result deliberately discarded; presumably forces the configuration to load - confirm.
        GrobidProperties.getInstance();
    }

    /**
     * Creates a header parser bound to the {@link GrobidModels#HEADER} model, without a
     * counter manager.
     *
     * @param engineParsers registry used to reach the other parsers
     */
    public HeaderParser(EngineParsers engineParsers) {
        super(GrobidModels.HEADER);
        languageUtilities = LanguageUtilities.getInstance();
        parsers = engineParsers;
        // Result deliberately discarded; presumably forces the configuration to load - confirm.
        GrobidProperties.getInstance();
    }

    /**
     * Extracts the header of a PDF file using the segmentation model to locate the header
     * section, and returns it as TEI together with the segmented document.
     *
     * <p>The PDF-derived {@link DocumentSource} is always closed exactly once, whether the
     * extraction succeeds or throws.
     *
     * @param file                 PDF file to process
     * @param biblioItem           bibliographic item to populate with the extracted metadata
     * @param grobidAnalysisConfig analysis configuration; its start and end pages bound the PDF conversion
     * @return pair of (TEI header string, segmented document); the TEI string is {@code null}
     *         when the document has no header part
     */
    public Pair<String, Document> processing(File file, BiblioItem biblioItem, GrobidAnalysisConfig grobidAnalysisConfig) {
        DocumentSource documentSource = null;
        try {
            documentSource = DocumentSource.fromPdf(file, grobidAnalysisConfig.getStartPage(), grobidAnalysisConfig.getEndPage());
            Document segmented = this.parsers.getSegmentationParser().processing(documentSource, grobidAnalysisConfig);
            String tei = processingHeaderSection(grobidAnalysisConfig, segmented, biblioItem);
            return new ImmutablePair<String, Document>(tei, segmented);
        } finally {
            // Single cleanup point: avoids the duplicated close calls of the expanded try/catch form.
            if (documentSource != null) {
                documentSource.close(true, true, true);
            }
        }
    }

    /**
     * Extracts the header of a PDF file from its first blocks, without running the
     * segmentation model, and returns it as TEI together with the tokenized document.
     *
     * <p>The PDF-derived {@link DocumentSource} is closed in every case, including when
     * tokenization or header extraction fails.
     *
     * @param str                  path of the PDF file to process
     * @param biblioItem           bibliographic item to populate with the extracted metadata
     * @param grobidAnalysisConfig analysis configuration; its start and end pages bound the PDF conversion
     * @return pair of (TEI header string, tokenized document)
     * @throws GrobidException wrapping any exception raised during processing, including the
     *                         one raised when the PDF yields no blocks
     */
    public Pair<String, Document> processing2(String str, BiblioItem biblioItem, GrobidAnalysisConfig grobidAnalysisConfig) {
        DocumentSource documentSource = null;
        try {
            documentSource = DocumentSource.fromPdf(new File(str), grobidAnalysisConfig.getStartPage(), grobidAnalysisConfig.getEndPage());
            Document document = new Document(documentSource);
            document.addTokenizedDocument(grobidAnalysisConfig);
            if (document.getBlocks() == null) {
                throw new GrobidException("PDF parsing resulted in empty content");
            }
            String tei = processingHeaderBlock(grobidAnalysisConfig, document, biblioItem);
            return Pair.of(tei, document);
        } catch (Exception e) {
            throw new GrobidException(e, GrobidExceptionStatus.GENERAL);
        } finally {
            // Previously the failure path never released the source (the guard was constant false).
            if (documentSource != null) {
                documentSource.close(true, true, true);
            }
        }
    }

    /**
     * Extracts header metadata from the header blocks of an already tokenized document and
     * serialises it as a TEI header.
     *
     * <p>When the featured header text is null or blank, a debug message is logged and the
     * {@code biblioItem} received from the caller is formatted unchanged. Otherwise the header
     * is labelled, the labelled result is turned into a {@link BiblioItem}, the language is
     * identified, and titles, authors, affiliations, editors, keywords, DOI and dates are
     * post-processed before optional consolidation.
     *
     * @param grobidAnalysisConfig analysis configuration (consolidation flag, TEI options)
     * @param document             tokenized document; receives the language and the resulting header item
     * @param biblioItem           bibliographic item to populate
     * @return the TEI header, closed with the {@code </text>} and {@code </TEI>} tags
     * @throws Exception any exception raised by the underlying parsers
     */
    public String processingHeaderBlock(GrobidAnalysisConfig grobidAnalysisConfig, Document document, BiblioItem biblioItem) throws Exception {
        String headerFeatured = document.getHeaderFeatured(true, true);
        List<LayoutToken> tokenizationsHeader = document.getTokenizationsHeader();
        if (headerFeatured == null || headerFeatured.trim().length() <= 0) {
            LOGGER.debug("WARNING: header is empty.");
        } else {
            String label = label(headerFeatured);
            biblioItem = resultExtraction(label, true, tokenizationsHeader, biblioItem, document);

            // Text sample for language identification: title, abstract, keywords, and the body
            // when the header alone provides fewer than 200 characters.
            // biblioItem is dereferenced here unconditionally, so it is non-null from this point on.
            String languageSample = biblioItem.getTitle() != null ? biblioItem.getTitle() : "";
            if (biblioItem.getAbstract() != null) {
                languageSample = languageSample + "\n" + biblioItem.getAbstract();
            }
            if (biblioItem.getKeywords() != null) {
                languageSample = languageSample + "\n" + biblioItem.getKeywords();
            }
            if (languageSample.length() < 200) {
                languageSample = languageSample + document.getBody();
            }
            Language language = this.languageUtilities.runLanguageId(languageSample);
            if (language != null) {
                String lang = language.getLang();
                document.setLanguage(lang);
                biblioItem.setLanguage(lang);
            }

            if (biblioItem.getAbstract() != null) {
                biblioItem.setAbstract(TextUtilities.dehyphenizeHard(biblioItem.getAbstract()));
            }
            BiblioItem.cleanTitles(biblioItem);
            if (biblioItem.getTitle() != null) {
                String title = TextUtilities.dehyphenize(biblioItem.getTitle()).trim();
                if (title.length() > 1) {
                    // NOTE(review): this constant comes from an unrelated analyzer package and looks
                    // like a decompiler substitution for a string literal - confirm its value.
                    if (title.startsWith(EomiUtil.RESULT_SUCCESS)) {
                        title = title.substring(1);
                    }
                    title = title.trim();
                }
                biblioItem.setTitle(title);
            }
            if (biblioItem.getBookTitle() != null) {
                biblioItem.setBookTitle(TextUtilities.dehyphenize(biblioItem.getBookTitle()));
            }

            biblioItem.setOriginalAuthors(biblioItem.getAuthors());
            // NOTE(review): affiliations, editors and reference handling only run when authors are
            // present here, whereas processingHeaderSection runs them unconditionally - confirm intended.
            if (biblioItem.getAuthors() != null) {
                String[] authorSegments = biblioItem.getAuthors().split("\n");
                List<List<LayoutToken>> authorTokenSegments = LayoutTokensUtil.split(biblioItem.getAuthorsTokens(), Pattern.compile("\n"), false);
                boolean fragmentedAuthors = authorSegments.length > 1;
                boolean hasMarker = false;
                // For each parsed author, the index of the author segment it came from.
                List<Integer> authorSegmentIndexes = new ArrayList<Integer>();
                for (int i = 0; i < authorSegments.length; i++) {
                    List<Person> segmentAuthors;
                    if (authorTokenSegments != null && i < authorTokenSegments.size()) {
                        segmentAuthors = this.parsers.getAuthorParser().processingHeaderWithLayoutTokens(authorTokenSegments.get(i));
                    } else {
                        // The token split produced fewer segments than the text split: parse the
                        // raw segment text instead of failing with an index error.
                        segmentAuthors = this.parsers.getAuthorParser().processingHeader(authorSegments[i]);
                    }
                    if (segmentAuthors != null) {
                        for (Person author : segmentAuthors) {
                            biblioItem.addFullAuthor(author);
                            if (author.getMarkers() != null) {
                                hasMarker = true;
                            }
                            authorSegmentIndexes.add(Integer.valueOf(i));
                        }
                    }
                }

                biblioItem.setFullAffiliations(this.parsers.getAffiliationAddressParser().processReflow(label, tokenizationsHeader));
                biblioItem.attachEmails();
                boolean attached = false;
                // Positional attachment: several author segments, no markers, and exactly one
                // affiliation per author segment.
                if (fragmentedAuthors && !hasMarker && biblioItem.getFullAffiliations() != null
                        && biblioItem.getFullAffiliations().size() == authorSegments.length) {
                    List<Person> fullAuthors = biblioItem.getFullAuthors();
                    if (fullAuthors != null) {
                        int authorIndex = 0;
                        for (Person author : fullAuthors) {
                            if (authorIndex < authorSegmentIndexes.size()) {
                                int segmentIndex = authorSegmentIndexes.get(authorIndex).intValue();
                                if (segmentIndex < biblioItem.getFullAffiliations().size()) {
                                    author.addAffiliation(biblioItem.getFullAffiliations().get(segmentIndex));
                                }
                            }
                            authorIndex++;
                        }
                    }
                    attached = true;
                    biblioItem.setFullAffiliations(null);
                    biblioItem.setAffiliation(null);
                }
                if (!attached) {
                    biblioItem.attachAffiliations();
                }

                if (biblioItem.getEditors() != null) {
                    biblioItem.setFullEditors(this.parsers.getAuthorParser().processingHeader(biblioItem.getEditors()));
                }
                if (biblioItem.getReference() != null) {
                    BiblioItem referenceItem = this.parsers.getCitationParser().processing(biblioItem.getReference(), 0);
                    if (referenceItem != null) {
                        BiblioItem.correct(biblioItem, referenceItem);
                    }
                }
            }

            if (biblioItem.getKeyword() != null) {
                String cleanKeywords = BiblioItem.cleanKeywords(TextUtilities.dehyphenize(biblioItem.getKeyword()));
                biblioItem.setKeyword(cleanKeywords.replace("\n", " ").replace("  ", " "));
                List<Keyword> segmentedKeywords = BiblioItem.segmentKeywords(cleanKeywords);
                if (segmentedKeywords != null && segmentedKeywords.size() > 0) {
                    biblioItem.setKeywords(segmentedKeywords);
                }
            }

            // A DOI is only taken from the document when the match is unambiguous.
            List<String> doiMatches = document.getDOIMatches();
            if (doiMatches != null && doiMatches.size() == 1) {
                biblioItem.setDOI(doiMatches.get(0));
            }

            biblioItem = consolidateHeader(biblioItem, grobidAnalysisConfig.getConsolidateHeader());
            if (biblioItem != null) {
                if (biblioItem.getPublicationDate() != null) {
                    List<Date> publicationDates = this.parsers.getDateParser().processing(biblioItem.getPublicationDate());
                    if (publicationDates != null && publicationDates.size() > 0) {
                        biblioItem.setNormalizedPublicationDate(publicationDates.get(0));
                    }
                }
                if (biblioItem.getSubmissionDate() != null) {
                    List<Date> submissionDates = this.parsers.getDateParser().processing(biblioItem.getSubmissionDate());
                    if (submissionDates != null && submissionDates.size() > 0) {
                        biblioItem.setNormalizedSubmissionDate(submissionDates.get(0));
                    }
                }
            }
        }
        document.setResHeader(biblioItem);
        StringBuilder tei = new TEIFormatter(document, null).toTEIHeader(biblioItem, null, null, grobidAnalysisConfig);
        tei.append("\t</text>\n");
        tei.append("</TEI>\n");
        return tei.toString();
    }

    /**
     * Extracts header metadata from the header section identified by the segmentation model
     * and serialises it as a TEI header.
     *
     * <p>The header features are labelled (when non-blank), the labelled result is turned into
     * a {@link BiblioItem}, the language is identified from title and abstract (completed with
     * body text when shorter than 200 characters), and titles, authors, affiliations, editors,
     * keywords, DOI and dates are post-processed before optional consolidation.
     *
     * @param grobidAnalysisConfig analysis configuration (consolidation flag, TEI options)
     * @param document             segmented document; receives the identified language
     * @param biblioItem           bibliographic item to populate
     * @return the TEI header, closed with the {@code </text>} and {@code </TEI>} tags, or
     *         {@code null} when the document has no header part
     * @throws GrobidException wrapping any exception raised during processing
     */
    public String processingHeaderSection(GrobidAnalysisConfig grobidAnalysisConfig, Document document, BiblioItem biblioItem) {
        try {
            SortedSet<DocumentPiece> headerParts = document.getDocumentPart(SegmentationLabels.HEADER);
            List<LayoutToken> tokenizations = document.getTokenizations();
            if (headerParts == null) {
                return null;
            }

            // getSectionHeaderFeatured returns null for a document without blocks.
            Pair<String, List<LayoutToken>> featured = getSectionHeaderFeatured(document, headerParts, true);
            String headerFeatures = featured != null ? featured.getLeft() : null;
            List<LayoutToken> headerTokens = featured != null ? featured.getRight() : null;
            String labeledHeader = null;
            if (headerFeatures != null && headerFeatures.trim().length() > 0) {
                labeledHeader = label(headerFeatures);
                biblioItem = resultExtraction(labeledHeader, true, headerTokens, biblioItem, document);
            }

            // Text sample for language identification.
            // biblioItem is dereferenced here unconditionally, so it is non-null from this point on.
            String languageSample = biblioItem.getTitle() != null ? biblioItem.getTitle() : "";
            if (biblioItem.getAbstract() != null) {
                languageSample = languageSample + "\n" + biblioItem.getAbstract();
            }
            if (languageSample.length() < 200) {
                SortedSet<DocumentPiece> bodyParts = document.getDocumentPart(SegmentationLabels.BODY);
                StringBuilder bodySample = new StringBuilder();
                // A document may have no body part (the header lookup above is null-checked for
                // the same reason); in that case the sample is left as is.
                if (bodyParts != null) {
                    for (DocumentPiece bodyPiece : bodyParts) {
                        int from = bodyPiece.getLeft().getTokenDocPos();
                        int to = bodyPiece.getRight().getTokenDocPos();
                        for (int i = from; i < to; i++) {
                            bodySample.append(tokenizations.get(i));
                            bodySample.append(" ");
                        }
                    }
                }
                languageSample = languageSample + " " + bodySample.toString();
            }
            Language language = this.languageUtilities.runLanguageId(languageSample);
            if (language != null) {
                String lang = language.getLang();
                document.setLanguage(lang);
                biblioItem.setLanguage(lang);
            }

            if (biblioItem.getAbstract() != null) {
                biblioItem.setAbstract(TextUtilities.dehyphenizeHard(biblioItem.getAbstract()));
            }
            BiblioItem.cleanTitles(biblioItem);
            if (biblioItem.getTitle() != null) {
                String title = TextUtilities.dehyphenize(biblioItem.getTitle()).trim();
                if (title.length() > 1) {
                    // NOTE(review): this constant comes from an unrelated analyzer package and looks
                    // like a decompiler substitution for a string literal - confirm its value.
                    if (title.startsWith(EomiUtil.RESULT_SUCCESS)) {
                        title = title.substring(1);
                    }
                    title = title.trim();
                }
                biblioItem.setTitle(title);
            }
            if (biblioItem.getBookTitle() != null) {
                biblioItem.setBookTitle(TextUtilities.dehyphenize(biblioItem.getBookTitle()));
            }

            biblioItem.setOriginalAuthors(biblioItem.getAuthors());
            boolean fragmentedAuthors = false;
            boolean hasMarker = false;
            // For each parsed author, the index of the author segment it came from.
            List<Integer> authorSegmentIndexes = new ArrayList<Integer>();
            String[] authorSegments = null;
            if (biblioItem.getAuthors() != null) {
                authorSegments = biblioItem.getAuthors().split("\n");
                fragmentedAuthors = authorSegments.length > 1;
                for (int i = 0; i < authorSegments.length; i++) {
                    List<Person> segmentAuthors = this.parsers.getAuthorParser().processingHeader(authorSegments[i]);
                    if (segmentAuthors != null) {
                        for (Person author : segmentAuthors) {
                            biblioItem.addFullAuthor(author);
                            if (author.getMarkers() != null) {
                                hasMarker = true;
                            }
                            authorSegmentIndexes.add(Integer.valueOf(i));
                        }
                    }
                }
            }

            biblioItem.setFullAffiliations(this.parsers.getAffiliationAddressParser().processReflow(labeledHeader, tokenizations));
            biblioItem.attachEmails();
            boolean attached = false;
            // Positional attachment: several author segments, no markers, and exactly one
            // affiliation per author segment.
            if (fragmentedAuthors && !hasMarker && biblioItem.getFullAffiliations() != null
                    && authorSegments != null && biblioItem.getFullAffiliations().size() == authorSegments.length) {
                List<Person> fullAuthors = biblioItem.getFullAuthors();
                if (fullAuthors != null) {
                    int authorIndex = 0;
                    for (Person author : fullAuthors) {
                        if (authorIndex < authorSegmentIndexes.size()) {
                            int segmentIndex = authorSegmentIndexes.get(authorIndex).intValue();
                            if (segmentIndex < biblioItem.getFullAffiliations().size()) {
                                author.addAffiliation(biblioItem.getFullAffiliations().get(segmentIndex));
                            }
                        }
                        authorIndex++;
                    }
                }
                attached = true;
                biblioItem.setFullAffiliations(null);
                biblioItem.setAffiliation(null);
            }
            if (!attached) {
                biblioItem.attachAffiliations();
            }

            if (biblioItem.getEditors() != null) {
                biblioItem.setFullEditors(this.parsers.getAuthorParser().processingHeader(biblioItem.getEditors()));
            }
            if (biblioItem.getReference() != null) {
                BiblioItem referenceItem = this.parsers.getCitationParser().processing(biblioItem.getReference(), 0);
                // Same guard as processingHeaderBlock: only correct against a parsed reference.
                if (referenceItem != null) {
                    BiblioItem.correct(biblioItem, referenceItem);
                }
            }

            if (biblioItem.getKeyword() != null) {
                String cleanKeywords = BiblioItem.cleanKeywords(TextUtilities.dehyphenize(biblioItem.getKeyword()));
                biblioItem.setKeyword(cleanKeywords.replace("\n", " ").replace("  ", " "));
                List<Keyword> segmentedKeywords = BiblioItem.segmentKeywords(cleanKeywords);
                if (segmentedKeywords != null && segmentedKeywords.size() > 0) {
                    biblioItem.setKeywords(segmentedKeywords);
                }
            }

            // A DOI is only taken from the document when the match is unambiguous.
            List<String> doiMatches = document.getDOIMatches();
            if (doiMatches != null && doiMatches.size() == 1) {
                biblioItem.setDOI(doiMatches.get(0));
            }

            BiblioItem consolidated = consolidateHeader(biblioItem, grobidAnalysisConfig.getConsolidateHeader());
            if (consolidated != null) {
                if (consolidated.getPublicationDate() != null) {
                    List<Date> publicationDates = this.parsers.getDateParser().processing(consolidated.getPublicationDate());
                    if (publicationDates != null && publicationDates.size() > 0) {
                        consolidated.setNormalizedPublicationDate(publicationDates.get(0));
                    }
                }
                if (consolidated.getSubmissionDate() != null) {
                    List<Date> submissionDates = this.parsers.getDateParser().processing(consolidated.getSubmissionDate());
                    if (submissionDates != null && submissionDates.size() > 0) {
                        consolidated.setNormalizedSubmissionDate(submissionDates.get(0));
                    }
                }
            }

            StringBuilder tei = new TEIFormatter(document, null).toTEIHeader(consolidated, null, null, grobidAnalysisConfig);
            tei.append("\t</text>\n");
            tei.append("</TEI>\n");
            return tei.toString();
        } catch (Exception e) {
            throw new GrobidException("An exception occurred while running Grobid.", e);
        }
    }

    /**
     * Builds the feature-vector text for the given document pieces and collects the layout
     * tokens visited while doing so.
     *
     * <p>For every piece, the blocks from the left pointer's block to the right pointer's
     * block (inclusive) are walked token by token. Every visited token is added to the
     * returned token list, but a feature vector is only printed for tokens that are non-null,
     * non-blank, not a newline and not rejected by {@code TextUtilities.filterLine}. The
     * returned list can therefore be longer than the number of printed vectors.
     *
     * @param document  document whose blocks are read
     * @param sortedSet document pieces delimiting the section to featurize
     * @param z         passed unchanged to {@code FeaturesVectorHeader.printVector} for every vector
     * @return pair of (concatenated printed vectors, visited tokens), or {@code null} when the
     *         document has no blocks
     */
    public Pair<String, List<LayoutToken>> getSectionHeaderFeatured(Document document, SortedSet<DocumentPiece> sortedSet, boolean z) {
        String text;
        FeatureFactory featureFactory = FeatureFactory.getInstance();
        StringBuilder sb = new StringBuilder();
        // Font name and font size of the previously featurized token; both persist across
        // blocks and pieces (null / -1 until the first featurized token).
        String str = null;
        int i = -1;
        List<Block> blocks = document.getBlocks();
        if (blocks == null || blocks.size() == 0) {
            return null;
        }
        ArrayList arrayList = new ArrayList();
        for (DocumentPiece documentPiece : sortedSet) {
            DocumentPointer left = documentPiece.getLeft();
            DocumentPointer right = documentPiece.getRight();
            for (int blockPtr = left.getBlockPtr(); blockPtr <= right.getBlockPtr(); blockPtr++) {
                Block block = blocks.get(blockPtr);
                // z2: the next featurized token starts a new line; z3: the end of the block has
                // been reached. Both are reset for every block.
                boolean z2 = false;
                boolean z3 = false;
                List<LayoutToken> tokens = block.getTokens();
                if (tokens != null) {
                    // In the first block of the piece start at the left pointer (converted to a
                    // block-relative index); in later blocks start at the first token.
                    int tokenDocPos = blockPtr == left.getBlockPtr() ? left.getTokenDocPos() - block.getStartToken() : 0;
                    // In the last block of the piece stop after the right pointer (inclusive).
                    while (tokenDocPos < tokens.size() && (blockPtr != right.getBlockPtr() || tokenDocPos <= right.getTokenDocPos() - block.getStartToken())) {
                        LayoutToken layoutToken = tokens.get(tokenDocPos);
                        // Recorded before any filtering, so skipped tokens are returned too.
                        arrayList.add(layoutToken);
                        FeaturesVectorHeader featuresVectorHeader = new FeaturesVectorHeader();
                        featuresVectorHeader.token = layoutToken;
                        String text2 = layoutToken.getText();
                        if (text2 == null) {
                            tokenDocPos++;
                        } else {
                            String replace = text2.replace(" ", "");
                            if (replace.length() == 0) {
                                tokenDocPos++;
                            } else if (replace.equals("\n")) {
                                // A newline token produces no vector; it only marks the next
                                // featurized token as a line start.
                                z2 = true;
                                tokenDocPos++;
                            } else {
                                // z4: this token is the first one after a line break.
                                boolean z4 = false;
                                if (z2) {
                                    z4 = true;
                                    z2 = false;
                                }
                                String replaceAll = replace.replaceAll("[ \n]", "");
                                if (TextUtilities.filterLine(replaceAll)) {
                                    tokenDocPos++;
                                } else {
                                    featuresVectorHeader.string = replaceAll;
                                    if (z4) {
                                        featuresVectorHeader.lineStatus = "LINESTART";
                                    }
                                    // Generic punctuation first; the specific types below override it.
                                    if (featureFactory.isPunct.matcher(replaceAll).find()) {
                                        featuresVectorHeader.punctType = "PUNCT";
                                    }
                                    if (replaceAll.equals(TextUtilities.START_BRACKET) || replaceAll.equals("[")) {
                                        featuresVectorHeader.punctType = "OPENBRACKET";
                                    } else if (replaceAll.equals(TextUtilities.END_BRACKET) || replaceAll.equals("]")) {
                                        featuresVectorHeader.punctType = "ENDBRACKET";
                                    } else if (replaceAll.equals(".")) {
                                        featuresVectorHeader.punctType = "DOT";
                                    } else if (replaceAll.equals(TextUtilities.COMMA)) {
                                        featuresVectorHeader.punctType = "COMMA";
                                    } else if (replaceAll.equals("-")) {
                                        featuresVectorHeader.punctType = "HYPHEN";
                                    } else if (replaceAll.equals(TextUtilities.DOUBLE_QUOTE) || replaceAll.equals(TextUtilities.QUOTE) || replaceAll.equals("`")) {
                                        featuresVectorHeader.punctType = "QUOTE";
                                    }
                                    // Line and block status: the first and last token of the block
                                    // are fixed; any other token needs a look-ahead.
                                    if (tokenDocPos == 0) {
                                        featuresVectorHeader.lineStatus = "LINESTART";
                                        featuresVectorHeader.blockStatus = "BLOCKSTART";
                                    } else if (tokenDocPos == tokens.size() - 1) {
                                        featuresVectorHeader.lineStatus = "LINEEND";
                                        z2 = true;
                                        featuresVectorHeader.blockStatus = "BLOCKEND";
                                        z3 = true;
                                    } else {
                                        // z5: a line end follows this token; z6: stop the look-ahead.
                                        boolean z5 = false;
                                        boolean z6 = false;
                                        for (int i2 = 1; tokenDocPos + i2 < tokens.size() && !z6; i2++) {
                                            LayoutToken layoutToken2 = tokens.get(tokenDocPos + i2);
                                            if (layoutToken2 != null && (text = layoutToken2.getText()) != null) {
                                                // NOTE(review): several comparisons in this look-ahead test
                                                // replaceAll (the current token) rather than text (the
                                                // look-ahead token) - confirm this is intended.
                                                if (text.equals("\n") || replaceAll.equals("\r")) {
                                                    z5 = true;
                                                    z6 = true;
                                                } else if (text.trim().length() != 0 && !replaceAll.equals(" ") && !text.contains("@IMAGE") && !text.contains("@PAGE") && !replaceAll.contains(".pbm") && !replaceAll.contains(".ppm") && !replaceAll.contains(".png") && !replaceAll.contains(".svg") && !replaceAll.contains(".jpg")) {
                                                    // A following token with real content: no line end in between.
                                                    z6 = true;
                                                }
                                            }
                                            // Reaching the last token of the block counts as both
                                            // block end and line end.
                                            if (tokenDocPos + i2 == tokens.size() - 1) {
                                                z3 = true;
                                                z5 = true;
                                            }
                                        }
                                        // A token already marked LINESTART (z4) keeps that status.
                                        if (!z5 && !z4) {
                                            featuresVectorHeader.lineStatus = "LINEIN";
                                        } else if (!z4) {
                                            featuresVectorHeader.lineStatus = "LINEEND";
                                            z2 = true;
                                        }
                                        if (!z3 && featuresVectorHeader.blockStatus == null) {
                                            featuresVectorHeader.blockStatus = "BLOCKIN";
                                        } else if (featuresVectorHeader.blockStatus == null) {
                                            featuresVectorHeader.blockStatus = "BLOCKEND";
                                        }
                                    }
                                    // Lexical features of the cleaned token text.
                                    if (replaceAll.length() == 1) {
                                        featuresVectorHeader.singleChar = true;
                                    }
                                    if (Character.isUpperCase(replaceAll.charAt(0))) {
                                        featuresVectorHeader.capitalisation = "INITCAP";
                                    }
                                    if (featureFactory.test_all_capital(replaceAll)) {
                                        featuresVectorHeader.capitalisation = "ALLCAP";
                                    }
                                    if (FeatureFactory.test_digit(replaceAll)) {
                                        featuresVectorHeader.digit = "CONTAINSDIGITS";
                                    }
                                    if (featureFactory.test_common(replaceAll)) {
                                        featuresVectorHeader.commonName = true;
                                    }
                                    if (featureFactory.test_names(replaceAll)) {
                                        featuresVectorHeader.properName = true;
                                    }
                                    if (featureFactory.test_month(replaceAll)) {
                                        featuresVectorHeader.month = true;
                                    }
                                    if (replaceAll.contains("-")) {
                                        featuresVectorHeader.containDash = true;
                                    }
                                    // Overrides CONTAINSDIGITS when the isDigit pattern matches.
                                    if (featureFactory.isDigit.matcher(replaceAll).find()) {
                                        featuresVectorHeader.digit = "ALLDIGIT";
                                    }
                                    if (featureFactory.year.matcher(replaceAll).find()) {
                                        featuresVectorHeader.year = true;
                                    }
                                    if (featureFactory.email.matcher(replaceAll).find()) {
                                        featuresVectorHeader.email = true;
                                    }
                                    if (featureFactory.http.matcher(replaceAll).find()) {
                                        featuresVectorHeader.http = true;
                                    }
                                    // Font change relative to the previously featurized token.
                                    if (str == null) {
                                        str = layoutToken.getFont();
                                        featuresVectorHeader.fontStatus = "NEWFONT";
                                    } else if (str.equals(layoutToken.getFont())) {
                                        featuresVectorHeader.fontStatus = "SAMEFONT";
                                    } else {
                                        str = layoutToken.getFont();
                                        featuresVectorHeader.fontStatus = "NEWFONT";
                                    }
                                    // Font size change, compared on the size truncated to an int;
                                    // the very first featurized token is reported as HIGHERFONT.
                                    int fontSize = (int) layoutToken.getFontSize();
                                    if (i == -1) {
                                        i = fontSize;
                                        featuresVectorHeader.fontSize = "HIGHERFONT";
                                    } else if (i == fontSize) {
                                        featuresVectorHeader.fontSize = "SAMEFONTSIZE";
                                    } else if (i < fontSize) {
                                        featuresVectorHeader.fontSize = "HIGHERFONT";
                                        i = fontSize;
                                    } else if (i > fontSize) {
                                        featuresVectorHeader.fontSize = "LOWERFONT";
                                        i = fontSize;
                                    }
                                    if (layoutToken.getBold()) {
                                        featuresVectorHeader.bold = true;
                                    }
                                    if (layoutToken.getItalic()) {
                                        featuresVectorHeader.italic = true;
                                    }
                                    if (layoutToken.getRotation()) {
                                        featuresVectorHeader.rotation = true;
                                    }
                                    // Defaults for the categorical features left unset above.
                                    if (featuresVectorHeader.capitalisation == null) {
                                        featuresVectorHeader.capitalisation = "NOCAPS";
                                    }
                                    if (featuresVectorHeader.digit == null) {
                                        featuresVectorHeader.digit = "NODIGIT";
                                    }
                                    if (featuresVectorHeader.punctType == null) {
                                        featuresVectorHeader.punctType = "NOPUNCT";
                                    }
                                    sb.append(featuresVectorHeader.printVector(z));
                                    tokenDocPos++;
                                }
                            }
                        }
                    }
                }
            }
        }
        return Pair.of(sb.toString(), arrayList);
    }

    /**
     * Generates pre-annotated training files for the header model from a PDF.
     *
     * <p>The PDF is segmented, the tokens of its header part are collected and labeled, and the
     * following files are written (the {@code .pdf} suffix of the file name is replaced):
     * <ul>
     *   <li>{@code str2/<name>.header}: the raw feature file;</li>
     *   <li>{@code str3/<name>} + {@code GrobidProperties.FILE_ENDING_TEI_HEADER}: the header TEI;</li>
     *   <li>{@code str3/<name>.affiliation.tei.xml}, {@code .date.xml}, {@code .authors.tei.xml} and
     *       {@code .header-reference.xml}: only when the corresponding sub-parser produced output.</li>
     * </ul>
     *
     * @param str  path of the input PDF file
     * @param str2 directory receiving the raw feature file
     * @param str3 directory receiving the TEI / XML training files
     * @return the segmented document (returned even when no header part was found)
     * @throws GrobidException wrapping any exception raised during processing
     */
    public Document createTrainingHeader(String str, String str2, String str3) {
        DocumentSource documentSource = null;
        try {
            File pdfFile = new File(str);
            String pdfName = pdfFile.getName();
            documentSource = DocumentSource.fromPdf(pdfFile);
            Document document = this.parsers.getSegmentationParser().processing(documentSource, GrobidAnalysisConfig.defaultInstance());
            SortedSet<DocumentPiece> headerPieces = document.getDocumentPart(SegmentationLabels.HEADER);
            List<LayoutToken> documentTokens = document.getTokenizations();
            if (headerPieces == null) {
                LOGGER.warn("no header found");
                return document;
            }

            // Gather the layout tokens covered by every header piece, in document order.
            List<LayoutToken> headerTokens = new ArrayList<>();
            for (DocumentPiece piece : headerPieces) {
                int startPos = piece.getLeft().getTokenDocPos();
                int endPos = piece.getRight().getTokenDocPos();
                for (int pos = startPos; pos < endPos; pos++) {
                    headerTokens.add(documentTokens.get(pos));
                }
            }

            String featured = (String) getSectionHeaderFeatured(document, headerPieces, true).getLeft();
            if (featured == null || featured.trim().length() == 0) {
                return document;
            }

            String labeled = label(featured);
            writeUtf8File(new File(str2 + File.separator + pdfName.replace(".pdf", ".header")), featured + "\n");

            StringBuilder headerTei = trainingExtraction(labeled, true, headerTokens);
            Language language = this.languageUtilities.runLanguageId(headerTei.toString());
            if (language != null) {
                document.setLanguage(language.getLang());
            }

            StringBuilder affiliationTei = this.parsers.getAffiliationAddressParser().trainingExtraction(labeled, headerTokens);

            // Date training data: only when more than one non-blank character was labeled as date.
            StringBuilder dateXml = null;
            String dateText = collectTextForLabel(labeled, headerTokens, TaggingLabels.DATE_LABEL);
            if (dateText.trim().length() > 1) {
                List<String> dateInputs = new ArrayList<>();
                dateInputs.add(dateText.trim());
                dateXml = this.parsers.getDateParser().trainingExtraction(dateInputs);
            }

            // Author training data (length is checked before trimming, as in the original code).
            String authorText = collectTextForLabel(labeled, headerTokens, TaggingLabels.AUTHOR_LABEL);
            StringBuilder authorTei = null;
            if (authorText.length() > 1) {
                authorTei = this.parsers.getAuthorParser().trainingExtraction(authorText.trim(), true);
            }

            // Reference training data.
            StringBuilder referenceXml = null;
            String referenceText = collectTextForLabel(labeled, headerTokens, TaggingLabels.REFERENCE_LABEL);
            if (referenceText.length() > 1) {
                List<String> referenceInputs = new ArrayList<>();
                referenceInputs.add(referenceText.trim());
                referenceXml = this.parsers.getCitationParser().trainingExtraction(referenceInputs);
            }

            // Header TEI file.
            StringBuilder headerFile = new StringBuilder();
            headerFile.append("<?xml version=\"1.0\" ?>\n<tei xml:space=\"preserve\">\n\t<teiHeader>\n\t\t<fileDesc xml:id=\"" + pdfName.replace(".pdf", "") + "\"/>\n\t</teiHeader>\n\t<text");
            if (language != null) {
                // NOTE(review): the attribute is always written as "en" even though a language
                // was detected above; kept unchanged - confirm whether this is intended.
                headerFile.append(" xml:lang=\"en\"");
            }
            headerFile.append(">\n\t\t<front>\n");
            headerFile.append(headerTei.toString());
            headerFile.append("\n\t\t</front>\n\t</text>\n</tei>\n");
            writeUtf8File(new File(str3 + File.separator + pdfName.replace(".pdf", GrobidProperties.FILE_ENDING_TEI_HEADER)), headerFile.toString());

            // Envelope shared by the affiliation and author TEI files.
            String teiOpening = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
                    + "\n<tei xml:space=\"preserve\" xmlns=\"http://www.tei-c.org/ns/1.0\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\">"
                    + "\n\t<teiHeader>\n\t\t<fileDesc>\n\t\t\t<sourceDesc>";
            String teiClosing = "\n\t\t\t\t</biblStruct>\n\t\t\t</sourceDesc>\n\t\t</fileDesc>"
                    + "\n\t</teiHeader>\n</tei>\n";

            if (affiliationTei != null && affiliationTei.length() > 0) {
                String content = teiOpening
                        + "\n\t\t\t\t<biblStruct>\n\t\t\t\t\t<analytic>\n\t\t\t\t\t\t<author>\n\n"
                        + affiliationTei.toString()
                        + "\n\t\t\t\t\t\t</author>\n\t\t\t\t\t</analytic>"
                        + teiClosing;
                writeUtf8File(new File(str3 + File.separator + pdfName.replace(".pdf", ".affiliation.tei.xml")), content);
            }
            if (dateXml != null && dateXml.length() > 0) {
                String content = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
                        + "<dates>\n"
                        + dateXml.toString()
                        + "</dates>\n";
                writeUtf8File(new File(str3 + File.separator + pdfName.replace(".pdf", ".date.xml")), content);
            }
            if (authorTei != null && authorTei.length() > 0) {
                String content = teiOpening
                        + "\n\t\t\t\t<biblStruct>\n\t\t\t\t\t<analytic>\n\n\t\t\t\t\t\t<author>"
                        + "\n\t\t\t\t\t\t\t<persName>\n"
                        + authorTei.toString()
                        + "\t\t\t\t\t\t\t</persName>\n"
                        + "\t\t\t\t\t\t</author>\n\n\t\t\t\t\t</analytic>"
                        + teiClosing;
                writeUtf8File(new File(str3 + File.separator + pdfName.replace(".pdf", ".authors.tei.xml")), content);
            }
            if (referenceXml != null && referenceXml.length() > 0) {
                String content = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
                        + "<citations>\n"
                        + referenceXml.toString()
                        + "</citations>\n";
                writeUtf8File(new File(str3 + File.separator + pdfName.replace(".pdf", ".header-reference.xml")), content);
            }
            return document;
        } catch (Exception e) {
            throw new GrobidException("An exception occurred while running Grobid.", e);
        } finally {
            // Always release the document source, on success as well as on failure.
            DocumentSource.close(documentSource, true, true, true);
        }
    }

    /**
     * Concatenates the text of all header tokens whose labeled line ends with the given label.
     *
     * <p>Labeled lines and layout tokens are walked in parallel. Whitespace layout tokens have no
     * labeled line of their own, so a run of them is folded into the following token's text.
     * The scan stops cleanly when the token list is exhausted.
     */
    private static String collectTextForLabel(String labeledResult, List<LayoutToken> tokens, String labelSuffix) {
        StringBuilder collected = new StringBuilder();
        StringTokenizer labeledLines = new StringTokenizer(labeledResult, "\n");
        int index = 0;
        while (labeledLines.hasMoreTokens() && index < tokens.size()) {
            String labeledLine = labeledLines.nextToken();
            String current = tokens.get(index).getText();
            StringBuilder chunk = new StringBuilder(current);
            while (isBlankLayoutText(current)) {
                index++;
                if (index >= tokens.size()) {
                    // Trailing whitespace at the end of the header: nothing left to align with.
                    break;
                }
                current = tokens.get(index).getText();
                chunk.append(current);
            }
            if (labeledLine.endsWith(labelSuffix)) {
                collected.append(chunk);
            }
            index++;
        }
        return collected.toString();
    }

    /** Tells whether a layout token text is a single space, tab, line feed or carriage return. */
    private static boolean isBlankLayoutText(String text) {
        return " ".equals(text) || "\t".equals(text) || "\n".equals(text) || "\r".equals(text);
    }

    /** Writes {@code content} to {@code target} as UTF-8, replacing any existing file. */
    private static void writeUtf8File(File target, String content) throws IOException {
        try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(target, false), "UTF-8")) {
            writer.write(content);
        }
    }

    /**
     * Fills a {@link BiblioItem} from the labeled output of the header model.
     *
     * <p>Each non-empty line of {@code str} holds columns separated by tabs (or by spaces when the
     * line contains no tab): the token text first, the label last, features in between. The token
     * text is aligned against {@code list} to find out whether a space preceded it, which decides
     * if consecutive tokens of a field are joined with or without a separator. Lines that carry no
     * label column are skipped.
     *
     * @param str        labeled result, one token per line
     * @param z          when {@code true}, extraction stops at the first introduction label
     * @param list       layout tokens the labeled lines are aligned with
     * @param biblioItem item to populate
     * @param document   document passed to {@code BiblioItem.generalResultMapping}
     * @return the same {@code biblioItem} instance
     */
    public BiblioItem resultExtraction(String str, boolean z, List<LayoutToken> list, BiblioItem biblioItem, Document document) {
        biblioItem.generalResultMapping(document, str, list);
        String previousLabel = null;
        int tokenIndex = 0;
        for (String rawLine : Splitter.on("\n").splitToList(str)) {
            String line = rawLine.trim();
            if (line.length() == 0) {
                continue;
            }
            StringTokenizer columns = new StringTokenizer(line, line.indexOf("\t") == -1 ? " " : "\t");
            int columnCount = columns.countTokens();
            int column = 0;
            String tokenText = null;
            String label = null;
            boolean lineStart = false;
            boolean spaceBefore = false;
            LayoutToken layoutToken = null;
            while (columns.hasMoreTokens()) {
                String value = columns.nextToken().trim();
                if (column == 0) {
                    tokenText = value;
                    // Advance through the layout tokens until the one matching this text,
                    // remembering whether a space token was crossed on the way.
                    int startIndex = tokenIndex;
                    boolean matched = false;
                    while (!matched && tokenIndex < list.size()) {
                        layoutToken = list.get(tokenIndex);
                        String layoutText = layoutToken.getText();
                        if (layoutText.equals(" ")) {
                            spaceBefore = true;
                        } else if (layoutText.equals(value)) {
                            matched = true;
                        }
                        tokenIndex++;
                    }
                    // The scan reached the end after crossing several tokens: rewind so that
                    // the following lines can still be aligned.
                    if (tokenIndex == list.size() && tokenIndex - startIndex > 2) {
                        tokenIndex = startIndex;
                    }
                } else if (column == columnCount - 1) {
                    label = value;
                } else if (value.equals("LINESTART")) {
                    lineStart = true;
                }
                column++;
            }
            if (label == null) {
                // Single-column line: there is no label to dispatch on.
                continue;
            }

            // Default separator between two consecutive tokens of the same field.
            String gap = spaceBefore ? " " : "";

            if (labelIs(label, TaggingLabels.TITLE_LABEL, "I-<title>")) {
                biblioItem.setTitle(appendFieldToken(biblioItem.getTitle(), lineStart ? " " : gap, tokenText));
            } else if (labelIs(label, TaggingLabels.AUTHOR_LABEL, "I-<author>")) {
                String authors = biblioItem.getAuthors();
                if (previousLabel == null || previousLabel.endsWith(TaggingLabels.AUTHOR_LABEL)) {
                    // Continuation of the current author sequence.
                    if (authors == null) {
                        biblioItem.setAuthors(tokenText);
                    } else if (spaceBefore) {
                        biblioItem.setAuthors(authors + " " + tokenText);
                        biblioItem.addAuthorsToken(new LayoutToken(" ", TaggingLabels.HEADER_AUTHOR));
                    } else {
                        biblioItem.setAuthors(authors + tokenText);
                    }
                } else if (authors == null) {
                    biblioItem.setAuthors(tokenText);
                } else if (spaceBefore) {
                    // A new author sequence after another field: separate it with a line break.
                    biblioItem.setAuthors(authors + " \n" + tokenText);
                    biblioItem.addAuthorsToken(new LayoutToken(" ", TaggingLabels.HEADER_AUTHOR)).addAuthorsToken(new LayoutToken("\n", TaggingLabels.HEADER_AUTHOR));
                } else {
                    biblioItem.setAuthors(authors + "\n" + tokenText);
                    biblioItem.addAuthorsToken(new LayoutToken("\n", TaggingLabels.HEADER_AUTHOR));
                }
                biblioItem.addAuthorsToken(layoutToken);
            } else if (labelIs(label, TaggingLabels.TECH_LABEL, "I-<tech>")) {
                biblioItem.setItem(9);
                biblioItem.setBookType(appendFieldToken(biblioItem.getBookType(), gap, tokenText));
            } else if (labelIs(label, TaggingLabels.LOCATION_LABEL, "I-<location>")) {
                biblioItem.setLocation(appendFieldToken(biblioItem.getLocation(), gap, tokenText));
            } else if (labelIs(label, TaggingLabels.DATE_LABEL, "I-<date>")) {
                biblioItem.setPublicationDate(appendFieldToken(biblioItem.getPublicationDate(), gap, tokenText));
            } else if (labelIs(label, TaggingLabels.DATESUB_LABEL, "I-<date-submission>")) {
                biblioItem.setSubmissionDate(appendFieldToken(biblioItem.getSubmissionDate(), gap, tokenText));
            } else if (labelIs(label, TaggingLabels.PAGES_LABEL, "I-<pages>") || labelIs(label, "<page>", "I-<page>")) {
                biblioItem.setPageRange(appendFieldToken(biblioItem.getPageRange(), gap, tokenText));
            } else if (labelIs(label, TaggingLabels.EDITOR_LABEL, "I-<editor>")) {
                biblioItem.setEditors(appendFieldToken(biblioItem.getEditors(), gap, tokenText));
            } else if (labelIs(label, TaggingLabels.INSTITUTION_LABEL, "I-<institution>")) {
                // Institutions use "; " instead of a plain space.
                biblioItem.setInstitution(appendFieldToken(biblioItem.getInstitution(), spaceBefore ? "; " : "", tokenText));
            } else if (labelIs(label, TaggingLabels.NOTE_LABEL, "I-<note>")) {
                biblioItem.setNote(appendFieldToken(biblioItem.getNote(), gap, tokenText));
            } else if (labelIs(label, TaggingLabels.ABSTRACT_LABEL, "I-<abstract>")) {
                biblioItem.setAbstract(appendFieldToken(biblioItem.getAbstract(), gap, tokenText));
            } else if (labelIs(label, TaggingLabels.REFERENCE_LABEL, "I-<reference>")) {
                biblioItem.setReference(appendFieldToken(biblioItem.getReference(), gap, tokenText));
            } else if (labelIs(label, TaggingLabels.GRANT_LABEL, "I-<grant>")) {
                biblioItem.setGrant(appendFieldToken(biblioItem.getGrant(), gap, tokenText));
            } else if (labelIs(label, TaggingLabels.COPYRIGHT_LABEL, "I-<copyright>")) {
                biblioItem.setCopyright(appendFieldToken(biblioItem.getCopyright(), gap, tokenText));
            } else if (labelIs(label, TaggingLabels.AFFILIATION_LABEL, "I-<affiliation>")) {
                // " ; " starts a new affiliation: either the previous line belonged to another
                // field, or this line carries the begin-of-field label.
                boolean continuesPrevious = previousLabel != null
                        && (label.equals(previousLabel) || previousLabel.equals("I-<affiliation>"));
                boolean startsNew = !continuesPrevious || label.equals("I-<affiliation>");
                biblioItem.setAffiliation(appendFieldToken(biblioItem.getAffiliation(), startsNew ? " ; " : gap, tokenText));
            } else if (labelIs(label, TaggingLabels.ADDRESS_LABEL, "I-<address>")) {
                biblioItem.setAddress(appendFieldToken(biblioItem.getAddress(), gap, tokenText));
            } else if (labelIs(label, TaggingLabels.EMAIL_LABEL, "I-<email>")) {
                biblioItem.setEmail(appendFieldToken(biblioItem.getEmail(), label.equals("I-<email>") ? " ; " : gap, tokenText));
            } else if (labelIs(label, TaggingLabels.PUBNUM_LABEL, "I-<pubnum>")) {
                biblioItem.setPubnum(appendFieldToken(biblioItem.getPubnum(), gap, tokenText));
            } else if (labelIs(label, TaggingLabels.KEYWORD_LABEL, "I-<keyword>")) {
                biblioItem.setKeyword(appendFieldToken(biblioItem.getKeyword(), lineStart ? " \n " : gap, tokenText));
            } else if (labelIs(label, TaggingLabels.PHONE_LABEL, "I-<phone>")) {
                biblioItem.setPhone(appendFieldToken(biblioItem.getPhone(), gap, tokenText));
            } else if (labelIs(label, TaggingLabels.DEGREE_LABEL, "I-<degree>")) {
                biblioItem.setDegree(appendFieldToken(biblioItem.getDegree(), gap, tokenText));
            } else if (labelIs(label, TaggingLabels.WEB_LABEL, "I-<web>")) {
                biblioItem.setWeb(appendFieldToken(biblioItem.getWeb(), gap, tokenText));
            } else if (labelIs(label, TaggingLabels.DEDICATION_LABEL, "I-<dedication>")) {
                biblioItem.setDedication(appendFieldToken(biblioItem.getDedication(), gap, tokenText));
            } else if (labelIs(label, TaggingLabels.SUBMISSION_LABEL, "I-<submission>")) {
                biblioItem.setSubmission(appendFieldToken(biblioItem.getSubmission(), gap, tokenText));
            } else if (labelIs(label, TaggingLabels.ENTITLE_LABEL, "I-<entitle>")) {
                String separator = !label.equals(previousLabel) ? " ; " : (lineStart ? " " : gap);
                biblioItem.setEnglishTitle(appendFieldToken(biblioItem.getEnglishTitle(), separator, tokenText));
            } else if (labelIs(label, TaggingLabels.INTRO_LABEL, "I-<intro>") && z) {
                // The header is over once the introduction starts.
                return biblioItem;
            }
            previousLabel = label;
        }
        return biblioItem;
    }

    /** Tells whether {@code label} equals either the plain or the begin-of-field form of a label. */
    private static boolean labelIs(String label, String plainForm, String beginForm) {
        return label.equals(plainForm) || label.equals(beginForm);
    }

    /** Returns {@code token} when the field is still unset, otherwise {@code current + separator + token}. */
    private static String appendFieldToken(String current, String separator, String token) {
        return current == null ? token : current + separator + token;
    }

    public StringBuilder trainingExtraction(String str, boolean z, List<LayoutToken> list) {
        StringBuilder sb = new StringBuilder();
        StringTokenizer stringTokenizer = new StringTokenizer(str, "\n");
        String str2 = null;
        String str3 = null;
        String str4 = null;
        int i = 0;
        while (stringTokenizer.hasMoreTokens()) {
            boolean z2 = false;
            String trim = stringTokenizer.nextToken().trim();
            if (trim.length() != 0) {
                StringTokenizer stringTokenizer2 = new StringTokenizer(trim, "\t");
                int i2 = 0;
                boolean z3 = false;
                int countTokens = stringTokenizer2.countTokens();
                while (stringTokenizer2.hasMoreTokens()) {
                    String trim2 = stringTokenizer2.nextToken().trim();
                    if (i2 == 0) {
                        str3 = TextUtilities.HTMLEncode(trim2);
                        boolean z4 = false;
                        while (!z4 && i < list.size()) {
                            String t = list.get(i).t();
                            if (t.equals(" ") || t.equals(" ")) {
                                z2 = true;
                            } else if (t.equals(trim2)) {
                                z4 = true;
                            }
                            i++;
                        }
                    } else if (i2 == countTokens - 1) {
                        str2 = trim2;
                    } else if (trim2.equals("LINESTART")) {
                        z3 = true;
                    }
                    i2++;
                }
                if (z3) {
                    sb.append("<lb/>");
                }
                String str5 = null;
                if (str4 != null) {
                    str5 = str4.startsWith("I-") ? str4.substring(2, str4.length()) : str4;
                }
                String str6 = null;
                if (str2 != null) {
                    str6 = str2.startsWith("I-") ? str2.substring(2, str2.length()) : str2;
                }
                if (str4 != null) {
                    testClosingTag(sb, str6, str5);
                }
                boolean writeField = writeField(sb, str2, str5, str3, TaggingLabels.TITLE_LABEL, "<docTitle>\n\t<titlePart>", z2);
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.AUTHOR_LABEL, "<byline>\n\t<docAuthor>", z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.LOCATION_LABEL, TaggingLabels.ADDRESS_LABEL, z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.ADDRESS_LABEL, TaggingLabels.ADDRESS_LABEL, z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.DATE_LABEL, TaggingLabels.DATE_LABEL, z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.DATESUB_LABEL, "<date type=\"submission\">", z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.BOOKTITLE_LABEL, TaggingLabels.BOOKTITLE_LABEL, z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.PAGES_LABEL, TaggingLabels.PAGES_LABEL, z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.PUBLISHER_LABEL, TaggingLabels.PUBLISHER_LABEL, z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.JOURNAL_LABEL, TaggingLabels.JOURNAL_LABEL, z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.INSTITUTION_LABEL, "<byline>\n\t<affiliation>", z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.AFFILIATION_LABEL, "<byline>\n\t<affiliation>", z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.VOLUME_LABEL, TaggingLabels.VOLUME_LABEL, z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.EDITOR_LABEL, TaggingLabels.EDITOR_LABEL, z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.NOTE_LABEL, "<note type=\"other\">", z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.ABSTRACT_LABEL, "<div type=\"abstract\">", z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.EMAIL_LABEL, TaggingLabels.EMAIL_LABEL, z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.PUBNUM_LABEL, "<idno>", z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.KEYWORD_LABEL, TaggingLabels.KEYWORD_LABEL, z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.PHONE_LABEL, TaggingLabels.PHONE_LABEL, z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.DEGREE_LABEL, "<note type=\"degree\">", z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.WEB_LABEL, "<ptr type=\"web\">", z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.DEDICATION_LABEL, TaggingLabels.DEDICATION_LABEL, z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.SUBMISSION_LABEL, "<note type=\"submission\">", z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.ENTITLE_LABEL, "<note type=\"title\">", z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.REFERENCE_LABEL, TaggingLabels.REFERENCE_LABEL, z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.COPYRIGHT_LABEL, "<note type=\"copyright\">", z2);
                }
                if (!writeField) {
                    writeField = writeField(sb, str2, str5, str3, TaggingLabels.GRANT_LABEL, "<note type=\"grant\">", z2);
                }
                if (!writeField) {
                    writeField(sb, str2, str5, str3, TaggingLabels.INTRO_LABEL, "<p type=\"introduction\">", z2);
                }
                str4 = str2;
                if (!stringTokenizer.hasMoreTokens() && str4 != null) {
                    testClosingTag(sb, "", str6);
                }
            }
        }
        return sb;
    }

    /**
     * Appends the closing markup of the previous field when the label sequence moves on to a
     * different field.
     *
     * <p>Nothing is written when both labels are equal, when there is no previous label, or when
     * the previous label is not one of the fields handled below.
     *
     * @param sb   buffer receiving the generated markup
     * @param str  label of the current token, without its {@code I-} prefix; the visible caller
     *             passes an empty string after the last token to force the final close
     * @param str2 label of the previous token, without its {@code I-} prefix
     */
    private void testClosingTag(StringBuilder sb, String str, String str2) {
        // Comparing from the previous label keeps the check safe when the current label is null.
        if (str2 == null || str2.equals(str)) {
            return;
        }

        final String closing;
        if (str2.equals(TaggingLabels.TITLE_LABEL)) {
            closing = "</titlePart>\n\t</docTitle>\n";
        } else if (str2.equals(TaggingLabels.AUTHOR_LABEL)) {
            closing = "</docAuthor>\n\t</byline>\n";
        } else if (str2.equals(TaggingLabels.LOCATION_LABEL) || str2.equals(TaggingLabels.ADDRESS_LABEL)) {
            // Locations are written as address elements, so both share one closing tag.
            closing = "</address>\n";
        } else if (str2.equals(TaggingLabels.DATE_LABEL) || str2.equals(TaggingLabels.DATESUB_LABEL)) {
            closing = "</date>\n";
        } else if (str2.equals(TaggingLabels.ABSTRACT_LABEL)) {
            closing = "</div>\n";
        } else if (str2.equals(TaggingLabels.BOOKTITLE_LABEL)) {
            closing = "</booktitle>\n";
        } else if (str2.equals(TaggingLabels.PAGES_LABEL)) {
            closing = "</pages>\n";
        } else if (str2.equals(TaggingLabels.EMAIL_LABEL)) {
            closing = "</email>\n";
        } else if (str2.equals(TaggingLabels.PUBLISHER_LABEL)) {
            closing = "</publisher>\n";
        } else if (str2.equals(TaggingLabels.INSTITUTION_LABEL) || str2.equals(TaggingLabels.AFFILIATION_LABEL)) {
            closing = "</affiliation>\n\t</byline>\n";
        } else if (str2.equals(TaggingLabels.KEYWORD_LABEL)) {
            closing = "</keyword>\n";
        } else if (str2.equals(TaggingLabels.NOTE_LABEL)
                || str2.equals(TaggingLabels.COPYRIGHT_LABEL)
                || str2.equals(TaggingLabels.GRANT_LABEL)
                || str2.equals(TaggingLabels.ENTITLE_LABEL)
                || str2.equals(TaggingLabels.SUBMISSION_LABEL)
                || str2.equals(TaggingLabels.DEGREE_LABEL)) {
            // All of these are opened as typed note elements.
            closing = "</note>\n";
        } else if (str2.equals(TaggingLabels.REFERENCE_LABEL)) {
            closing = "</reference>\n";
        } else if (str2.equals(TaggingLabels.DEDICATION_LABEL)) {
            closing = "</dedication>\n";
        } else if (str2.equals(TaggingLabels.WEB_LABEL)) {
            closing = "</ptr>\n";
        } else if (str2.equals(TaggingLabels.PHONE_LABEL)) {
            closing = "</phone>\n";
        } else if (str2.equals(TaggingLabels.PUBNUM_LABEL)) {
            closing = "</idno>\n";
        } else if (str2.equals(TaggingLabels.INTRO_LABEL)) {
            closing = "</p>\n";
        } else if (str2.equals(TaggingLabels.JOURNAL_LABEL)) {
            // Journal, volume and editor are opened with the label text itself as the tag, so
            // they must be closed as well to keep the output balanced.
            // NOTE(review): assumes the constants hold <journal>, <volume>, <editor> - confirm.
            closing = "</journal>\n";
        } else if (str2.equals(TaggingLabels.VOLUME_LABEL)) {
            closing = "</volume>\n";
        } else if (str2.equals(TaggingLabels.EDITOR_LABEL)) {
            closing = "</editor>\n";
        } else {
            // Labels without dedicated markup never opened an element.
            return;
        }
        sb.append(closing);
    }

    /**
     * Writes one token into the buffer if its label belongs to the given field.
     *
     * <p>When the token starts the field (the previous label differs), a newline, a tab and the
     * opening markup are written before the token text. When it continues the field, only the
     * token text is written, preceded by a single space if requested.
     *
     * @param sb   buffer receiving the generated markup
     * @param str  label of the current token, possibly carrying an {@code I-} prefix
     * @param str2 label of the previous token
     * @param str3 token text to write
     * @param str4 field label this call is responsible for
     * @param str5 opening markup written when the field starts
     * @param z    whether a space goes before the token when the field continues
     * @return {@code true} if the token belonged to the field and was written
     */
    private boolean writeField(StringBuilder sb, String str, String str2, String str3, String str4, String str5, boolean z) {
        boolean belongsToField = str.equals(str4) || str.equals("I-" + str4);
        if (!belongsToField) {
            return false;
        }

        boolean continuesPrevious = str.equals(str2) || str.equals("I-" + str2);
        if (!continuesPrevious) {
            // First token of the field: start a new line and emit the opening markup.
            sb.append("\n\t");
            sb.append(str5);
        } else if (z) {
            sb.append(" ");
        }
        sb.append(str3);
        return true;
    }

    /**
     * Consolidates the given header metadata through the shared {@link Consolidation} instance.
     *
     * <p>Mode {@code 0} returns the item untouched without any lookup. Mode {@code 1} passes a
     * non-null lookup result to {@code BiblioItem.correct}, mode {@code 2} passes it to
     * {@code BiblioItem.injectDOI}. Any other mode still performs the lookup but ignores its
     * result.
     *
     * @param biblioItem header metadata to consolidate
     * @param i          consolidation mode as described above
     * @return the same {@code biblioItem} instance that was passed in
     * @throws GrobidException wrapping any exception raised during consolidation
     */
    public BiblioItem consolidateHeader(BiblioItem biblioItem, int i) {
        if (i != 0) {
            try {
                Consolidation service = Consolidation.getInstance();
                // Share this parser's counter manager only if the service has none yet.
                if (service.getCntManager() == null) {
                    service.setCntManager(this.cntManager);
                }
                BiblioItem match = service.consolidate(biblioItem, null);
                if (match != null) {
                    switch (i) {
                        case 1:
                            BiblioItem.correct(biblioItem, match);
                            break;
                        case 2:
                            BiblioItem.injectDOI(biblioItem, match);
                            break;
                        default:
                            break;
                    }
                }
            } catch (Exception e) {
                throw new GrobidException("An exception occured while running bibliographical data consolidation.", e);
            }
        }
        return biblioItem;
    }

    /**
     * Closes this parser by delegating to the superclass implementation; no additional
     * cleanup is performed in this class.
     *
     * @throws IOException if the superclass {@code close()} throws one
     */
    @Override // org.grobid.core.engines.AbstractParser, java.io.Closeable, java.lang.AutoCloseable
    public void close() throws IOException {
        super.close();
    }
}
