package org.grobid.core.engines;

import eugfc.imageio.plugins.PNMRegistry;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.TreeMap;
import javax.imageio.ImageIO;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.grobid.core.GrobidModels;
import org.grobid.core.document.BasicStructureBuilder;
import org.grobid.core.document.Document;
import org.grobid.core.document.DocumentSource;
import org.grobid.core.engines.config.GrobidAnalysisConfig;
import org.grobid.core.engines.label.SegmentationLabels;
import org.grobid.core.exceptions.GrobidException;
import org.grobid.core.exceptions.GrobidExceptionStatus;
import org.grobid.core.features.FeatureFactory;
import org.grobid.core.features.FeaturesVectorSegmentation;
import org.grobid.core.layout.Block;
import org.grobid.core.layout.BoundingBox;
import org.grobid.core.layout.GraphicObject;
import org.grobid.core.layout.GraphicObjectType;
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.layout.Page;
import org.grobid.core.utilities.GrobidProperties;
import org.grobid.core.utilities.LanguageUtilities;
import org.grobid.core.utilities.TextUtilities;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/grobid/core/engines/Segmentation.class */
public class Segmentation extends AbstractParser {
    /** Logger shared by all instances of this parser. */
    private static final Logger LOGGER = LoggerFactory.getLogger(Segmentation.class);
    // The four constants below are not referenced by name in the visible (decompiled) code;
    // their values match the literals passed to FeatureFactory.linearScaling in
    // getFeatureVectorsAsString, so they presumably are the inlined originals - TODO confirm.
    // 12: scale used for the relative document/page position features.
    private static final int NBBINS_POSITION = 12;
    // 5: scale used for the spacing-with-previous-block feature.
    private static final int NBBINS_SPACE = 5;
    // 5: scale used for the character-density feature.
    private static final int NBBINS_DENSITY = 5;
    // 10: scale used for the line-length feature.
    private static final int LINESCALE = 10;
    // Assigned once in the constructor from LanguageUtilities.getInstance().
    private LanguageUtilities languageUtilities;
    // Assigned once in the constructor from FeatureFactory.getInstance(); used to compute line patterns and lexical tests.
    private FeatureFactory featureFactory;

    /**
     * Builds a parser bound to the {@code GrobidModels.SEGMENTATION} model and stores the
     * instances returned by {@code LanguageUtilities.getInstance()} and
     * {@code FeatureFactory.getInstance()} for later use.
     */
    public Segmentation() {
        super(GrobidModels.SEGMENTATION);
        languageUtilities = LanguageUtilities.getInstance();
        featureFactory = FeatureFactory.getInstance();
    }

    /**
     * Tokenizes and segments the document backed by {@code documentSource}.
     *
     * <p>When the configuration provides an asset path, the images found next to the source are
     * handed to {@code dealWithImages}. The document source is closed in every case, whether the
     * processing succeeds or throws.
     *
     * @param documentSource       source of the document to process
     * @param grobidAnalysisConfig analysis settings (optional analyzer, optional asset path)
     * @return the document returned by {@link #prepareDocument(Document)}
     */
    public Document processing(DocumentSource documentSource, GrobidAnalysisConfig grobidAnalysisConfig) {
        try {
            final Document rawDocument = new Document(documentSource);
            if (null != grobidAnalysisConfig.getAnalyzer()) {
                rawDocument.setAnalyzer(grobidAnalysisConfig.getAnalyzer());
            }
            rawDocument.addTokenizedDocument(grobidAnalysisConfig);

            final Document segmented = prepareDocument(rawDocument);

            final File assetDirectory = grobidAnalysisConfig.getPdfAssetPath();
            if (assetDirectory == null) {
                return segmented;
            }
            dealWithImages(documentSource, segmented, assetDirectory, grobidAnalysisConfig);
            return segmented;
        } finally {
            // Release the source with all three flags of DocumentSource.close set.
            DocumentSource.close(documentSource, true, true, true);
        }
    }

    /**
     * Segments a document created from plain text.
     *
     * @param str the text handed to {@code Document.createFromText}
     * @return the document returned by {@link #prepareDocument(Document)}
     */
    public Document processing(String str) {
        final Document textDocument = Document.createFromText(str);
        return prepareDocument(textDocument);
    }

    /**
     * Runs the segmentation model on an already tokenized document.
     *
     * <p>The document statistics are produced, every line is turned into a feature vector and,
     * unless the featured text is empty after trimming, the labelled result is passed to
     * {@code BasicStructureBuilder.generalResultSegmentation}.
     *
     * @param document the tokenized document
     * @return the structured document, or {@code document} itself when there was nothing to label
     * @throws GrobidException with status {@code TOO_MANY_TOKENS} when the token count exceeds
     *                         {@code GrobidProperties.getPdfTokensMax()}
     */
    public Document prepareDocument(Document document) {
        final List<LayoutToken> layoutTokens = document.getTokenizations();
        final int tokenCount = layoutTokens.size();
        if (tokenCount > GrobidProperties.getPdfTokensMax().intValue()) {
            throw new GrobidException("The document has " + tokenCount + " tokens, but the limit is " + GrobidProperties.getPdfTokensMax(), GrobidExceptionStatus.TOO_MANY_TOKENS);
        }

        document.produceStatistics();
        final String featuredLines = getAllLinesFeatured(document);
        if (StringUtils.isEmpty(StringUtils.trim(featuredLines))) {
            // Nothing to label: hand the document back untouched.
            return document;
        }

        final String labelledLines = label(featuredLines);
        return BasicStructureBuilder.generalResultSegmentation(document, labelledLines, layoutTokens);
    }

    /**
     * Publishes the images extracted next to the document's XML file into the asset directory.
     *
     * <p>The files of the {@code <xml file>_data} directory are processed as follows:
     * <ul>
     *   <li>{@code .png} files, and every non-{@code .svg} file when image preprocessing is
     *       disabled, are copied as-is;</li>
     *   <li>otherwise {@code .jpg} and {@code .ppm} files are re-encoded as PNG under their
     *       lower-cased name;</li>
     *   <li>anything else is ignored.</li>
     * </ul>
     * Processing stops once more than 5000 files have been published. When preprocessing is
     * enabled, the file path of every image of the document is then rewritten to point below the
     * last path segment of the asset directory, with a {@code .png} extension.
     *
     * <p>Failures on a single file are logged and do not interrupt the processing of the others.
     *
     * @param documentSource       source whose XML file locates the {@code _data} directory
     * @param document             document whose image paths are rewritten
     * @param file                 asset directory (created when missing); nothing happens when {@code null}
     * @param grobidAnalysisConfig analysis settings; only {@code isPreprocessImages()} is read
     */
    private void dealWithImages(DocumentSource documentSource, Document document, File file, GrobidAnalysisConfig grobidAnalysisConfig) {
        if (file == null) {
            return;
        }
        if (!file.exists()) {
            // mkdirs rather than mkdir: the asset directory may be nested below missing parents.
            if (file.mkdirs()) {
                LOGGER.debug("Directory created: " + file.getPath());
            } else {
                LOGGER.error("Failed to create directory: " + file.getPath());
            }
        }
        PNMRegistry.registerAllServicesProviders();

        File dataDirectory = new File(documentSource.getXmlFile().getAbsolutePath() + "_data");
        File[] extractedFiles = dataDirectory.exists() ? dataDirectory.listFiles() : null;
        if (extractedFiles != null) {
            int published = 0;
            for (File extracted : extractedFiles) {
                if (published > 5000) {
                    break;
                }
                String lowerName = extracted.getName().toLowerCase();
                if (lowerName.endsWith(".png") || !grobidAnalysisConfig.isPreprocessImages()) {
                    if (lowerName.endsWith(".svg")) {
                        continue;
                    }
                    try {
                        FileUtils.copyFileToDirectory(extracted, file);
                        published++;
                    } catch (IOException e) {
                        LOGGER.error("Cannot copy file " + extracted.getAbsolutePath() + " to " + file.getAbsolutePath(), e);
                    }
                } else if (lowerName.endsWith(".jpg") || lowerName.endsWith(".ppm")) {
                    String sourceExtension = lowerName.endsWith(".jpg") ? ".jpg" : ".ppm";
                    File target = new File(file.getPath() + File.separator + lowerName.replace(sourceExtension, ".png"));
                    try {
                        java.awt.image.BufferedImage decoded = ImageIO.read(extracted);
                        if (decoded == null) {
                            // ImageIO.read returns null when no registered reader understands the file;
                            // passing that null to ImageIO.write would throw IllegalArgumentException
                            // and abort the whole document.
                            LOGGER.error("Cannot convert file " + extracted.getAbsolutePath() + " to " + target.getPath() + ": no image reader could decode it");
                        } else {
                            ImageIO.write(decoded, "png", target);
                            published++;
                        }
                    } catch (IOException e) {
                        LOGGER.error("Cannot convert file " + extracted.getAbsolutePath() + " to " + target.getPath(), e);
                    }
                }
            }
        }

        if (!grobidAnalysisConfig.isPreprocessImages()) {
            return;
        }
        List<GraphicObject> images = document.getImages();
        if (images == null) {
            return;
        }

        // Keep only the last segment of the asset directory path.
        String assetDirectoryName = file.getPath();
        int lastSlash = assetDirectoryName.lastIndexOf(TextUtilities.SLASH);
        if (lastSlash != -1) {
            assetDirectoryName = assetDirectoryName.substring(lastSlash + 1);
        }

        for (GraphicObject image : images) {
            String imagePath = image.getFilePath();
            if (imagePath == null) {
                continue;
            }
            String pngPath = imagePath.replace(".ppm", ".png").replace(".jpg", ".png");
            int firstSlash = pngPath.indexOf(TextUtilities.SLASH);
            // A path without any slash used to fail with StringIndexOutOfBoundsException
            // (substring(-1)); treat it as a bare file name instead.
            String relativePart = firstSlash >= 0 ? pngPath.substring(firstSlash) : TextUtilities.SLASH + pngPath;
            image.setFilePath(assetDirectoryName + relativePart);
        }
    }

    /**
     * Builds the feature-vector text of every line of the document.
     *
     * <p>A first pass counts, for the first two blocks and the last block of each page, the
     * pattern of the block's first line (patterns of 8 characters or fewer are ignored). These
     * counts are then handed to {@code getFeatureVectorsAsString}, which uses them to flag
     * repeated lines.
     *
     * @param document the tokenized document
     * @return the serialized feature vectors, or {@code null} when the document has no block
     * @throws GrobidException with status {@code TOO_MANY_BLOCKS} when the number of blocks
     *                         exceeds {@code GrobidProperties.getPdfBlocksMax()}
     */
    public String getAllLinesFeatured(Document document) {
        List<Block> blocks = document.getBlocks();
        if (blocks == null || blocks.isEmpty()) {
            return null;
        }
        if (blocks.size() > GrobidProperties.getPdfBlocksMax().intValue()) {
            throw new GrobidException("Postprocessed document is too big, contains: " + blocks.size(), GrobidExceptionStatus.TOO_MANY_BLOCKS);
        }

        // Typed maps: the raw TreeMap made treeMap.get(...) return Object, which cannot be
        // assigned to an Integer and defeats the Map<String, ...> contract of the callee.
        Map<String, Integer> patternCounts = new TreeMap<>();
        Map<String, Boolean> firstOccurrenceFlagged = new TreeMap<>();

        for (Page page : document.getPages()) {
            List<Block> pageBlocks = page.getBlocks();
            if (pageBlocks == null || pageBlocks.isEmpty()) {
                continue;
            }
            int blockCount = pageBlocks.size();
            for (int blockIndex = 0; blockIndex < blockCount; blockIndex++) {
                // Only the first two blocks and the last block of a page are inspected.
                boolean atPageEdge = blockIndex < 2 || blockIndex > blockCount - 2;
                if (!atPageEdge) {
                    continue;
                }
                String text = pageBlocks.get(blockIndex).getText();
                if (text == null || text.length() == 0) {
                    continue;
                }
                String[] lines = text.split("[\\n\\r]");
                if (lines.length == 0) {
                    continue;
                }
                String pattern = this.featureFactory.getPattern(lines[0]);
                if (pattern.length() <= 8) {
                    continue;
                }
                Integer seen = patternCounts.get(pattern);
                if (seen == null) {
                    patternCounts.put(pattern, 1);
                    firstOccurrenceFlagged.put(pattern, false);
                } else {
                    patternCounts.put(pattern, seen + 1);
                }
            }
        }
        return getFeatureVectorsAsString(document, patternCounts, firstOccurrenceFlagged);
    }

    /**
     * Serializes one feature vector per retained line of the document.
     *
     * <p>Every block of every page is split into lines on {@code \n} / {@code \r}. A line is
     * retained when it has a first whitespace-separated token that is non-empty after cleaning
     * and when {@code TextUtilities.filterLine} does not reject it. Layout information (position,
     * font, bold/italic) always comes from the first token of the enclosing block.
     *
     * <p>Vectors are written with a one-line delay: the vector of a line is appended only once
     * the next retained line has been built, because its {@code pageStatus} may still be changed
     * to {@code PAGEEND} when the next line starts a new page.
     *
     * @param document the document providing pages, blocks and document-level statistics
     * @param map      occurrence count per line pattern; a count above 1 marks the line as repetitive
     * @param map2     per-pattern flag telling whether the first repetitive occurrence was already
     *                 flagged; this map is updated by this method
     * @return the concatenation of {@code printVector()} of all retained lines (empty when none)
     */
    private String getFeatureVectorsAsString(Document document, Map<String, Integer> map, Map<String, Boolean> map2) {
        String pattern;
        Integer num;
        StringBuilder sb = new StringBuilder();
        int documentLenghtChar = document.getDocumentLenghtChar();
        // str / i: font name and font size remembered from earlier retained lines (null / -1 until the first one).
        String str = null;
        int i = -1;
        // i2: number of tokens of the blocks processed so far in the whole document.
        int i2 = 0;
        // Vector of the previous retained line, not yet appended to sb (see the method comment).
        FeaturesVectorSegmentation featuresVectorSegmentation = null;
        for (Page page : document.getPages()) {
            double height = page.getHeight();
            // z: true until the first retained line of this page has been seen.
            boolean z = true;
            // d: bottom edge (y + height) of the last block of this page that had tokens.
            double d = 0.0d;
            int pageLengthChar = page.getPageLengthChar();
            BoundingBox mainArea = page.getMainArea();
            // i3: number of tokens of the blocks processed so far in this page.
            int i3 = 0;
            if (page.getBlocks() != null && page.getBlocks().size() != 0) {
                for (int i4 = 0; i4 < page.getBlocks().size(); i4++) {
                    Block block = page.getBlocks().get(i4);
                    // z2: a VECTOR graphic is connected to the block; z3: a BITMAP graphic is.
                    boolean z2 = false;
                    boolean z3 = false;
                    // z4: last block of the page; z5: first block of the page.
                    boolean z4 = i4 == page.getBlocks().size() - 1;
                    boolean z5 = i4 == 0;
                    List<GraphicObject> connectedGraphics = Document.getConnectedGraphics(block, document);
                    if (connectedGraphics != null) {
                        for (GraphicObject graphicObject : connectedGraphics) {
                            if (graphicObject.getType() == GraphicObjectType.BITMAP) {
                                z3 = true;
                            }
                            if (graphicObject.getType() == GraphicObjectType.VECTOR) {
                                z2 = true;
                            }
                        }
                    }
                    // Vertical gap to the previous block; when this block starts above the previous
                    // block's bottom edge, a fifth of the document's maximum block spacing is used instead.
                    double maxBlockSpacing = d > block.getY() ? document.getMaxBlockSpacing() / 5.0d : block.getY() - d;
                    String text = block.getText();
                    if (text != null) {
                        // d2: characters per unit of block area; stays 0 for degenerate blocks and for
                        // blocks whose text contains the @PAGE or @IMAGE markers.
                        double d2 = 0.0d;
                        if (block.getHeight() != 0.0d && block.getWidth() != 0.0d && block.getText() != null && !block.getText().contains("@PAGE") && !block.getText().contains("@IMAGE")) {
                            d2 = block.getText().length() / (block.getHeight() * block.getWidth());
                        }
                        // z6: the block is contained in, or intersects, the page's main area.
                        boolean z6 = true;
                        BoundingBox fromPointAndDimensions = BoundingBox.fromPointAndDimensions(page.getNumber(), block.getX(), block.getY(), block.getWidth(), block.getHeight());
                        if (mainArea == null || (!mainArea.contains(fromPointAndDimensions) && !mainArea.intersect(fromPointAndDimensions))) {
                            z6 = false;
                        }
                        String[] split = text.split("[\\n\\r]");
                        // i5: length of the longest line of the block, the reference for the line-length feature.
                        int i5 = 0;
                        for (int i6 = 0; i6 < split.length; i6++) {
                            if (split[i6].length() > i5) {
                                i5 = split[i6].length();
                            }
                        }
                        List<LayoutToken> tokens = block.getTokens();
                        // Blocks without tokens produce no vector and do not update d, i2 or i3.
                        if (tokens != null && tokens.size() != 0) {
                            for (int i7 = 0; i7 < split.length; i7++) {
                                String str2 = split[i7];
                                // Layout information is taken from the first token of the block for every line;
                                // tokens is non-empty here, so layoutToken is never null.
                                LayoutToken layoutToken = tokens.size() > 0 ? tokens.get(0) : null;
                                double y = layoutToken.getY();
                                FeaturesVectorSegmentation featuresVectorSegmentation2 = new FeaturesVectorSegmentation();
                                featuresVectorSegmentation2.token = layoutToken;
                                featuresVectorSegmentation2.line = str2;
                                // Repetition is only looked up for the first two blocks and the last block of a page.
                                if ((i4 < 2 || i4 > page.getBlocks().size() - 2) && (num = map.get((pattern = this.featureFactory.getPattern(str2)))) != null && num.intValue() > 1) {
                                    featuresVectorSegmentation2.repetitivePattern = true;
                                    Boolean bool = map2.get(pattern);
                                    // Flag the first occurrence of a repeated pattern, once per pattern.
                                    if (bool != null && !bool.booleanValue()) {
                                        featuresVectorSegmentation2.firstRepetitivePattern = true;
                                        map2.put(pattern, true);
                                    }
                                }
                                // nextToken / nextToken2: first and second whitespace-separated tokens of the line.
                                StringTokenizer stringTokenizer = new StringTokenizer(str2, " \t");
                                String nextToken = stringTokenizer.hasMoreTokens() ? stringTokenizer.nextToken() : null;
                                String nextToken2 = stringTokenizer.hasMoreTokens() ? stringTokenizer.nextToken() : null;
                                if (nextToken != null) {
                                    String trim = nextToken.replaceAll("[ \n]", "").trim();
                                    // Lines with an empty first token or rejected by filterLine yield no vector.
                                    if (trim.length() != 0 && !TextUtilities.filterLine(str2)) {
                                        featuresVectorSegmentation2.string = trim;
                                        featuresVectorSegmentation2.secondString = nextToken2;
                                        featuresVectorSegmentation2.firstPageBlock = z5;
                                        featuresVectorSegmentation2.lastPageBlock = z4;
                                        featuresVectorSegmentation2.lineLength = this.featureFactory.linearScaling(str2.length(), i5, 10);
                                        featuresVectorSegmentation2.punctuationProfile = TextUtilities.punctuationProfile(str2);
                                        if (z3) {
                                            featuresVectorSegmentation2.bitmapAround = true;
                                        }
                                        if (z2) {
                                            featuresVectorSegmentation2.vectorAround = true;
                                        }
                                        featuresVectorSegmentation2.lineStatus = null;
                                        featuresVectorSegmentation2.punctType = null;
                                        // Block status: first line of the block, or a line following a BLOCKEND line,
                                        // is BLOCKSTART; the last line is BLOCKEND; everything else is BLOCKIN.
                                        if (i7 == 0 || (featuresVectorSegmentation != null && featuresVectorSegmentation.blockStatus.equals("BLOCKEND"))) {
                                            featuresVectorSegmentation2.blockStatus = "BLOCKSTART";
                                        } else if (i7 == split.length - 1) {
                                            featuresVectorSegmentation2.blockStatus = "BLOCKEND";
                                        } else if (featuresVectorSegmentation2.blockStatus == null) {
                                            featuresVectorSegmentation2.blockStatus = "BLOCKIN";
                                        }
                                        // Page status: the first retained line of a page is PAGESTART and retroactively
                                        // turns the still-pending previous line into PAGEEND.
                                        if (z) {
                                            featuresVectorSegmentation2.pageStatus = "PAGESTART";
                                            z = false;
                                            if (featuresVectorSegmentation != null) {
                                                featuresVectorSegmentation.pageStatus = "PAGEEND";
                                            }
                                        } else {
                                            featuresVectorSegmentation2.pageStatus = "PAGEIN";
                                            z = false;
                                        }
                                        if (trim.length() == 1) {
                                            featuresVectorSegmentation2.singleChar = true;
                                        }
                                        // ALLCAP, when it applies, overrides INITCAP set just before.
                                        if (Character.isUpperCase(trim.charAt(0))) {
                                            featuresVectorSegmentation2.capitalisation = "INITCAP";
                                        }
                                        if (this.featureFactory.test_all_capital(trim)) {
                                            featuresVectorSegmentation2.capitalisation = "ALLCAP";
                                        }
                                        // Unused local: test_digit is invoked statically on FeatureFactory below.
                                        FeatureFactory featureFactory = this.featureFactory;
                                        if (FeatureFactory.test_digit(trim)) {
                                            featuresVectorSegmentation2.digit = "CONTAINSDIGITS";
                                        }
                                        if (this.featureFactory.test_common(trim)) {
                                            featuresVectorSegmentation2.commonName = true;
                                        }
                                        if (this.featureFactory.test_names(trim)) {
                                            featuresVectorSegmentation2.properName = true;
                                        }
                                        if (this.featureFactory.test_month(trim)) {
                                            featuresVectorSegmentation2.month = true;
                                        }
                                        // ALLDIGIT, when it applies, overrides CONTAINSDIGITS set above.
                                        if (this.featureFactory.isDigit.matcher(trim).find()) {
                                            featuresVectorSegmentation2.digit = "ALLDIGIT";
                                        }
                                        if (this.featureFactory.year.matcher(trim).find()) {
                                            featuresVectorSegmentation2.year = true;
                                        }
                                        if (this.featureFactory.email.matcher(trim).find()) {
                                            featuresVectorSegmentation2.email = true;
                                        }
                                        if (this.featureFactory.http.matcher(trim).find()) {
                                            featuresVectorSegmentation2.http = true;
                                        }
                                        // Font status relative to the font remembered in str.
                                        if (str == null) {
                                            str = layoutToken.getFont();
                                            featuresVectorSegmentation2.fontStatus = "NEWFONT";
                                        } else if (str.equals(layoutToken.getFont())) {
                                            featuresVectorSegmentation2.fontStatus = "SAMEFONT";
                                        } else {
                                            str = layoutToken.getFont();
                                            featuresVectorSegmentation2.fontStatus = "NEWFONT";
                                        }
                                        // Font size (truncated to int) relative to the size remembered in i;
                                        // the very first retained line is reported as HIGHERFONT.
                                        int fontSize = (int) layoutToken.getFontSize();
                                        if (i == -1) {
                                            i = fontSize;
                                            featuresVectorSegmentation2.fontSize = "HIGHERFONT";
                                        } else if (i == fontSize) {
                                            featuresVectorSegmentation2.fontSize = "SAMEFONTSIZE";
                                        } else if (i < fontSize) {
                                            featuresVectorSegmentation2.fontSize = "HIGHERFONT";
                                            i = fontSize;
                                        } else if (i > fontSize) {
                                            featuresVectorSegmentation2.fontSize = "LOWERFONT";
                                            i = fontSize;
                                        }
                                        if (layoutToken.getBold()) {
                                            featuresVectorSegmentation2.bold = true;
                                        }
                                        if (layoutToken.getItalic()) {
                                            featuresVectorSegmentation2.italic = true;
                                        }
                                        // Defaults for the features left unset by the tests above.
                                        if (featuresVectorSegmentation2.capitalisation == null) {
                                            featuresVectorSegmentation2.capitalisation = "NOCAPS";
                                        }
                                        if (featuresVectorSegmentation2.digit == null) {
                                            featuresVectorSegmentation2.digit = "NODIGIT";
                                        }
                                        // Positions are scaled with linearScaling(value, total, 12); i2 and i3 only
                                        // advance after a whole block, so all lines of a block share the same values.
                                        featuresVectorSegmentation2.relativeDocumentPosition = this.featureFactory.linearScaling(i2, documentLenghtChar, 12);
                                        featuresVectorSegmentation2.relativePagePositionChar = this.featureFactory.linearScaling(i3, pageLengthChar, 12);
                                        int linearScaling = this.featureFactory.linearScaling(y, height, 12);
                                        // Cap the vertical position at 12.
                                        if (linearScaling > 12) {
                                            linearScaling = 12;
                                        }
                                        featuresVectorSegmentation2.relativePagePosition = linearScaling;
                                        if (maxBlockSpacing != 0.0d) {
                                            featuresVectorSegmentation2.spacingWithPreviousBlock = this.featureFactory.linearScaling(maxBlockSpacing - document.getMinBlockSpacing(), document.getMaxBlockSpacing() - document.getMinBlockSpacing(), 5);
                                        }
                                        featuresVectorSegmentation2.inMainArea = z6;
                                        // d2 is never set to -1 in this method, so this branch is always taken.
                                        if (d2 != -1.0d) {
                                            featuresVectorSegmentation2.characterDensity = this.featureFactory.linearScaling(d2 - document.getMinCharacterDensity(), document.getMaxCharacterDensity() - document.getMinCharacterDensity(), 5);
                                        }
                                        // Emit the previous line now that its page status is final, then keep this one pending.
                                        if (featuresVectorSegmentation != null) {
                                            sb.append(featuresVectorSegmentation.printVector());
                                        }
                                        featuresVectorSegmentation = featuresVectorSegmentation2;
                                    }
                                }
                            }
                            d = block.getY() + block.getHeight();
                            if (tokens != null) {
                                i3 += tokens.size();
                                i2 += tokens.size();
                            }
                        }
                    }
                }
            }
        }
        // Flush the last pending vector.
        if (featuresVectorSegmentation != null) {
            sb.append(featuresVectorSegmentation.printVector());
        }
        return sb.toString();
    }

    /**
     * Generates pre-labelled training data for the segmentation model from a PDF file.
     *
     * <p>Three files named after the PDF are written:
     * <ul>
     *   <li>{@code .training.segmentation} in {@code str2}: the featured lines;</li>
     *   <li>{@code .training.segmentation.rawtxt} in {@code str2}: the concatenated token texts;</li>
     *   <li>{@code .training.segmentation.tei.xml} in {@code str3}: the TEI built from the model's
     *       labelling, only when the featured lines are not blank.</li>
     * </ul>
     *
     * @param str  path of the input PDF
     * @param str2 directory receiving the feature and raw-text files
     * @param str3 directory receiving the TEI file
     * @param i    identifier written into the {@code xml:id} attribute of the TEI header
     * @throws GrobidException wrapping any failure raised while parsing or writing
     */
    public void createTrainingSegmentation(String str, String str2, String str3, int i) {
        // Declared outside the try so that the finally block releases the source that was
        // actually opened; closing a null reference on failure leaked the opened source.
        DocumentSource documentSource = null;
        try {
            File pdfFile = new File(str);
            documentSource = DocumentSource.fromPdf(pdfFile, -1, -1, true, true, true);
            Document document = new Document(documentSource);
            String pdfName = pdfFile.getName();
            document.addTokenizedDocument(GrobidAnalysisConfig.defaultInstance());
            if (document.getBlocks() == null) {
                throw new Exception("PDF parsing resulted in empty content");
            }
            document.produceStatistics();
            String featuredLines = getAllLinesFeatured(document);
            List<LayoutToken> tokens = document.getTokenizationsFulltext();

            // Featured (unlabelled) lines; try-with-resources closes the writer even if write fails.
            File featureFile = new File(str2 + File.separator + pdfName.replace(".pdf", ".training.segmentation"));
            try (OutputStreamWriter featureWriter = new OutputStreamWriter(new FileOutputStream(featureFile, false), "UTF-8")) {
                featureWriter.write(featuredLines + "\n");
            }

            // Raw text: plain concatenation of the token texts.
            StringBuilder rawText = new StringBuilder();
            for (LayoutToken token : tokens) {
                rawText.append(token.getText());
            }
            FileUtils.writeStringToFile(new File(str2 + File.separator + pdfName.replace(".pdf", ".training.segmentation.rawtxt")), rawText.toString(), "UTF-8");

            if (StringUtils.isNotBlank(featuredLines)) {
                StringBuffer teiBody = trainingExtraction(label(featuredLines), tokens, document);
                File teiFile = new File(str3 + File.separator + pdfName.replace(".pdf", ".training.segmentation.tei.xml"));
                try (OutputStreamWriter teiWriter = new OutputStreamWriter(new FileOutputStream(teiFile, false), "UTF-8")) {
                    teiWriter.write("<?xml version=\"1.0\" ?>\n<tei xml:space=\"preserve\">\n\t<teiHeader>\n\t\t<fileDesc xml:id=\"" + i + "\"/>\n\t</teiHeader>\n\t<text xml:lang=\"en\">\n");
                    teiWriter.write(teiBody.toString());
                    teiWriter.write("\n\t</text>\n</tei>\n");
                }
            }
        } catch (Exception e) {
            throw new GrobidException("An exception occured while running Grobid training data generation for segmentation model.", e);
        } finally {
            DocumentSource.close(documentSource, true, true, true);
        }
    }

    /**
     * Generates unlabelled ("blank") training data for the segmentation model from a PDF file.
     *
     * <p>Two files named after the PDF are written:
     * <ul>
     *   <li>{@code .training.blank} in {@code str}: the featured lines;</li>
     *   <li>{@code .training.blank.tei.xml} in {@code str2}: a TEI skeleton wrapping the
     *       HTML-encoded token texts, only when that text is not blank.</li>
     * </ul>
     *
     * @param file the input PDF
     * @param str  directory receiving the feature file
     * @param str2 directory receiving the TEI file
     * @param i    identifier written into the {@code xml:id} attribute of the TEI header
     * @throws GrobidException wrapping any failure raised while parsing or writing
     */
    public void createBlankTrainingData(File file, String str, String str2, int i) {
        // Declared outside the try so that the finally block releases the source that was
        // actually opened; closing a null reference on failure leaked the opened source.
        DocumentSource documentSource = null;
        try {
            documentSource = DocumentSource.fromPdf(file, -1, -1, true, true, true);
            Document document = new Document(documentSource);
            String pdfName = file.getName();
            document.addTokenizedDocument(GrobidAnalysisConfig.defaultInstance());
            if (document.getBlocks() == null) {
                throw new Exception("PDF parsing resulted in empty content");
            }
            document.produceStatistics();
            String featuredLines = getAllLinesFeatured(document);
            List<LayoutToken> tokens = document.getTokenizationsFulltext();

            // Featured lines; try-with-resources closes the writer even if write fails.
            File featureFile = new File(str + File.separator + pdfName.replace(".pdf", ".training.blank"));
            try (OutputStreamWriter featureWriter = new OutputStreamWriter(new FileOutputStream(featureFile, false), "UTF-8")) {
                featureWriter.write(featuredLines + "\n");
            }

            // TEI body: HTML-encoded token texts, concatenated without any markup.
            StringBuilder encodedText = new StringBuilder();
            for (LayoutToken token : tokens) {
                encodedText.append(TextUtilities.HTMLEncode(token.getText()));
            }
            String teiBody = encodedText.toString();
            if (StringUtils.isNotBlank(teiBody)) {
                File teiFile = new File(str2 + File.separator + pdfName.replace(".pdf", ".training.blank.tei.xml"));
                try (OutputStreamWriter teiWriter = new OutputStreamWriter(new FileOutputStream(teiFile, false), "UTF-8")) {
                    teiWriter.write("<?xml version=\"1.0\" ?>\n<tei xml:space=\"preserve\">\n\t<teiHeader>\n\t\t<fileDesc xml:id=\"" + i + "\"/>\n\t</teiHeader>\n\t<text xml:lang=\"en\">\n");
                    teiWriter.write(teiBody);
                    teiWriter.write("\n\t</text>\n</tei>\n");
                }
            }
        } catch (Exception e) {
            throw new GrobidException("An exception occured while running Grobid training data generation for segmentation model.", e);
        } finally {
            DocumentSource.close(documentSource, true, true, true);
        }
    }

    public StringBuffer trainingExtraction(String str, List<LayoutToken> list, Document document) {
        StringBuffer stringBuffer = new StringBuffer();
        try {
            List<Block> blocks = document.getBlocks();
            int i = 0;
            int i2 = 0;
            StringTokenizer stringTokenizer = new StringTokenizer(str, "\n");
            String str2 = null;
            String str3 = null;
            String str4 = null;
            boolean z = true;
            while (stringTokenizer.hasMoreTokens()) {
                String trim = stringTokenizer.nextToken().trim();
                if (trim.length() != 0) {
                    StringTokenizer stringTokenizer2 = new StringTokenizer(trim, " \t");
                    ArrayList arrayList = new ArrayList();
                    int i3 = 0;
                    int countTokens = stringTokenizer2.countTokens();
                    while (stringTokenizer2.hasMoreTokens()) {
                        String trim2 = stringTokenizer2.nextToken().trim();
                        if (i3 == 0) {
                            str3 = TextUtilities.HTMLEncode(trim2);
                        } else if (i3 == 1) {
                            TextUtilities.HTMLEncode(trim2);
                        } else if (i3 == countTokens - 1) {
                            str2 = trim2;
                        } else {
                            arrayList.add(trim2);
                        }
                        i3++;
                    }
                    String str5 = null;
                    while (str5 == null && i < blocks.size()) {
                        Block block = blocks.get(i);
                        if (block.getTokens() == null) {
                            i++;
                            i2 = 0;
                        } else {
                            String text = block.getText();
                            if (text == null || text.trim().length() == 0) {
                                i++;
                                i2 = 0;
                            } else {
                                String[] split = text.split("[\\n\\r]");
                                if (split.length == 0 || i2 >= split.length) {
                                    i++;
                                    i2 = 0;
                                } else {
                                    str5 = split[i2];
                                    i2++;
                                    if (str5.trim().length() == 0) {
                                        str5 = null;
                                    } else if (TextUtilities.filterLine(str5)) {
                                        str5 = null;
                                    }
                                }
                            }
                        }
                    }
                    String HTMLEncode = TextUtilities.HTMLEncode(str5);
                    if (1 != 0 && !z) {
                        stringBuffer.append("<lb/>");
                    }
                    String str6 = null;
                    if (str4 != null) {
                        str6 = str4.startsWith("I-") ? str4.substring(2, str4.length()) : str4;
                    }
                    String str7 = null;
                    if (str2 != null) {
                        str7 = str2.startsWith("I-") ? str2.substring(2, str2.length()) : str2;
                    }
                    if (str4 != null) {
                        testClosingTag(stringBuffer, str7, str6, str2);
                    }
                    boolean writeField = writeField(stringBuffer, HTMLEncode, str2, str6, str3, SegmentationLabels.HEADER_LABEL, "<front>", false, 3);
                    if (!writeField) {
                        writeField = writeField(stringBuffer, HTMLEncode, str2, str6, str3, SegmentationLabels.HEADNOTE_LABEL, "<note place=\"headnote\">", false, 3);
                    }
                    if (!writeField) {
                        writeField = writeField(stringBuffer, HTMLEncode, str2, str6, str3, SegmentationLabels.FOOTNOTE_LABEL, "<note place=\"footnote\">", false, 3);
                    }
                    if (!writeField) {
                        writeField = writeField(stringBuffer, HTMLEncode, str2, str6, str3, SegmentationLabels.MARGINNOTE_LABEL, "<note place=\"margin\">", false, 3);
                    }
                    if (!writeField) {
                        writeField = writeField(stringBuffer, HTMLEncode, str2, str6, str3, "<page>", "<page>", false, 3);
                    }
                    if (!writeField) {
                        writeField = writeField(stringBuffer, HTMLEncode, str2, str6, str3, SegmentationLabels.REFERENCES_LABEL, "<listBibl>", false, 3);
                    }
                    if (!writeField) {
                        writeField = writeField(stringBuffer, HTMLEncode, str2, str6, str3, SegmentationLabels.BODY_LABEL, SegmentationLabels.BODY_LABEL, false, 3);
                    }
                    if (!writeField) {
                        writeField = writeField(stringBuffer, HTMLEncode, str2, str6, str3, "<cover>", "<titlePage>", false, 3);
                    }
                    if (!writeField) {
                        writeField = writeField(stringBuffer, HTMLEncode, str2, str6, str3, "<annex>", "<div type=\"annex\">", false, 3);
                    }
                    if (!writeField) {
                        writeField(stringBuffer, HTMLEncode, str2, str6, str3, SegmentationLabels.ACKNOWLEDGEMENT_LABEL, "<div type=\"acknowledgement\">", false, 3);
                    }
                    str4 = str2;
                    if (!stringTokenizer.hasMoreTokens() && str4 != null) {
                        testClosingTag(stringBuffer, "", str7, str2);
                    }
                    if (z) {
                        z = false;
                    }
                }
            }
            return stringBuffer;
        } catch (Exception e) {
            throw new GrobidException("An exception occured while running Grobid.", e);
        }
    }

    /**
     * Appends one line of text to the output if its label belongs to the given field.
     *
     * <p>The call is a no-op returning {@code false} unless {@code str2} equals {@code str5}
     * or {@code "I-" + str5}. On a match, every {@code "@BULLET"} in {@code str} is replaced by
     * a bullet character. The text is appended on its own when {@code str2} equals {@code str3}
     * or {@code "I-" + str3}, or when {@code str3} is {@code "<titlePage>"}. In every other case
     * (including a {@code null} {@code str3}) it is preceded by {@code i} tab characters and the
     * opening tag {@code str6}.
     *
     * @param stringBuffer buffer receiving the output
     * @param str          text of the line; dereferenced only when the label matches
     * @param str2         label of the current line, with or without the {@code "I-"} prefix;
     *                     must not be {@code null}
     * @param str3         label the current one is compared with to decide whether an opening
     *                     tag is needed; may be {@code null}
     * @param str4         not used by this method
     * @param str5         label handled by this call
     * @param str6         opening tag written when a new element starts
     * @param z            not used by this method
     * @param i            number of tab characters written before the opening tag
     * @return {@code true} if the label matched and the line was written, {@code false} otherwise
     */
    private boolean writeField(StringBuffer stringBuffer, String str, String str2, String str3, String str4, String str5, String str6, boolean z, int i) {
        final boolean labelMatches = str2.equals(str5) || str2.equals("I-" + str5);
        if (!labelMatches) {
            return false;
        }

        final String text = str.replace("@BULLET", "•");

        // The line continues the element already open when the label is unchanged,
        // or when the compared label is the title page marker.
        final boolean continuesOpenElement = str2.equals(str3)
                || str2.equals("I-" + str3)
                || "<titlePage>".equals(str3);

        if (!continuesOpenElement) {
            for (int tabsLeft = i; tabsLeft > 0; tabsLeft--) {
                stringBuffer.append("\t");
            }
            stringBuffer.append(str6);
        }
        stringBuffer.append(text);
        return true;
    }

    /**
     * Writes the closing tag for {@code str2} when it differs from {@code str}.
     *
     * <p>Nothing is written when {@code str} equals {@code str2}, or when {@code str2} is not
     * one of the labels handled below. Each closing tag is followed by two line feeds.
     *
     * @param stringBuffer buffer receiving the closing tag
     * @param str          label compared against {@code str2}; must not be {@code null}
     * @param str2         label whose element is closed; must not be {@code null} when it
     *                     differs from {@code str}
     * @param str3         not used by this method
     * @return {@code true} only when a {@code </listBibl>} closing tag was written
     */
    private boolean testClosingTag(StringBuffer stringBuffer, String str, String str2, String str3) {
        if (str.equals(str2)) {
            return false;
        }

        final String closingTag;
        boolean closedReferences = false;

        if (str2.equals(SegmentationLabels.HEADER_LABEL)) {
            closingTag = "</front>\n\n";
        } else if (str2.equals(SegmentationLabels.BODY_LABEL)) {
            closingTag = "</body>\n\n";
        } else if (str2.equals(SegmentationLabels.HEADNOTE_LABEL)
                || str2.equals(SegmentationLabels.FOOTNOTE_LABEL)
                || str2.equals(SegmentationLabels.MARGINNOTE_LABEL)) {
            // The three kinds of note share one closing tag.
            closingTag = "</note>\n\n";
        } else if (str2.equals(SegmentationLabels.REFERENCES_LABEL)) {
            closingTag = "</listBibl>\n\n";
            closedReferences = true;
        } else if (str2.equals("<page>")) {
            closingTag = "</page>\n\n";
        } else if (str2.equals("<cover>")) {
            closingTag = "</titlePage>\n\n";
        } else if (str2.equals("<annex>") || str2.equals(SegmentationLabels.ACKNOWLEDGEMENT_LABEL)) {
            // Annex and acknowledgement are both closed with the same tag.
            closingTag = "</div>\n\n";
        } else {
            // Label not handled here: leave the buffer untouched.
            return false;
        }

        stringBuffer.append(closingTag);
        return closedReferences;
    }

    /**
     * Closes this parser by delegating to the superclass implementation; no additional
     * work is done here.
     *
     * @throws IOException if {@code super.close()} throws it
     */
    @Override // org.grobid.core.engines.AbstractParser, java.io.Closeable, java.lang.AutoCloseable
    public void close() throws IOException {
        super.close();
    }
}
