package org.grobid.core.document;

import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Predicate;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Iterables;
import com.google.common.collect.LinkedListMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.SortedSetMultimap;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.stream.Collectors;
import javax.xml.parsers.SAXParserFactory;
import org.apache.commons.io.IOUtils;
import org.apache.xalan.templates.Constants;
import org.apache.xpath.XPath;
import org.grobid.core.analyzers.Analyzer;
import org.grobid.core.analyzers.GrobidAnalyzer;
import org.grobid.core.data.BibDataSet;
import org.grobid.core.data.BiblioItem;
import org.grobid.core.data.Equation;
import org.grobid.core.data.Figure;
import org.grobid.core.data.Metadata;
import org.grobid.core.data.Table;
import org.grobid.core.engines.Engine;
import org.grobid.core.engines.config.GrobidAnalysisConfig;
import org.grobid.core.engines.counters.FigureCounters;
import org.grobid.core.engines.counters.TableRejectionCounters;
import org.grobid.core.engines.label.TaggingLabel;
import org.grobid.core.exceptions.GrobidException;
import org.grobid.core.exceptions.GrobidExceptionStatus;
import org.grobid.core.features.FeatureFactory;
import org.grobid.core.features.FeaturesVectorHeader;
import org.grobid.core.layout.Block;
import org.grobid.core.layout.BoundingBox;
import org.grobid.core.layout.Cluster;
import org.grobid.core.layout.GraphicObject;
import org.grobid.core.layout.GraphicObjectType;
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.layout.PDFAnnotation;
import org.grobid.core.layout.Page;
import org.grobid.core.layout.VectorGraphicBoxCalculator;
import org.grobid.core.sax.PDFALTOAnnotationSaxHandler;
import org.grobid.core.sax.PDFALTOOutlineSaxHandler;
import org.grobid.core.sax.PDFALTOSaxHandler;
import org.grobid.core.sax.PDFMetadataSaxHandler;
import org.grobid.core.utilities.BoundingBoxCalculator;
import org.grobid.core.utilities.ElementCounter;
import org.grobid.core.utilities.LayoutTokensUtil;
import org.grobid.core.utilities.Pair;
import org.grobid.core.utilities.TextUtilities;
import org.grobid.core.utilities.Utilities;
import org.grobid.core.utilities.matching.EntityMatcherException;
import org.grobid.core.utilities.matching.ReferenceMarkerMatcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/grobid/core/document/Document.class */
public class Document implements Serializable {
    // Serialization version for this Serializable class.
    public static final long serialVersionUID = 1;
    // NOTE(review): not referenced in the visible part of the file; presumably a distance
    // threshold used when associating figures with graphic boxes - confirm at the use site.
    public static final int MAX_FIG_BOX_DISTANCE = 70;
    // Source the document was built from; null for documents created via the no-arg constructor.
    protected final transient DocumentSource documentSource;
    // Absolute path of the main XML file; the "_annot.xml", "_outline.xml" and "_metadata.xml"
    // side files are looked up by appending those suffixes to this path.
    protected String pathXML;
    protected String lang;
    protected transient List<Page> pages;
    protected transient List<Cluster> clusters;
    protected transient List<Block> blocks;
    // Lists of block numbers. blockDocumentHeaders is used as indices into `blocks`;
    // the other lists presumably follow the same convention - confirm against their users.
    protected List<Integer> blockHeaders;
    protected List<Integer> blockFooters;
    protected List<Integer> blockSectionTitles;
    protected List<Integer> acknowledgementBlocks;
    protected List<Integer> blockDocumentHeaders;
    protected transient SortedSet<DocumentPiece> blockReferences;
    protected List<Integer> blockTables;
    protected List<Integer> blockFigures;
    protected List<Integer> blockHeadTables;
    protected List<Integer> blockHeadFigures;
    protected transient FeatureFactory featureFactory;
    protected transient SortedSetMultimap<String, DocumentPiece> labeledBlocks;
    // Document-level token sequence; blocks refer to it through their start/end token positions.
    protected List<LayoutToken> tokenizations;
    protected transient Map<String, BibDataSet> teiIdToBibDataSets;
    protected transient List<BibDataSet> bibDataSets;
    protected transient BiblioItem resHeader;
    protected String tei;
    protected transient ReferenceMarkerMatcher referenceMarkerMatcher;
    // Graphic objects collected while parsing (plus vector-graphic boxes when enabled).
    protected transient List<GraphicObject> images;
    protected transient List<PDFAnnotation> pdfAnnotations;
    protected transient DocumentNode outlineRoot;
    protected transient Metadata metadata;
    // Accepted graphic objects keyed by the page value reported by GraphicObject.getPage().
    protected transient Multimap<Integer, GraphicObject> imagesPerPage;
    protected double maxCharacterDensity;
    protected double minCharacterDensity;
    protected double maxBlockSpacing;
    protected double minBlockSpacing;
    // Initialised to -1 by the constructors. (The "Lenght" spelling is part of the existing API.)
    protected int documentLenghtChar;
    // Both initialised to -1 by the constructors.
    protected int beginBody;
    protected int beginReferences;
    protected boolean titleMatchNum;
    protected transient List<Figure> figures;
    // Replaced on every call to glueImagesIfNecessary that actually merges images.
    protected transient Predicate<GraphicObject> validGraphicObjectPredicate;
    // NOTE(review): purpose not visible in this part of the file - confirm before relying on it.
    protected int m;
    protected transient List<Table> tables;
    protected transient List<Equation> equations;
    // Defaults to GrobidAnalyzer.getInstance(); can be replaced through setAnalyzer.
    protected transient Analyzer analyzer;
    protected static final int nbBins = 12;
    protected static final Logger LOGGER = LoggerFactory.getLogger((Class<?>) Document.class);
    protected static double MIN_DISTANCE = 100.0d;

    /**
     * Replaces the list of graphic objects attached to this document.
     *
     * @param list the new graphic objects; stored as-is, without copying
     */
    public void setImages(List<GraphicObject> list) {
        images = list;
    }

    /**
     * Creates a document backed by the given source and records the absolute path of
     * its XML file.
     *
     * @param documentSource the source to read from; must not be null and must provide
     *                       a non-null XML file, otherwise a NullPointerException is thrown
     */
    public Document(DocumentSource documentSource) {
        initDefaults();
        this.documentSource = documentSource;
        setPathXML(documentSource.getXmlFile());
    }

    /**
     * Creates a document that is not backed by any source (see {@link #createFromText(String)}).
     */
    protected Document() {
        initDefaults();
        this.documentSource = null;
    }

    /**
     * Sets the initial state shared by both constructors.
     *
     * Only fields whose initial value differs from the Java default are assigned here;
     * all other fields keep their language defaults (null, 0.0 or false), which is what
     * the constructors previously assigned explicitly.
     */
    private void initDefaults() {
        this.imagesPerPage = LinkedListMultimap.create();
        // -1 marks "not computed yet" for these positions/lengths.
        this.documentLenghtChar = -1;
        this.beginBody = -1;
        this.beginReferences = -1;
        this.analyzer = GrobidAnalyzer.getInstance();
    }

    /**
     * Builds a source-less document whose content is the tokenization of the given text.
     *
     * @param str the raw text to tokenize
     * @return a new document populated through {@link #fromText(String)}
     */
    public static Document createFromText(String str) {
        Document textDocument = new Document();
        textDocument.fromText(str);
        return textDocument;
    }

    /**
     * Sets the language associated with this document.
     *
     * @param str the language value to store
     */
    public void setLanguage(String str) {
        lang = str;
    }

    /**
     * @return the language previously set with {@link #setLanguage(String)}, or null if none was set
     */
    public String getLanguage() {
        return lang;
    }

    /**
     * @return the header bibliographic item held by this document; may be null
     */
    public BiblioItem getResHeader() {
        return resHeader;
    }

    /**
     * @return the live list of blocks, or null when no block has been added yet
     */
    public List<Block> getBlocks() {
        return blocks;
    }

    /**
     * @return the bibliographic data sets held by this document; may be null
     */
    public List<BibDataSet> getBibDataSets() {
        return bibDataSets;
    }

    /**
     * Appends a block to this document, creating the block list on first use.
     *
     * @param block the block to append
     */
    public void addBlock(Block block) {
        List<Block> target = this.blocks;
        if (target == null) {
            target = new ArrayList<>();
            this.blocks = target;
        }
        target.add(block);
    }

    /**
     * @return the live list of graphic objects attached to this document; may be null
     */
    public List<GraphicObject> getImages() {
        return images;
    }

    /**
     * @return the PDF annotations collected for this document; may be null
     */
    public List<PDFAnnotation> getPDFAnnotations() {
        return pdfAnnotations;
    }

    /**
     * @return the metadata read for this document, or null when none was loaded
     */
    public Metadata getMetadata() {
        return metadata;
    }

    /**
     * Records the absolute path of the XML file this document is parsed from.
     *
     * @param file the XML file; must not be null
     */
    protected void setPathXML(File file) {
        String absolutePath = file.getAbsolutePath();
        this.pathXML = absolutePath;
    }

    /**
     * @return the live document-level token list; may be null before any content is loaded
     */
    public List<LayoutToken> getTokenizations() {
        return tokenizations;
    }

    /**
     * @return the stored document length value; -1 is the initial value set at construction
     */
    public int getDocumentLenghtChar() {
        return documentLenghtChar;
    }

    /**
     * @return the stored maximum character density
     */
    public double getMaxCharacterDensity() {
        return maxCharacterDensity;
    }

    /**
     * @return the stored minimum character density
     */
    public double getMinCharacterDensity() {
        return minCharacterDensity;
    }

    /**
     * @return the stored maximum block spacing
     */
    public double getMaxBlockSpacing() {
        return maxBlockSpacing;
    }

    /**
     * @return the stored minimum block spacing
     */
    public double getMinBlockSpacing() {
        return minBlockSpacing;
    }

    /**
     * Replaces the analyzer associated with this document.
     *
     * @param analyzer the analyzer to use from now on
     */
    public void setAnalyzer(Analyzer analyzer) {
        Analyzer replacement = analyzer;
        this.analyzer = replacement;
    }

    /**
     * @return the analyzer currently associated with this document
     */
    public Analyzer getAnalyzer() {
        return analyzer;
    }

    /**
     * Collects the tokens of every block listed in {@code blockDocumentHeaders},
     * in list order. Blocks with a null or empty token list contribute nothing.
     *
     * @return a new list holding the header tokens
     */
    @Deprecated
    public List<LayoutToken> getTokenizationsHeader() {
        List<LayoutToken> headerTokens = new ArrayList<>();
        for (Integer blockIndex : this.blockDocumentHeaders) {
            List<LayoutToken> blockTokens = this.blocks.get(blockIndex.intValue()).getTokens();
            if (blockTokens == null || blockTokens.isEmpty()) {
                continue;
            }
            headerTokens.addAll(blockTokens);
        }
        return headerTokens;
    }

    /**
     * Collects, block by block, the document tokens from each block's start token
     * position up to (but not including) its end token position.
     *
     * @return a new list holding the collected tokens
     */
    @Deprecated
    public List<LayoutToken> getTokenizationsFulltext() {
        List<LayoutToken> fulltextTokens = new ArrayList<>();
        for (Block block : this.blocks) {
            int position = block.getStartToken();
            int limit = block.getEndToken();
            while (position < limit) {
                fulltextTokens.add(this.tokenizations.get(position));
                position++;
            }
        }
        return fulltextTokens;
    }

    /**
     * Collects the document tokens covered by each piece in {@code blockReferences},
     * from the piece's left token position up to (but not including) its right one.
     *
     * @return a new list holding the collected tokens
     */
    @Deprecated
    public List<LayoutToken> getTokenizationsReferences() {
        List<LayoutToken> referenceTokens = new ArrayList<>();
        for (DocumentPiece piece : this.blockReferences) {
            int from = piece.getLeft().getTokenDocPos();
            int to = piece.getRight().getTokenDocPos();
            referenceTokens.addAll(this.tokenizations.subList(from, to));
        }
        return referenceTokens;
    }

    /**
     * Populates this document from raw text: the text is tokenized, and the resulting
     * tokens are placed in a single block on a single page (page number 1).
     * Any previously held tokens, blocks, pages and images are replaced.
     *
     * @param str the raw text to tokenize
     * @return the document-level token list that was built
     * @throws GrobidException if the tokenizer fails; the original failure is kept as the cause
     */
    public List<LayoutToken> fromText(String str) {
        List<String> tokenTexts;
        try {
            tokenTexts = GrobidAnalyzer.getInstance().tokenize(str);
        } catch (Exception e) {
            // Previously this was only logged and execution continued with a null list,
            // which then always failed with an uninformative NullPointerException.
            throw new GrobidException("Fail tokenization for " + str, e, GrobidExceptionStatus.PARSING_ERROR);
        }
        this.tokenizations = tokenTexts.stream().map(LayoutToken::new).collect(Collectors.toList());

        Block block = new Block();
        for (LayoutToken token : this.tokenizations) {
            block.addToken(token);
        }
        Page page = new Page(1);
        block.setPage(page);
        block.setText(str);

        this.pages = new ArrayList<>();
        this.pages.add(page);
        this.blocks = new ArrayList<>();
        this.blocks.add(block);
        page.addBlock(block);

        // The end token is the index of the last token (-1 for empty text).
        block.setStartToken(0);
        block.setEndToken(tokenTexts.size() - 1);

        this.images = new ArrayList<>();
        return this.tokenizations;
    }

    /** Pages holding more graphic objects than this are dropped from the per-page image index. */
    private static final int MAX_IMAGES_PER_PAGE = 100;

    /**
     * Parses the XML file at {@code pathXML} and its optional side files
     * ({@code _annot.xml}, {@code _outline.xml}, {@code _metadata.xml}), then computes the
     * page main areas and the per-page image index.
     *
     * A failure while parsing the main file is fatal. A failure while parsing a side file
     * is logged and that side file is ignored, unless the failure is a GrobidException,
     * which is propagated.
     *
     * @param grobidAnalysisConfig supplies the optional analyzer for the main parse and
     *                             tells whether vector graphics must be processed
     * @return the document-level token list produced by the main parse
     * @throws GrobidException if the main file cannot be parsed, if parsing produced no
     *                         block, if vector graphics processing fails, or if any other
     *                         exception occurs during processing (wrapped, with the cause kept)
     */
    public List<LayoutToken> addTokenizedDocument(GrobidAnalysisConfig grobidAnalysisConfig) {
        this.images = new ArrayList<>();
        PDFALTOSaxHandler altoHandler = new PDFALTOSaxHandler(this, this.images);
        if (grobidAnalysisConfig.getAnalyzer() != null) {
            altoHandler.setAnalyzer(grobidAnalysisConfig.getAnalyzer());
        }
        this.pdfAnnotations = new ArrayList<>();
        PDFALTOAnnotationSaxHandler annotationHandler = new PDFALTOAnnotationSaxHandler(this, this.pdfAnnotations);
        PDFALTOOutlineSaxHandler outlineHandler = new PDFALTOOutlineSaxHandler(this);
        PDFMetadataSaxHandler metadataHandler = new PDFMetadataSaxHandler(this);
        // NOTE(review): the factory is used with its default settings; if these XML files can
        // be influenced by untrusted input, consider disabling DTDs/external entities.
        SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        this.tokenizations = null;
        File altoFile = new File(this.pathXML);
        try {
            parseXmlFile(parserFactory, altoFile, altoHandler);
            this.tokenizations = altoHandler.getTokenization();

            parseOptionalSideFile(parserFactory, new File(this.pathXML + "_annot.xml"), annotationHandler, () -> {
                // The annotation handler fills pdfAnnotations directly; nothing to collect.
            });
            parseOptionalSideFile(parserFactory, new File(this.pathXML + "_outline.xml"), outlineHandler, () -> {
                this.outlineRoot = outlineHandler.getRootNode();
            });
            parseOptionalSideFile(parserFactory, new File(this.pathXML + "_metadata.xml"), metadataHandler, () -> {
                this.metadata = metadataHandler.getMetadata();
            });

            if (getBlocks() == null) {
                throw new GrobidException("PDF parsing resulted in empty content", GrobidExceptionStatus.NO_BLOCKS);
            }
            calculatePageMainAreas();

            if (grobidAnalysisConfig.isProcessVectorGraphics()) {
                try {
                    this.images.addAll(VectorGraphicBoxCalculator.calculate(this).values());
                } catch (Exception e) {
                    throw new GrobidException("Cannot process vector graphics: " + altoFile, e, GrobidExceptionStatus.PARSING_ERROR);
                }
            }

            indexImagesByPage();
            return this.tokenizations;
        } catch (GrobidException e) {
            throw e;
        } catch (Exception e) {
            throw new GrobidException("Cannot parse file: " + altoFile, e, GrobidExceptionStatus.PARSING_ERROR);
        }
    }

    /**
     * Parses one XML file with a fresh SAX parser. The stream is always closed, and a
     * failure to close it is deliberately ignored.
     */
    private static void parseXmlFile(SAXParserFactory parserFactory, File xmlFile,
                                     org.xml.sax.helpers.DefaultHandler handler)
            throws IOException, org.xml.sax.SAXException, javax.xml.parsers.ParserConfigurationException {
        InputStream in = new FileInputStream(xmlFile);
        try {
            parserFactory.newSAXParser().parse(in, handler);
        } finally {
            IOUtils.closeQuietly(in);
        }
    }

    /**
     * Parses a side file if it exists and runs {@code onSuccess} after a successful parse.
     * Failures other than GrobidException are logged with their stack trace and swallowed.
     */
    private static void parseOptionalSideFile(SAXParserFactory parserFactory, File sideFile,
                                              org.xml.sax.helpers.DefaultHandler handler, Runnable onSuccess) {
        if (!sideFile.exists()) {
            return;
        }
        try {
            parseXmlFile(parserFactory, sideFile, handler);
            onSuccess.run();
        } catch (GrobidException e) {
            throw e;
        } catch (Exception e) {
            // The exception must be the last argument, otherwise SLF4J drops it
            // (the message has no placeholder) and the stack trace is lost.
            LOGGER.error("Cannot parse file: " + sideFile, e);
        }
    }

    /**
     * Fills {@code imagesPerPage} from {@code images}, drops pages with too many images,
     * and merges vertically stacked bitmaps on the remaining pages.
     */
    private void indexImagesByPage() {
        for (GraphicObject image : this.images) {
            // Non-bitmap objects are always kept; bitmaps must pass the validity check.
            if (image.getType() != GraphicObjectType.BITMAP || isValidBitmapGraphicObject(image)) {
                this.imagesPerPage.put(Integer.valueOf(image.getPage()), image);
            }
        }
        // Iterate over a copy of the keys because entries are removed/replaced in the loop.
        for (Integer pageKey : new HashSet<>(this.imagesPerPage.keySet())) {
            Collection<GraphicObject> pageImages = this.imagesPerPage.get(pageKey);
            if (pageImages.size() > MAX_IMAGES_PER_PAGE) {
                this.imagesPerPage.removeAll(pageKey);
                Engine.getCntManager().i(FigureCounters.TOO_MANY_FIGURES_PER_PAGE);
                continue;
            }
            List<GraphicObject> glued = glueImagesIfNecessary(pageKey, Lists.newArrayList(pageImages));
            if (glued != null) {
                this.imagesPerPage.removeAll(pageKey);
                this.imagesPerPage.putAll(pageKey, glued);
            }
        }
    }

    /**
     * Computes each block's bounding box and derives a "main area" rectangle for every page.
     *
     * Blocks that are large enough vote for the horizontal extent of the main area
     * (separately for even and odd page numbers) and for its vertical extent (all pages).
     * When either the even or the odd pages have no such block, every page simply gets its
     * full page rectangle as main area.
     */
    private void calculatePageMainAreas() {
        ElementCounter<Integer> evenLeftEdges = new ElementCounter<>();
        ElementCounter<Integer> evenRightEdges = new ElementCounter<>();
        ElementCounter<Integer> oddLeftEdges = new ElementCounter<>();
        ElementCounter<Integer> oddRightEdges = new ElementCounter<>();
        ElementCounter<Integer> topEdges = new ElementCounter<>();
        ElementCounter<Integer> bottomEdges = new ElementCounter<>();

        for (Block block : this.blocks) {
            BoundingBox tokenBox = BoundingBoxCalculator.calculateOneBox(block.getTokens());
            if (tokenBox != null) {
                block.setBoundingBox(tokenBox);
            }

            // Only blocks with a non-zero x, at least 20 x 20 in size and 3000 in area take part.
            boolean significant = block.getX() != 0.0d
                    && block.getHeight() >= 20.0d
                    && block.getWidth() >= 20.0d
                    && block.getHeight() * block.getWidth() >= 3000.0d;
            if (!significant) {
                continue;
            }

            boolean onEvenPage = block.getPageNumber() % 2 == 0;
            ElementCounter<Integer> leftEdges = onEvenPage ? evenLeftEdges : oddLeftEdges;
            ElementCounter<Integer> rightEdges = onEvenPage ? evenRightEdges : oddRightEdges;
            leftEdges.i((int) block.getX());
            rightEdges.i((int) (block.getX() + block.getWidth()));
            topEdges.i((int) block.getY());
            bottomEdges.i((int) (block.getY() + block.getHeight()));
        }

        boolean missingVotes = evenLeftEdges.getCnts().isEmpty() || oddLeftEdges.getCnts().isEmpty();
        if (missingVotes) {
            for (Page page : this.pages) {
                page.setMainArea(BoundingBox.fromPointAndDimensions(
                        page.getNumber(), 0.0d, 0.0d, page.getWidth(), page.getHeight()));
            }
            return;
        }

        int evenX = 0;
        int evenWidth = 0;
        if (this.pages.size() > 1) {
            evenX = getCoordItem(evenLeftEdges, true);
            evenWidth = (getCoordItem(evenRightEdges, false) - evenX) + 1;
        }
        int oddX = getCoordItem(oddLeftEdges, true);
        int oddWidth = (getCoordItem(oddRightEdges, false) - oddX) + 1;
        int top = getCoordItem(topEdges, true);
        int height = (getCoordItem(bottomEdges, false) - top) + 1;

        for (Page page : this.pages) {
            boolean even = page.isEven();
            int x = even ? evenX : oddX;
            int width = even ? evenWidth : oddWidth;
            page.setMainArea(BoundingBox.fromPointAndDimensions(page.getNumber(), x, top, width, height));
        }
    }

    /**
     * Merges runs of consecutive bitmaps that are stacked vertically: same width
     * (within 1e-4) and each one starting at the y coordinate where the previous one ends.
     *
     * Each run is replaced, in the given list, by a single VECTOR_BOX object covering the
     * bounding boxes of the whole run; the other slots of the run are set to null. The
     * list passed in is therefore modified when at least one run is found.
     *
     * @param num  the page key of the images (not used by the computation itself)
     * @param list the graphic objects of one page, in order
     * @return null when nothing had to be merged; otherwise a new list holding the non-null
     *         entries that pass {@code isValidBitmapGraphicObject}
     */
    protected ArrayList<GraphicObject> glueImagesIfNecessary(Integer num, List<GraphicObject> list) {
        // Each pair is a run [a, b) of list indices to merge.
        List<Pair<Integer, Integer>> runs = new ArrayList<>();
        int runStart = 0;
        int runEnd = 0;
        for (int idx = 1; idx < list.size(); idx++) {
            GraphicObject previous = list.get(idx - 1);
            GraphicObject current = list.get(idx);
            boolean stacked = previous.getType() == GraphicObjectType.BITMAP
                    && current.getType() == GraphicObjectType.BITMAP
                    && Utilities.doubleEquals(previous.getBoundingBox().getWidth(), current.getBoundingBox().getWidth(), 1.0E-4d)
                    && Utilities.doubleEquals(previous.getBoundingBox().getY2(), current.getBoundingBox().getY(), 1.0E-4d);
            if (stacked) {
                runEnd++;
                continue;
            }
            // The run (if any) ends here; a new candidate run starts at the current image.
            if (runStart != runEnd) {
                runs.add(new Pair<Integer, Integer>(runStart, runEnd + 1));
            }
            runStart = idx;
            runEnd = idx;
        }
        if (runStart != runEnd) {
            runs.add(new Pair<Integer, Integer>(runStart, runEnd + 1));
        }
        if (runs.isEmpty()) {
            return null;
        }

        for (Pair<Integer, Integer> run : runs) {
            int first = run.a;
            int endExclusive = run.b;
            BoundingBox merged = list.get(first).getBoundingBox();
            for (int k = first + 1; k < endExclusive; k++) {
                merged = merged.boundBox(list.get(k).getBoundingBox());
            }
            list.set(first, new GraphicObject(merged, GraphicObjectType.VECTOR_BOX));
            for (int k = first + 1; k < endExclusive; k++) {
                list.set(k, null);
            }
        }

        this.validGraphicObjectPredicate =
                candidate -> candidate != null && isValidBitmapGraphicObject(candidate);
        return Lists.newArrayList(Iterables.filter(list, this.validGraphicObjectPredicate));
    }

    /**
     * Returns the smallest or the largest key recorded in the given counter.
     *
     * @param elementCounter the counter whose keys are inspected; must hold at least one
     *                       entry, otherwise the lookup of the first entry fails with an
     *                       IndexOutOfBoundsException
     * @param z              true to get the smallest key, false to get the largest
     * @return the smallest key when {@code z} is true, the largest key otherwise
     */
    protected static int getCoordItem(ElementCounter<Integer> elementCounter, boolean z) {
        List<Map.Entry<Integer, Integer>> entries = elementCounter.getSortedCounts();
        int result = entries.get(0).getKey().intValue();
        for (Map.Entry<Integer, Integer> entry : entries) {
            int key = entry.getKey().intValue();
            result = z ? Math.min(result, key) : Math.max(result, key);
        }
        return result;
    }

    public String getHeaderFeatured(boolean z, boolean z2) {
        String text;
        String header;
        if (z && ((header = getHeader()) == null || header.trim().length() <= 1)) {
            getHeaderLastHope();
        }
        this.featureFactory = FeatureFactory.getInstance();
        StringBuilder sb = new StringBuilder();
        String str = null;
        int i = -1;
        Iterator<Integer> it = this.blockDocumentHeaders.iterator();
        while (it.hasNext()) {
            boolean z3 = false;
            boolean z4 = false;
            List<LayoutToken> tokens = this.blocks.get(it.next().intValue()).getTokens();
            if (tokens != null) {
                int i2 = 0;
                while (i2 < tokens.size()) {
                    LayoutToken layoutToken = tokens.get(i2);
                    FeaturesVectorHeader featuresVectorHeader = new FeaturesVectorHeader();
                    featuresVectorHeader.token = layoutToken;
                    String text2 = layoutToken.getText();
                    if (text2 == null) {
                        i2++;
                    } else {
                        String replace = text2.replace(" ", "");
                        if (replace.length() == 0) {
                            i2++;
                        } else if (replace.equals("\n")) {
                            z3 = true;
                            i2++;
                        } else {
                            boolean z5 = false;
                            if (z3) {
                                z5 = true;
                                z3 = false;
                            }
                            String replaceAll = replace.replaceAll("[ \n]", "");
                            if (TextUtilities.filterLine(replaceAll)) {
                                i2++;
                            } else {
                                featuresVectorHeader.string = replaceAll;
                                if (z5) {
                                    featuresVectorHeader.lineStatus = "LINESTART";
                                }
                                if (this.featureFactory.isPunct.matcher(replaceAll).find()) {
                                    featuresVectorHeader.punctType = "PUNCT";
                                }
                                if (replaceAll.equals(TextUtilities.START_BRACKET) || replaceAll.equals("[")) {
                                    featuresVectorHeader.punctType = "OPENBRACKET";
                                } else if (replaceAll.equals(TextUtilities.END_BRACKET) || replaceAll.equals("]")) {
                                    featuresVectorHeader.punctType = "ENDBRACKET";
                                } else if (replaceAll.equals(Constants.ATTRVAL_THIS)) {
                                    featuresVectorHeader.punctType = "DOT";
                                } else if (replaceAll.equals(TextUtilities.COMMA)) {
                                    featuresVectorHeader.punctType = "COMMA";
                                } else if (replaceAll.equals("-")) {
                                    featuresVectorHeader.punctType = "HYPHEN";
                                } else if (replaceAll.equals("\"") || replaceAll.equals("'") || replaceAll.equals("`")) {
                                    featuresVectorHeader.punctType = "QUOTE";
                                }
                                if (i2 == 0) {
                                    featuresVectorHeader.lineStatus = "LINESTART";
                                    featuresVectorHeader.blockStatus = "BLOCKSTART";
                                } else if (i2 == tokens.size() - 1) {
                                    featuresVectorHeader.lineStatus = "LINEEND";
                                    z3 = true;
                                    featuresVectorHeader.blockStatus = "BLOCKEND";
                                    z4 = true;
                                } else {
                                    boolean z6 = false;
                                    boolean z7 = false;
                                    for (int i3 = 1; i2 + i3 < tokens.size() && !z7; i3++) {
                                        LayoutToken layoutToken2 = tokens.get(i2 + i3);
                                        if (layoutToken2 != null && (text = layoutToken2.getText()) != null) {
                                            if (text.equals("\n")) {
                                                z6 = true;
                                                z7 = true;
                                            } else if (text.trim().length() != 0 && !replaceAll.equals(" ") && !text.contains("@IMAGE") && !text.contains("@PAGE") && !replaceAll.contains(".pbm") && !replaceAll.contains(".ppm") && !replaceAll.contains(".svg") && !replaceAll.contains(".png") && !replaceAll.contains(".jpg")) {
                                                z7 = true;
                                            }
                                        }
                                        if (i2 + i3 == tokens.size() - 1) {
                                            z4 = true;
                                            z6 = true;
                                        }
                                    }
                                    if (!z6 && !z5) {
                                        featuresVectorHeader.lineStatus = "LINEIN";
                                    } else if (!z5) {
                                        featuresVectorHeader.lineStatus = "LINEEND";
                                        z3 = true;
                                    }
                                    if (!z4 && featuresVectorHeader.blockStatus == null) {
                                        featuresVectorHeader.blockStatus = "BLOCKIN";
                                    } else if (featuresVectorHeader.blockStatus == null) {
                                        featuresVectorHeader.blockStatus = "BLOCKEND";
                                    }
                                }
                                if (replaceAll.length() == 1) {
                                    featuresVectorHeader.singleChar = true;
                                }
                                if (Character.isUpperCase(replaceAll.charAt(0))) {
                                    featuresVectorHeader.capitalisation = "INITCAP";
                                }
                                if (this.featureFactory.test_all_capital(replaceAll)) {
                                    featuresVectorHeader.capitalisation = "ALLCAP";
                                }
                                FeatureFactory featureFactory = this.featureFactory;
                                if (FeatureFactory.test_digit(replaceAll)) {
                                    featuresVectorHeader.digit = "CONTAINSDIGITS";
                                }
                                if (this.featureFactory.test_common(replaceAll)) {
                                    featuresVectorHeader.commonName = true;
                                }
                                if (this.featureFactory.test_names(replaceAll)) {
                                    featuresVectorHeader.properName = true;
                                }
                                if (this.featureFactory.test_month(replaceAll)) {
                                    featuresVectorHeader.month = true;
                                }
                                if (replaceAll.contains("-")) {
                                    featuresVectorHeader.containDash = true;
                                }
                                if (this.featureFactory.isDigit.matcher(replaceAll).find()) {
                                    featuresVectorHeader.digit = "ALLDIGIT";
                                }
                                if (this.featureFactory.year.matcher(replaceAll).find()) {
                                    featuresVectorHeader.year = true;
                                }
                                if (this.featureFactory.email.matcher(replaceAll).find()) {
                                    featuresVectorHeader.email = true;
                                }
                                if (this.featureFactory.http.matcher(replaceAll).find()) {
                                    featuresVectorHeader.http = true;
                                }
                                if (str == null) {
                                    str = layoutToken.getFont();
                                    featuresVectorHeader.fontStatus = "NEWFONT";
                                } else if (str.equals(layoutToken.getFont())) {
                                    featuresVectorHeader.fontStatus = "SAMEFONT";
                                } else {
                                    str = layoutToken.getFont();
                                    featuresVectorHeader.fontStatus = "NEWFONT";
                                }
                                int fontSize = (int) layoutToken.getFontSize();
                                if (i == -1) {
                                    i = fontSize;
                                    featuresVectorHeader.fontSize = "HIGHERFONT";
                                } else if (i == fontSize) {
                                    featuresVectorHeader.fontSize = "SAMEFONTSIZE";
                                } else if (i < fontSize) {
                                    featuresVectorHeader.fontSize = "HIGHERFONT";
                                    i = fontSize;
                                } else if (i > fontSize) {
                                    featuresVectorHeader.fontSize = "LOWERFONT";
                                    i = fontSize;
                                }
                                if (layoutToken.getBold()) {
                                    featuresVectorHeader.bold = true;
                                }
                                if (layoutToken.getItalic()) {
                                    featuresVectorHeader.italic = true;
                                }
                                if (layoutToken.getRotation()) {
                                    featuresVectorHeader.rotation = true;
                                }
                                if (featuresVectorHeader.capitalisation == null) {
                                    featuresVectorHeader.capitalisation = "NOCAPS";
                                }
                                if (featuresVectorHeader.digit == null) {
                                    featuresVectorHeader.digit = "NODIGIT";
                                }
                                if (featuresVectorHeader.punctType == null) {
                                    featuresVectorHeader.punctType = "NOPUNCT";
                                }
                                sb.append(featuresVectorHeader.printVector(z2));
                                i2++;
                            }
                        }
                    }
                }
            }
        }
        return sb.toString();
    }

    /**
     * Heuristically extracts the header text of the document from its blocks.
     *
     * <p>The result of {@link #getHeaderByIntroduction()} is returned directly when it is
     * non-blank. Otherwise the blocks are scanned in order; blocks whose text is {@code null}
     * or starts with {@code "@"} only contribute a newline and are not counted. A block is
     * treated as "long or abstract" when it has more than 60 tokens or matches
     * {@code BasicStructureBuilder.abstract_}. While scanning, {@code beginBody} and
     * {@code blockDocumentHeaders} are updated as side effects.
     *
     * <p>Fix over the previous version: the numbered-section test used
     * {@code startsWith("(1|I|A)\\.\\s")}, which compares against the literal characters of the
     * regular expression and therefore never matched. It is now evaluated as a regular
     * expression anchored at the start of the block text.
     *
     * @return the accumulated header text, or {@code null} when no header boundary was found
     */
    public String getHeader() {
        String byIntroduction = getHeaderByIntroduction();
        if (byIntroduction != null && byIntroduction.trim().length() > 0) {
            return byIntroduction;
        }

        String header = null;
        this.beginBody = -1;
        StringBuilder accumulated = new StringBuilder();
        int blockIndex = 0;          // counts only blocks carrying real text
        int longBlocksSeen = 0;      // long blocks met after a tentative body start
        boolean abstractCandidate = false;

        for (Block block : this.blocks) {
            String text = block.getText();
            if (text == null || text.startsWith("@")) {
                accumulated.append("\n");
                continue;
            }
            String cleaned = text.trim().replace("  ", " ");
            boolean longOrAbstract = block.getNbTokens() > 60
                    || BasicStructureBuilder.abstract_.matcher(cleaned).find();

            if (!longOrAbstract) {
                boolean introduction = BasicStructureBuilder.introduction.matcher(cleaned).find();
                if (abstractCandidate) {
                    // After an abstract-like block, an introduction title (or, once a tentative
                    // body start exists, a "1. " / "I. " / "A. " section number) ends the header.
                    boolean numberedSection = this.beginBody != -1
                            && cleaned.matches("(?s)(1|I|A)\\.\\s.*");
                    if (introduction || numberedSection) {
                        this.beginBody = blockIndex;
                        registerDocumentHeaderBlocksUpTo(blockIndex);
                        return accumulated.toString();
                    }
                } else if (introduction) {
                    this.beginBody = blockIndex;
                    registerDocumentHeaderBlocksUpTo(blockIndex);
                    header = accumulated.toString();
                }
            } else if (!abstractCandidate) {
                abstractCandidate = true;
            } else if (this.beginBody == -1) {
                // Second long/abstract block: tentatively start the body here; the header
                // index range deliberately extends one block further (blockIndex + 1).
                this.beginBody = blockIndex;
                registerDocumentHeaderBlocksUpTo(blockIndex + 1);
                header = accumulated.toString();
                longBlocksSeen = 1;
            } else if (block.getNbTokens() > 60) {
                longBlocksSeen++;
                if (longBlocksSeen > 5) {
                    return header;
                }
            }

            // Give up once more than 6 blocks and more than 60% of the blocks were scanned.
            if (blockIndex > 6 && blockIndex > this.blocks.size() * 0.6d) {
                return this.beginBody != -1 ? header : null;
            }
            accumulated.append(cleaned).append("\n");
            blockIndex++;
        }
        return header;
    }

    /** Adds every index in {@code [0, lastInclusive]} to {@code blockDocumentHeaders} if absent. */
    private void registerDocumentHeaderBlocksUpTo(int lastInclusive) {
        if (this.blockDocumentHeaders == null) {
            this.blockDocumentHeaders = new ArrayList<>();
        }
        for (int index = 0; index <= lastInclusive; index++) {
            Integer key = Integer.valueOf(index);
            if (!this.blockDocumentHeaders.contains(key)) {
                this.blockDocumentHeaders.add(key);
            }
        }
    }

    /**
     * Fallback header extraction: concatenates the text of the blocks of the first page that
     * has any blocks.
     *
     * <p>Blocks whose text is {@code null} or starts with {@code "@"} contribute a newline only.
     * Every other block is appended (trimmed, double spaces collapsed once) and its running
     * index is recorded in {@code blockDocumentHeaders}. {@code beginBody} is set to the number
     * of text blocks consumed.
     *
     * <p>Fix over the previous version: the page loop did not stop after the first non-empty
     * page, so every page of the document was folded into the "header" and {@code beginBody}
     * ended up past the last block. The loop now exits after the first page with blocks.
     *
     * @return the accumulated text (possibly empty), or {@code null} when there are no pages
     */
    public String getHeaderLastHope() {
        if (this.pages == null || this.pages.size() == 0) {
            return null;
        }
        StringBuilder accumulated = new StringBuilder();
        int blockIndex = 0;
        for (Page page : this.pages) {
            List<Block> pageBlocks = page.getBlocks();
            if (pageBlocks == null || pageBlocks.size() == 0) {
                continue;
            }
            for (Block block : pageBlocks) {
                String text = block.getText();
                if (text == null || text.startsWith("@")) {
                    accumulated.append("\n");
                    continue;
                }
                accumulated.append(text.trim().replace("  ", " "));
                Integer key = Integer.valueOf(blockIndex);
                if (this.blockDocumentHeaders == null) {
                    this.blockDocumentHeaders = new ArrayList<>();
                }
                if (!this.blockDocumentHeaders.contains(key)) {
                    this.blockDocumentHeaders.add(key);
                }
                blockIndex++;
            }
            this.beginBody = blockIndex;
            // Only the first page that actually has blocks is treated as the header.
            break;
        }
        return accumulated.toString();
    }

    /**
     * Collects block text up to and including the first block matching
     * {@code BasicStructureBuilder.introductionStrict}.
     *
     * <p>Blocks with {@code null} text or text starting with {@code "@"} add a newline and are
     * not counted. On a match, {@code beginBody} is set to the index of the matching block and
     * all indices up to it are added to {@code blockDocumentHeaders}.
     *
     * @return the accumulated text, or {@code null} if no block matches
     */
    public String getHeaderByIntroduction() {
        StringBuilder accumulated = new StringBuilder();
        int blockIndex = 0;
        for (Block block : this.blocks) {
            String raw = block.getText();
            if (raw == null || raw.startsWith("@")) {
                accumulated.append("\n");
                continue;
            }
            String trimmed = raw.trim();
            accumulated.append(trimmed);
            if (!BasicStructureBuilder.introductionStrict.matcher(trimmed).find()) {
                blockIndex++;
                continue;
            }
            // Introduction found: everything seen so far, this block included, is header.
            this.beginBody = blockIndex;
            for (int index = 0; index <= blockIndex; index++) {
                Integer key = Integer.valueOf(index);
                if (this.blockDocumentHeaders == null) {
                    this.blockDocumentHeaders = new ArrayList<>();
                }
                if (!this.blockDocumentHeaders.contains(key)) {
                    this.blockDocumentHeaders.add(key);
                }
            }
            return accumulated.toString();
        }
        return null;
    }

    /**
     * Builds the body text from all blocks that are not registered as footer, document header,
     * page header or reference block.
     *
     * <p>A first pass looks for a document-header block starting with {@code "DOI: 10.1002"};
     * once seen, the first later non-header/non-footer block starting with
     * {@code "Keywords: "} is added, together with the block before it, to
     * {@code blockDocumentHeaders}. A second pass appends the dehyphenized text of every
     * remaining block, one per line; blocks starting with {@code "@IMAGE"} or empty after
     * trimming are skipped.
     *
     * <p>Fix over the previous version: {@code blockDocumentHeaders} was dereferenced in the
     * first pass but only lazily initialised in the second, so calling this method before any
     * header extraction threw a {@link NullPointerException}. All index collections are now
     * initialised up front.
     *
     * @return the body text, possibly empty
     */
    public String getBody() {
        if (this.blockFooters == null) {
            this.blockFooters = new ArrayList<>();
        }
        if (this.blockHeaders == null) {
            this.blockHeaders = new ArrayList<>();
        }
        if (this.blockDocumentHeaders == null) {
            this.blockDocumentHeaders = new ArrayList<>();
        }
        if (this.blockReferences == null) {
            this.blockReferences = new TreeSet<>();
        }

        // Pass 1: extend the document header up to a "Keywords: " block when a
        // "DOI: 10.1002" marker was seen inside the header.
        boolean doiMarkerSeen = false;
        int index = 0;
        for (Block block : this.blocks) {
            Integer key = Integer.valueOf(index);
            String raw = block.getText();
            String cleaned = (raw == null) ? null : raw.trim().replace("  ", " ");
            boolean inDocumentHeader = this.blockDocumentHeaders.contains(key);

            if (inDocumentHeader && cleaned != null && cleaned.startsWith("DOI: 10.1002")) {
                doiMarkerSeen = true;
            }
            if (doiMarkerSeen
                    && !inDocumentHeader
                    && !this.blockFooters.contains(key)
                    && !this.blockHeaders.contains(key)
                    && cleaned != null
                    && cleaned.startsWith("Keywords: ")) {
                this.blockDocumentHeaders.add(Integer.valueOf(index - 1));
                this.blockDocumentHeaders.add(key);
                break;
            }
            index++;
        }

        // Pass 2: concatenate everything that is not header/footer/reference material.
        StringBuilder body = new StringBuilder();
        int position = 0;
        for (Block block : this.blocks) {
            Integer key = Integer.valueOf(position);
            position++;
            // NOTE(review): blockReferences is declared as a set of DocumentPiece elsewhere in
            // this class but is queried with an Integer here - confirm this lookup is intended.
            if (this.blockFooters.contains(key)
                    || this.blockDocumentHeaders.contains(key)
                    || this.blockHeaders.contains(key)
                    || this.blockReferences.contains(key)) {
                continue;
            }
            String raw = block.getText();
            if (raw == null) {
                continue;
            }
            String trimmed = raw.trim();
            if (trimmed.startsWith("@IMAGE") || trimmed.length() == 0) {
                continue;
            }
            if (this.featureFactory == null) {
                this.featureFactory = FeatureFactory.getInstance();
            }
            body.append(TextUtilities.dehyphenize(trimmed)).append("\n");
        }
        return body.toString();
    }

    /**
     * Concatenates the text of the blocks whose position lies in {@code [i, i2)}, one block per
     * line.
     *
     * <p>Fix over the previous version: {@code blocks.size()} was read before the
     * {@code blocks != null} guard, so a missing block list combined with {@code i2 == -1}
     * threw a {@link NullPointerException}. The guard now comes first and yields an empty
     * string, as the method already did for any other {@code i2}.
     *
     * @param i  position of the first block to include
     * @param i2 position after the last block to include, or {@code -1} for "until the end"
     * @return the concatenated text; empty when there are no blocks or the range is empty.
     *         A block whose text is {@code null} is rendered as the string {@code "null"},
     *         as before.
     */
    public String getAllBlocksClean(int i, int i2) {
        if (this.blocks == null) {
            return "";
        }
        int endExclusive = (i2 == -1) ? this.blocks.size() + 1 : i2;
        StringBuilder text = new StringBuilder();
        int position = 0;
        for (Block block : this.blocks) {
            if (position >= i && position < endExclusive) {
                text.append(block.getText()).append("\n");
            }
            position++;
        }
        return text.toString();
    }

    /**
     * Collects the distinct matches of {@code TextUtilities.DOIPattern} found in the block
     * texts of the first three pages, in order of first appearance.
     *
     * @return the list of distinct matches, possibly empty
     */
    public List<String> getDOIMatches() {
        List<String> matches = new ArrayList<>();
        int pagesScanned = 0;
        for (Page page : getPages()) {
            List<Block> pageBlocks = page.getBlocks();
            if (pageBlocks != null) {
                for (Block block : pageBlocks) {
                    String text = block.getText();
                    if (text == null || text.length() == 0) {
                        continue;
                    }
                    Matcher doiMatcher = TextUtilities.DOIPattern.matcher(text.trim());
                    while (doiMatcher.find()) {
                        String doi = doiMatcher.group();
                        if (!matches.contains(doi)) {
                            matches.add(doi);
                        }
                    }
                }
            }
            pagesScanned++;
            // Stop once the third page has been processed.
            if (pagesScanned == 3) {
                break;
            }
        }
        return matches;
    }

    /**
     * @return the current value of the {@code tei} field (may be {@code null} if never set)
     */
    public String getTei() {
        return tei;
    }

    /**
     * Stores the given string in the {@code tei} field.
     *
     * @param str the new value
     */
    public void setTei(String str) {
        tei = str;
    }

    /**
     * @return the live list of block indices registered as document header (not a copy);
     *         may be {@code null} when nothing has initialised it yet
     */
    public List<Integer> getBlockDocumentHeaders() {
        return blockDocumentHeaders;
    }

    /**
     * @return the current value of the {@code outlineRoot} field
     */
    public DocumentNode getOutlineRoot() {
        return outlineRoot;
    }

    /**
     * Stores the given node in the {@code outlineRoot} field.
     *
     * @param documentNode the new outline root
     */
    public void setOutlineRoot(DocumentNode documentNode) {
        outlineRoot = documentNode;
    }

    /**
     * @return the current value of the {@code titleMatchNum} flag
     */
    public boolean isTitleMatchNum() {
        return titleMatchNum;
    }

    /**
     * Sets the {@code titleMatchNum} flag.
     *
     * @param z the new flag value
     */
    public void setTitleMatchNum(boolean z) {
        titleMatchNum = z;
    }

    /**
     * @return the live list of pages (not a copy); {@code null} until pages are set or added
     */
    public List<Page> getPages() {
        return pages;
    }

    /**
     * Returns the page stored at list position {@code i - 1}, i.e. {@code i} is a 1-based
     * page number.
     *
     * @param i 1-based page number
     * @return the page at that position
     */
    public Page getPage(int i) {
        int zeroBasedIndex = i - 1;
        return pages.get(zeroBasedIndex);
    }

    /**
     * @return the current value of the {@code clusters} field (the live list, not a copy)
     */
    public List<Cluster> getClusters() {
        return clusters;
    }

    /**
     * Replaces the {@code blockHeaders} index list; the list is stored as-is, not copied.
     *
     * @param list the new list of block indices
     */
    public void setBlockHeaders(List<Integer> list) {
        blockHeaders = list;
    }

    /**
     * Replaces the {@code blockFooters} index list; the list is stored as-is, not copied.
     *
     * @param list the new list of block indices
     */
    public void setBlockFooters(List<Integer> list) {
        blockFooters = list;
    }

    /**
     * Replaces the {@code blockSectionTitles} index list; the list is stored as-is, not copied.
     *
     * @param list the new list of block indices
     */
    public void setBlockSectionTitles(List<Integer> list) {
        blockSectionTitles = list;
    }

    /**
     * Replaces the {@code acknowledgementBlocks} index list; the list is stored as-is, not
     * copied.
     *
     * @param list the new list of block indices
     */
    public void setAcknowledgementBlocks(List<Integer> list) {
        acknowledgementBlocks = list;
    }

    /**
     * Replaces the {@code blockDocumentHeaders} index list; the list is stored as-is, not
     * copied.
     *
     * @param list the new list of block indices
     */
    public void setBlockDocumentHeaders(List<Integer> list) {
        blockDocumentHeaders = list;
    }

    /**
     * Replaces the {@code blockReferences} set; the set is stored as-is, not copied.
     *
     * @param sortedSet the new set of reference pieces
     */
    public void setBlockReferences(SortedSet<DocumentPiece> sortedSet) {
        blockReferences = sortedSet;
    }

    /**
     * Replaces the {@code blockTables} index list; the list is stored as-is, not copied.
     *
     * @param list the new list of block indices
     */
    public void setBlockTables(List<Integer> list) {
        blockTables = list;
    }

    /**
     * Replaces the {@code blockFigures} index list; the list is stored as-is, not copied.
     *
     * @param list the new list of block indices
     */
    public void setBlockFigures(List<Integer> list) {
        blockFigures = list;
    }

    /**
     * Replaces the {@code blockHeadTables} index list; the list is stored as-is, not copied.
     *
     * @param list the new list of block indices
     */
    public void setBlockHeadTables(List<Integer> list) {
        blockHeadTables = list;
    }

    /**
     * Replaces the {@code blockHeadFigures} index list; the list is stored as-is, not copied.
     *
     * @param list the new list of block indices
     */
    public void setBlockHeadFigures(List<Integer> list) {
        blockHeadFigures = list;
    }

    /**
     * Replaces the {@code clusters} list; the list is stored as-is, not copied.
     *
     * @param list the new cluster list
     */
    public void setClusters(List<Cluster> list) {
        clusters = list;
    }

    /**
     * Replaces the {@code pages} list; the list is stored as-is, not copied.
     *
     * @param list the new page list
     */
    public void setPages(List<Page> list) {
        pages = list;
    }

    /**
     * Appends a page to the page list, creating the list on first use.
     *
     * @param page the page to append
     */
    public void addPage(Page page) {
        List<Page> current = pages;
        if (current == null) {
            current = new ArrayList<>();
            pages = current;
        }
        current.add(page);
    }

    /**
     * Stores the bibliographical data sets and normalises them in place.
     *
     * <p>For every entry, dots, square brackets, parentheses, dashes and whitespace are removed
     * from a non-null reference symbol, and the entry's bibliographical item receives its
     * position in the list as ordinal.
     *
     * <p>Fix over the previous version: the symbol clean-up was guarded against a {@code null}
     * list but the ordinal assignment was not, so {@code setBibDataSets(null)} threw a
     * {@link NullPointerException} after the field had already been assigned. An entry without
     * a bibliographical item is now skipped for the ordinal (its position is still counted),
     * matching the null check done in {@code calculateTeiIdToBibDataSets}.
     *
     * @param list the data sets to store; may be {@code null}, which just clears the field
     */
    public void setBibDataSets(List<BibDataSet> list) {
        this.bibDataSets = list;
        if (list == null) {
            return;
        }
        int ordinal = 0;
        for (BibDataSet entry : list) {
            String refSymbol = entry.getRefSymbol();
            if (refSymbol != null) {
                entry.setRefSymbol(refSymbol.replaceAll("[\\.\\[\\]()\\-\\s]", ""));
            }
            if (entry.getResBib() != null) {
                entry.getResBib().setOrdinal(ordinal);
            }
            ordinal++;
        }
    }

    /**
     * Returns the reference marker matcher, creating it on first call from the stored
     * bibliographical data sets and {@code Engine.getCntManager()}.
     *
     * @return the matcher, or {@code null} when none exists yet and no data sets are stored
     * @throws EntityMatcherException propagated from the {@code ReferenceMarkerMatcher}
     *         constructor
     */
    public synchronized ReferenceMarkerMatcher getReferenceMarkerMatcher() throws EntityMatcherException {
        if (referenceMarkerMatcher != null || bibDataSets == null) {
            // Either already built, or nothing to build from (the field is still null).
            return referenceMarkerMatcher;
        }
        referenceMarkerMatcher = new ReferenceMarkerMatcher(bibDataSets, Engine.getCntManager());
        return referenceMarkerMatcher;
    }

    /**
     * Rebuilds the TEI-id lookup map from the stored bibliographical data sets.
     *
     * <p>Does nothing when no data sets are stored. Entries without a bibliographical item or
     * without a TEI id are left out of the map.
     */
    public void calculateTeiIdToBibDataSets() {
        if (bibDataSets != null) {
            teiIdToBibDataSets = new HashMap<>(bibDataSets.size());
            for (BibDataSet entry : bibDataSets) {
                if (entry.getResBib() == null) {
                    continue;
                }
                String teiId = entry.getResBib().getTeiId();
                if (teiId != null) {
                    teiIdToBibDataSets.put(teiId, entry);
                }
            }
        }
    }

    /**
     * @return the live label-to-pieces multimap (not a copy); may be {@code null}
     */
    public SortedSetMultimap<String, DocumentPiece> getLabeledBlocks() {
        return labeledBlocks;
    }

    /**
     * Replaces the label-to-pieces multimap; the multimap is stored as-is, not copied.
     *
     * @param sortedSetMultimap the new multimap
     */
    public void setLabeledBlocks(SortedSetMultimap<String, DocumentPiece> sortedSetMultimap) {
        labeledBlocks = sortedSetMultimap;
    }

    /**
     * Returns the tokens covered by a document piece, both end pointers included.
     *
     * @param documentPiece the piece whose left/right token positions delimit the range
     * @return a {@code subList} view of the document tokenization (not a copy)
     */
    public List<LayoutToken> getDocumentPieceTokenization(DocumentPiece documentPiece) {
        List<LayoutToken> allTokens = tokenizations;
        int first = documentPiece.getLeft().getTokenDocPos();
        int lastInclusive = documentPiece.getRight().getTokenDocPos();
        return allTokens.subList(first, lastInclusive + 1);
    }

    /**
     * Joins the tokens of a document piece into one string without any separator (each token
     * is rendered through its string conversion by the joiner).
     *
     * @param documentPiece the piece to render
     * @return the concatenated token text
     */
    public String getDocumentPieceText(DocumentPiece documentPiece) {
        List<LayoutToken> pieceTokens = getDocumentPieceTokenization(documentPiece);
        Joiner noSeparator = Joiner.on("");
        return noSeparator.join(pieceTokens);
    }

    /**
     * Renders every piece of the set with {@link #getDocumentPieceText(DocumentPiece)} and
     * joins the results with newlines, in the set's iteration order.
     *
     * @param sortedSet the pieces to render
     * @return the newline-separated text of all pieces
     */
    public String getDocumentPieceText(SortedSet<DocumentPiece> sortedSet) {
        Function<DocumentPiece, Object> pieceToText = piece -> getDocumentPieceText(piece);
        Iterable<Object> pieceTexts = Iterables.transform(sortedSet, pieceToText);
        return Joiner.on("\n").join(pieceTexts);
    }

    /**
     * Looks up the document pieces registered under the label of the given tagging label.
     *
     * <p>Changes over the previous version: the label was cast to
     * {@code SortedSetMultimap<String, DocumentPiece>} before being used as the lookup key,
     * which is not a valid conversion for a label string; the key is now passed as-is. The
     * null-label diagnostic goes through {@code LOGGER} instead of {@code System.out}.
     *
     * @param taggingLabel the label identifying the document part
     * @return the pieces stored for that label, or {@code null} when no labeled blocks have
     *         been set
     */
    public SortedSet<DocumentPiece> getDocumentPart(TaggingLabel taggingLabel) {
        if (this.labeledBlocks == null) {
            LOGGER.debug("labeledBlocks is null");
            return null;
        }
        if (taggingLabel.getLabel() == null) {
            // Reported but not rejected: the lookup below is still attempted, as before.
            LOGGER.warn("segmentationLabel.getLabel()  is null");
        }
        return this.labeledBlocks.get(taggingLabel.getLabel());
    }

    /**
     * Returns the text of the document part identified by the given label.
     *
     * <p>Change over the previous version: {@code getDocumentPart} was evaluated twice (once
     * for the null test, once for rendering), duplicating the lookup and any diagnostics it
     * emits; the result is now fetched once.
     *
     * @param taggingLabel the label identifying the document part
     * @return the newline-joined text of the part's pieces, or {@code null} when
     *         {@code getDocumentPart} returns {@code null}
     */
    public String getDocumentPartText(TaggingLabel taggingLabel) {
        SortedSet<DocumentPiece> pieces = getDocumentPart(taggingLabel);
        if (pieces == null) {
            return null;
        }
        return getDocumentPieceText(pieces);
    }

    /**
     * Gathers, in iteration order of the pieces, the tokens from each piece's left token
     * position up to but excluding its right token position.
     *
     * <p>NOTE(review): the right bound is exclusive here, whereas
     * {@code getDocumentPieceTokenization} includes the right token - confirm which is intended.
     *
     * @param sortedSet the pieces to collect tokens for; may be {@code null}
     * @param list      the full token list the positions refer to
     * @return a new list with the collected tokens, or {@code null} when {@code sortedSet} is
     *         {@code null}
     */
    public static List<LayoutToken> getTokenizationParts(SortedSet<DocumentPiece> sortedSet, List<LayoutToken> list) {
        if (sortedSet == null) {
            return null;
        }
        List<LayoutToken> collected = new ArrayList<>();
        for (DocumentPiece piece : sortedSet) {
            int position = piece.getLeft().getTokenDocPos();
            int endExclusive = piece.getRight().getTokenDocPos();
            while (position < endExclusive) {
                collected.add(list.get(position));
                position++;
            }
        }
        return collected;
    }

    /**
     * Looks up a bibliographical data set in the TEI-id map built by
     * {@code calculateTeiIdToBibDataSets}.
     *
     * @param str the TEI id to look up
     * @return the mapped data set, or {@code null} when the id is not in the map
     */
    public BibDataSet getBibDataSetByTeiId(String str) {
        return teiIdToBibDataSets.get(str);
    }

    /**
     * Finds the images of the document that sit on the same page as the block and whose
     * vertical extent ends within {@code MIN_DISTANCE} of the block's {@code y}, or starts
     * within {@code MIN_DISTANCE} of the block's {@code y + height}.
     *
     * @param block    the block to find neighbouring graphics for
     * @param document the document providing the images
     * @return the matching graphic objects, or {@code null} (not an empty list) when none match
     */
    public static List<GraphicObject> getConnectedGraphics(Block block, Document document) {
        List<GraphicObject> connected = null;
        for (GraphicObject image : document.getImages()) {
            if (block.getPageNumber() != image.getPage()) {
                continue;
            }
            boolean imageEndNearBlockStart =
                    Math.abs((image.getY() + image.getHeight()) - block.getY()) < MIN_DISTANCE;
            boolean imageStartNearBlockEnd =
                    Math.abs(image.getY() - (block.getY() + block.getHeight())) < MIN_DISTANCE;
            if (!imageEndNearBlockStart && !imageStartNearBlockEnd) {
                continue;
            }
            // The result list is created lazily so that "no match" stays null.
            if (connected == null) {
                connected = new ArrayList<>();
            }
            connected.add(image);
        }
        return connected;
    }

    /**
     * Cleans up the description and content tokens of every table that passes
     * {@code firstCheck()}, then runs {@code secondCheck()} on it.
     *
     * <p>For each of the two token lists, tokens are kept while they stay within a distance of
     * 200 from the bounding box grown so far (seeded with the list's first token); tokens
     * whose box has a negative {@code x} are always kept and do not grow the box. The first
     * token that is too far away ends the scan and the list is truncated there. For the
     * description list this additionally counts
     * {@code TableRejectionCounters.HEADER_NOT_CONSECUTIVE} and marks the table as not good.
     *
     * <p>Change over the previous version: the negative-coordinate test compared against
     * {@code org.apache.xpath.XPath.MATCH_SCORE_QNAME}, an unrelated XPath scoring constant
     * whose value happens to be {@code 0.0}. It now compares against the literal, and the
     * distance threshold is named.
     *
     * <p>NOTE(review): both token lists are read with {@code get(0)}; presumably
     * {@code firstCheck()} guarantees they are non-empty - confirm.
     */
    public void postProcessTables() {
        final int distanceThreshold = 200;
        for (Table table : this.tables) {
            if (!table.firstCheck()) {
                continue;
            }

            // Description (header) tokens: a gap rejects the table.
            List<LayoutToken> keptDescription = new ArrayList<>();
            BoundingBox descriptionArea = BoundingBox.fromLayoutToken(table.getFullDescriptionTokens().get(0));
            for (LayoutToken token : table.getFullDescriptionTokens()) {
                BoundingBox tokenBox = BoundingBox.fromLayoutToken(token);
                if (tokenBox.getX() < 0.0d) {
                    keptDescription.add(token);
                    continue;
                }
                if (tokenBox.distanceTo(descriptionArea) > distanceThreshold) {
                    Engine.getCntManager().i(TableRejectionCounters.HEADER_NOT_CONSECUTIVE);
                    table.setGoodTable(false);
                    break;
                }
                descriptionArea = descriptionArea.boundBox(tokenBox);
                keptDescription.add(token);
            }
            table.getFullDescriptionTokens().clear();
            table.getFullDescriptionTokens().addAll(keptDescription);

            // Content tokens: a gap only truncates the content.
            List<LayoutToken> keptContent = new ArrayList<>();
            BoundingBox contentArea = BoundingBox.fromLayoutToken(table.getContentTokens().get(0));
            for (LayoutToken token : table.getContentTokens()) {
                BoundingBox tokenBox = BoundingBox.fromLayoutToken(token);
                if (tokenBox.getX() < 0.0d) {
                    keptContent.add(token);
                    continue;
                }
                if (tokenBox.distanceTo(contentArea) > distanceThreshold) {
                    break;
                }
                contentArea = contentArea.boundBox(tokenBox);
                keptContent.add(token);
            }
            table.getContentTokens().clear();
            table.getContentTokens().addAll(keptContent);

            table.secondCheck();
        }
    }

    public void assignGraphicObjectsToFigures() {
        HashMultimap create = HashMultimap.create();
        for (Figure figure : this.figures) {
            create.put(Integer.valueOf(figure.getPage()), figure);
        }
        for (K k : create.keySet()) {
            ArrayList<Figure> arrayList = new ArrayList();
            for (V v : create.get((HashMultimap) k)) {
                List<LayoutToken> figureLayoutTokens = getFigureLayoutTokens(v);
                if (figureLayoutTokens != null && !figureLayoutTokens.isEmpty()) {
                    v.setLayoutTokens(figureLayoutTokens);
                    v.setTextArea(BoundingBoxCalculator.calculate(figureLayoutTokens));
                    v.setCaption(new StringBuilder(LayoutTokensUtil.toText(LayoutTokensUtil.dehyphenize(figureLayoutTokens))));
                    arrayList.add(v);
                }
            }
            if (!arrayList.isEmpty()) {
                List<GraphicObject> list = (List) Lists.newArrayList(Iterables.filter(this.imagesPerPage.get(k), Figure.GRAPHIC_OBJECT_PREDICATE)).stream().filter(graphicObject -> {
                    return getPage(graphicObject.getBoundingBox().getPage()).getMainArea().intersect(graphicObject.getBoundingBox());
                }).collect(Collectors.toList());
                List list2 = (List) Lists.newArrayList(Iterables.filter(this.imagesPerPage.get(k), Figure.VECTOR_BOX_GRAPHIC_OBJECT_PREDICATE)).stream().filter(graphicObject2 -> {
                    Iterator it = arrayList.iterator();
                    while (it.hasNext()) {
                        BoundingBox boundingBoxIntersection = BoundingBoxCalculator.calculateOneBox(((Figure) it.next()).getLayoutTokens(), true).boundingBoxIntersection(graphicObject2.getBoundingBox());
                        if (boundingBoxIntersection != null && boundingBoxIntersection.area() / graphicObject2.getBoundingBox().area() > 0.5d) {
                            return false;
                        }
                    }
                    return true;
                }).collect(Collectors.toList());
                ArrayList<GraphicObject> arrayList2 = new ArrayList();
                for (GraphicObject graphicObject3 : list) {
                    Iterator it = list2.iterator();
                    while (true) {
                        if (it.hasNext()) {
                            if (graphicObject3.getBoundingBox().intersect(((GraphicObject) it.next()).getBoundingBox())) {
                                break;
                            }
                        } else {
                            arrayList2.add(graphicObject3);
                            break;
                        }
                    }
                }
                arrayList2.addAll(list2);
                if (list2.isEmpty()) {
                    for (Figure figure2 : arrayList) {
                        BoundingBox calculateOneBox = BoundingBoxCalculator.calculateOneBox(figure2.getLayoutTokens(), true);
                        double d = 7000.0d;
                        GraphicObject graphicObject4 = null;
                        if (calculateOneBox != null) {
                            for (GraphicObject graphicObject5 : arrayList2) {
                                if (!graphicObject5.isUsed() && !graphicObject5.getBoundingBox().contains(calculateOneBox) && isValidBitmapGraphicObject(graphicObject5)) {
                                    double distanceTo = calculateOneBox.distanceTo(graphicObject5.getBoundingBox());
                                    if (distanceTo <= 70.0d && distanceTo < d) {
                                        d = distanceTo;
                                        graphicObject4 = graphicObject5;
                                    }
                                }
                            }
                        }
                        if (graphicObject4 != null) {
                            graphicObject4.setUsed(true);
                            figure2.setGraphicObjects(Lists.newArrayList(graphicObject4));
                            Engine.getCntManager().i("FigureCounters", "ASSIGNED_GRAPHICS_TO_FIGURES");
                        }
                    }
                } else if (arrayList.size() != arrayList2.size()) {
                    Engine.getCntManager().i(FigureCounters.SKIPPED_DUE_TO_MISMATCH_OF_CAPTIONS_AND_VECTOR_AND_BITMAP_GRAPHICS);
                } else {
                    for (Figure figure3 : arrayList) {
                        BoundingBox calculateOneBox2 = BoundingBoxCalculator.calculateOneBox(figure3.getLayoutTokens(), true);
                        double d2 = 7000.0d;
                        GraphicObject graphicObject6 = null;
                        if (calculateOneBox2 != null) {
                            for (GraphicObject graphicObject7 : arrayList2) {
                                if (!graphicObject7.isUsed()) {
                                    BoundingBox boundingBox = graphicObject7.getBoundingBox();
                                    if (getPage(boundingBox.getPage()).getMainArea().contains(boundingBox) || graphicObject7.getWidth() * graphicObject7.getHeight() >= 10000.0d) {
                                        if (graphicObject7.getType() != GraphicObjectType.BITMAP || isValidBitmapGraphicObject(graphicObject7)) {
                                            double distanceTo2 = calculateOneBox2.distanceTo(boundingBox);
                                            if (distanceTo2 <= 70.0d && distanceTo2 < d2) {
                                                d2 = distanceTo2;
                                                graphicObject6 = graphicObject7;
                                            }
                                        }
                                    }
                                }
                            }
                        }
                        if (graphicObject6 != null) {
                            graphicObject6.setUsed(true);
                            if (graphicObject6.getType() == GraphicObjectType.VECTOR_BOX) {
                                recalculateVectorBoxCoords(figure3, graphicObject6);
                            }
                            figure3.setGraphicObjects(Lists.newArrayList(graphicObject6));
                            Engine.getCntManager().i("FigureCounters", "ASSIGNED_GRAPHICS_TO_FIGURES");
                        }
                    }
                }
            }
        }
        int size = this.pages.size();
        for (int i = 1; i <= size; i++) {
            if (!create.containsKey(Integer.valueOf(i))) {
                ArrayList newArrayList = Lists.newArrayList(Iterables.filter(this.imagesPerPage.get(Integer.valueOf(i)), Figure.GRAPHIC_OBJECT_PREDICATE));
                ArrayList newArrayList2 = Lists.newArrayList(Iterables.filter(this.imagesPerPage.get(Integer.valueOf(i)), Figure.VECTOR_BOX_GRAPHIC_OBJECT_PREDICATE));
                ArrayList<GraphicObject> arrayList3 = new ArrayList();
                Iterator it2 = newArrayList.iterator();
                while (it2.hasNext()) {
                    GraphicObject graphicObject8 = (GraphicObject) it2.next();
                    Iterator it3 = newArrayList2.iterator();
                    while (true) {
                        if (it3.hasNext()) {
                            if (graphicObject8.getBoundingBox().intersect(((GraphicObject) it3.next()).getBoundingBox())) {
                                break;
                            }
                        } else {
                            Iterator it4 = newArrayList.iterator();
                            while (true) {
                                if (!it4.hasNext()) {
                                    arrayList3.add(graphicObject8);
                                    break;
                                } else {
                                    GraphicObject graphicObject9 = (GraphicObject) it4.next();
                                    if (graphicObject9 == graphicObject8 || !graphicObject8.getBoundingBox().intersect(graphicObject9.getBoundingBox())) {
                                    }
                                }
                            }
                        }
                    }
                }
                arrayList3.addAll(newArrayList2);
                if (arrayList3.size() == newArrayList.size()) {
                    for (GraphicObject graphicObject10 : arrayList3) {
                        if (badStandaloneFigure(graphicObject10)) {
                            Engine.getCntManager().i(FigureCounters.SKIPPED_BAD_STANDALONE_FIGURES);
                        } else {
                            Figure figure4 = new Figure();
                            figure4.setPage(i);
                            figure4.setGraphicObjects(Collections.singletonList(graphicObject10));
                            this.figures.add(figure4);
                            Engine.getCntManager().i("FigureCounters", "STANDALONE_FIGURES");
                            LOGGER.info("Standalone figure on page: " + i);
                        }
                    }
                }
            }
        }
    }

    /**
     * Tells whether a graphic object should be rejected as a standalone figure.
     * <p>
     * A graphic is rejected when its bounding box area is below 50000, or when
     * that area is not at most 0.6 of the main area of the page it sits on.
     * The matching "skipped small" / "skipped big" counter is incremented on
     * each rejection.
     *
     * @param graphicObject the candidate graphic; its page number is used as a
     *                      1-based index into the page list
     * @return {@code true} if the graphic must be skipped, {@code false} if it
     *         is acceptable
     */
    private boolean badStandaloneFigure(GraphicObject graphicObject) {
        final double graphicArea = graphicObject.getBoundingBox().area();

        final boolean tooSmall = graphicArea < 50000.0d;
        if (tooSmall) {
            Engine.getCntManager().i(FigureCounters.SKIPPED_SMALL_STANDALONE_FIGURES);
            return true;
        }

        final double mainAreaSize = this.pages.get(graphicObject.getPage() - 1).getMainArea().area();
        // Keep the "<=" direction so a non-comparable ratio is still treated as "too big".
        final boolean fitsInPage = graphicArea / mainAreaSize <= 0.6d;
        if (!fitsInPage) {
            Engine.getCntManager().i(FigureCounters.SKIPPED_BIG_STANDALONE_FIGURES);
        }
        return !fitsInPage;
    }

    /**
     * Checks whether a graphic object is large enough and placed well enough
     * to be used as a figure image.
     * <p>
     * The graphic must have a width*height of at least 1000 and be at least
     * 50 wide and 50 high. In addition it must either lie inside the main area
     * of its page or have a width*height of at least 10000.
     *
     * @param graphicObject the graphic object to check
     * @return {@code true} if the graphic passes all size and position checks
     */
    protected boolean isValidBitmapGraphicObject(GraphicObject graphicObject) {
        final double width = graphicObject.getWidth();
        final double height = graphicObject.getHeight();
        final double surface = width * height;

        final boolean bigEnough = surface >= 1000.0d && width >= 50.0d && height >= 50.0d;
        if (!bigEnough) {
            return false;
        }

        final BoundingBox box = graphicObject.getBoundingBox();
        final boolean insideMainArea = getPage(box.getPage()).getMainArea().contains(box);
        return insideMainArea || surface >= 10000.0d;
    }

    /* JADX WARN: Code restructure failed: missing block: B:10:0x009e, code lost:
    
        if (r0.area() > r34.area()) goto L11;
     */
    /* JADX WARN: Code restructure failed: missing block: B:16:0x00cf, code lost:
    
        if (r0.area() > r34.area()) goto L18;
     */
    /* JADX WARN: Code restructure failed: missing block: B:22:0x0100, code lost:
    
        if (r0.area() > r34.area()) goto L25;
     */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Placeholder for a method the decompiler could not reconstruct.
     * <p>
     * The real implementation was lost during decompilation (see the JADX
     * warnings above), so this body unconditionally throws. Any code path that
     * reaches this method will therefore fail at runtime.
     * NOTE(review): the surviving fragments only show bounding-box
     * {@code area()} comparisons; the actual logic must be restored from the
     * original source - TODO.
     *
     * @param r13 the figure passed by the caller
     * @param r14 the graphic object passed by the caller
     * @throws UnsupportedOperationException always
     */
    protected void recalculateVectorBoxCoords(org.grobid.core.data.Figure r13, org.grobid.core.layout.GraphicObject r14) {
        /*
            Method dump skipped, instructions count: 292
            To view this dump add '--comments-level debug' option
        */
        // Decompiler stub: no part of the original behaviour is implemented here.
        throw new UnsupportedOperationException("Method not decompiled: org.grobid.core.document.Document.recalculateVectorBoxCoords(org.grobid.core.data.Figure, org.grobid.core.layout.GraphicObject):void");
    }

    /**
     * Collects the layout tokens of a figure's caption.
     * <p>
     * The figure's block pointers are scanned in iteration order until a block
     * is found whose trimmed, lower-cased text starts with one of the known
     * caption keywords ("fig", "abb", "scheme", "photo", "gambar", "quadro",
     * "wykres", "fuente"). The tokens of that block are collected, followed by
     * the tokens of each subsequent block for as long as it lies closer than
     * 15 to the block before it. Scanning stops at the first block that is
     * too far away.
     *
     * @param figure the figure whose block pointers are inspected
     * @return the collected tokens, or an empty list when no block starts with
     *         a caption keyword
     */
    protected List<LayoutToken> getFigureLayoutTokens(Figure figure) {
        List<LayoutToken> captionTokens = new ArrayList<>();
        Iterator<Integer> blockPtrs = figure.getBlockPtrs().iterator();
        while (blockPtrs.hasNext()) {
            Block current = getBlocks().get(blockPtrs.next().intValue());
            // Locale.ROOT keeps the keyword match independent of the JVM default
            // locale (e.g. under a Turkish locale "FIG" would lower-case to "fıg").
            String normalized = LayoutTokensUtil.toText(current.getTokens())
                    .trim()
                    .toLowerCase(java.util.Locale.ROOT);
            boolean startsCaption = normalized.startsWith("fig")
                    || normalized.startsWith("abb")
                    || normalized.startsWith("scheme")
                    || normalized.startsWith("photo")
                    || normalized.startsWith("gambar")
                    || normalized.startsWith("quadro")
                    || normalized.startsWith("wykres")
                    || normalized.startsWith("fuente");
            if (!startsCaption) {
                continue;
            }

            captionTokens.addAll(current.getTokens());
            // Extend the caption with the following blocks while they stay adjacent.
            while (blockPtrs.hasNext()) {
                BoundingBox previousBox = BoundingBox.fromPointAndDimensions(
                        current.getPageNumber(), current.getX(), current.getY(),
                        current.getWidth(), current.getHeight());
                Block next = getBlocks().get(blockPtrs.next().intValue());
                BoundingBox nextBox = BoundingBox.fromPointAndDimensions(
                        next.getPageNumber(), next.getX(), next.getY(),
                        next.getWidth(), next.getHeight());
                if (nextBox.distanceTo(previousBox) >= 15.0d) {
                    break;
                }
                captionTokens.addAll(next.getTokens());
                current = next;
            }
            return captionTokens;
        }
        return captionTokens;
    }

    /**
     * Sets the text area of a figure and attaches the nearest unused bitmap
     * found on the figure's page.
     * <p>
     * The text area is always set from the figure's layout tokens. A bitmap is
     * then eligible only if it is not yet used and lies inside the main area
     * of its page. Among eligible bitmaps, the one closest to the single
     * bounding box of the tokens is chosen, provided its distance is at most
     * 70. The chosen bitmap is marked as used and becomes the figure's only
     * graphic object; if none qualifies, the figure's graphics are left alone.
     *
     * @param figure the figure to update
     */
    public void setConnectedGraphics2(Figure figure) {
        final List<LayoutToken> figureTokens = figure.getLayoutTokens();
        figure.setTextArea(BoundingBoxCalculator.calculate(figureTokens));

        final BoundingBox textBox = BoundingBoxCalculator.calculateOneBox(figureTokens, true);
        if (textBox == null) {
            return;
        }

        GraphicObject nearest = null;
        double nearestDistance = 7000.0d;
        for (GraphicObject candidate : this.imagesPerPage.get(Integer.valueOf(figure.getPage()))) {
            if (candidate.getType() != GraphicObjectType.BITMAP || candidate.isUsed()) {
                continue;
            }
            final BoundingBox candidateBox = BoundingBox.fromPointAndDimensions(
                    candidate.getPage(), candidate.getX(), candidate.getY(),
                    candidate.getWidth(), candidate.getHeight());
            if (!getPage(candidateBox.getPage()).getMainArea().contains(candidateBox)) {
                continue;
            }
            final double distance = textBox.distanceTo(candidateBox);
            final boolean isCloser = distance <= 70.0d && distance < nearestDistance;
            if (isCloser) {
                nearestDistance = distance;
                nearest = candidate;
            }
        }

        if (nearest == null) {
            return;
        }
        nearest.setUsed(true);
        figure.setGraphicObjects(Lists.newArrayList(nearest));
    }

    /**
     * Computes layout statistics for the document and stores them in fields.
     * <ul>
     *   <li>Adds the token count of every block to {@code documentLenghtChar}
     *       (the field is accumulated, not reset, by this method).</li>
     *   <li>Computes the minimum and maximum vertical spacing between
     *       consecutive blocks of a page. Only spacings strictly between 0 and
     *       the page height are considered.</li>
     *   <li>Sets each page's length to the total token count of its blocks.</li>
     *   <li>Computes the minimum and maximum character density
     *       (text length / block surface) over all blocks with a non-zero
     *       surface whose text contains neither "@PAGE" nor "@IMAGE".</li>
     * </ul>
     * The minimum and maximum spacing are updated independently, so a spacing
     * that sets a new maximum can also set a new minimum (for example the very
     * first spacing seen).
     */
    public void produceStatistics() {
        for (Block block : this.blocks) {
            List<LayoutToken> tokens = block.getTokens();
            if (tokens != null) {
                this.documentLenghtChar += tokens.size();
            }
        }

        this.maxBlockSpacing = 0.0d;
        this.minBlockSpacing = 10000.0d;
        // Bottom edge (y + height) of the previously visited block.
        double previousBottom = 0.0d;
        for (Page page : this.pages) {
            int pageTokenCount = 0;
            if (page.getBlocks() != null) {
                for (int idx = 0; idx < page.getBlocks().size(); idx++) {
                    Block block = page.getBlocks().get(idx);
                    // The first block of a page has no predecessor on that page.
                    if (idx != 0 && previousBottom > 0.0d) {
                        double spacing = block.getY() - previousBottom;
                        if (spacing > 0.0d && spacing < page.getHeight()) {
                            if (spacing > this.maxBlockSpacing) {
                                this.maxBlockSpacing = spacing;
                            }
                            if (spacing < this.minBlockSpacing) {
                                this.minBlockSpacing = spacing;
                            }
                        }
                    }
                    previousBottom = block.getY() + block.getHeight();
                    if (block.getTokens() != null) {
                        pageTokenCount += block.getTokens().size();
                    }
                }
            }
            page.setPageLengthChar(pageTokenCount);
        }

        this.maxCharacterDensity = 0.0d;
        this.minCharacterDensity = 1000000.0d;
        for (Block block : this.blocks) {
            // Skip blocks with an empty surface to avoid dividing by zero.
            if (block.getHeight() == 0.0d || block.getWidth() == 0.0d) {
                continue;
            }
            String text = block.getText();
            if (text == null || text.contains("@PAGE") || text.contains("@IMAGE")) {
                continue;
            }
            double density = text.length() / (block.getWidth() * block.getHeight());
            if (density < this.minCharacterDensity) {
                this.minCharacterDensity = density;
            }
            if (density > this.maxCharacterDensity) {
                this.maxCharacterDensity = density;
            }
        }
    }

    /**
     * Returns the document source held by this document.
     *
     * @return the current value of the {@code documentSource} field
     */
    public DocumentSource getDocumentSource() {
        return documentSource;
    }

    /**
     * Replaces the list of figures held by this document.
     *
     * @param list the new figure list; stored by reference, not copied
     */
    public void setFigures(List<Figure> list) {
        figures = list;
    }

    /**
     * Returns the list of figures held by this document.
     *
     * @return the current value of the {@code figures} field (not a copy)
     */
    public List<Figure> getFigures() {
        return figures;
    }

    /**
     * Replaces the list of tables held by this document.
     *
     * @param list the new table list; stored by reference, not copied
     */
    public void setTables(List<Table> list) {
        tables = list;
    }

    /**
     * Returns the list of tables held by this document.
     *
     * @return the current value of the {@code tables} field (not a copy)
     */
    public List<Table> getTables() {
        return tables;
    }

    /**
     * Replaces the list of equations held by this document.
     *
     * @param list the new equation list; stored by reference, not copied
     */
    public void setEquations(List<Equation> list) {
        equations = list;
    }

    /**
     * Returns the list of equations held by this document.
     *
     * @return the current value of the {@code equations} field (not a copy)
     */
    public List<Equation> getEquations() {
        return equations;
    }

    /**
     * Stores the given bibliographic item in the {@code resHeader} field.
     *
     * @param biblioItem the item to store; kept by reference
     */
    public void setResHeader(BiblioItem biblioItem) {
        resHeader = biblioItem;
    }

    /**
     * Selects the tokens that overlap an offset range, scanning the whole list.
     * <p>
     * Equivalent to {@link #getTokensFrom(List, int, int, int)} with a start
     * index of 0.
     *
     * @param list the tokens to scan
     * @param i    the begin offset of the range
     * @param i2   the end offset of the range
     * @return the selected tokens, in list order
     */
    public static List<LayoutToken> getTokens(List<LayoutToken> list, int i, int i2) {
        final int firstIndex = 0;
        return getTokensFrom(list, i, i2, firstIndex);
    }

    /**
     * Selects the tokens that overlap an offset range, scanning from a given
     * list index.
     * <p>
     * Null tokens, tokens without text, and tokens that end before the begin
     * offset (offset + text length &lt; {@code i}) are skipped. Scanning stops
     * at the first remaining token whose offset is greater than {@code i2}.
     *
     * @param list the tokens to scan
     * @param i    the begin offset of the range
     * @param i2   the end offset of the range
     * @param i3   the index in {@code list} at which scanning starts
     * @return the selected tokens, in list order; empty if none qualifies
     */
    public static List<LayoutToken> getTokensFrom(List<LayoutToken> list, int i, int i2, int i3) {
        final List<LayoutToken> selected = new ArrayList<LayoutToken>();
        for (int pos = i3; pos < list.size(); pos++) {
            final LayoutToken token = list.get(pos);
            if (token == null || token.getText() == null) {
                continue;
            }
            final int tokenEnd = token.getOffset() + token.getText().length();
            if (tokenEnd < i) {
                // Token lies entirely before the requested range.
                continue;
            }
            if (token.getOffset() > i2) {
                // First token past the range: stop scanning.
                break;
            }
            selected.add(token);
        }
        return selected;
    }
}
