package org.grobid.core.data;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import nu.xom.Attribute;
import nu.xom.Element;
import nu.xom.Node;
import org.apache.commons.lang3.StringUtils;
import org.apache.fontbox.ttf.HeaderTable;
import org.grobid.core.GrobidModels;
import org.grobid.core.data.table.Cell;
import org.grobid.core.data.table.Line;
import org.grobid.core.data.table.Row;
import org.grobid.core.document.Document;
import org.grobid.core.document.TEIFormatter;
import org.grobid.core.document.xml.XmlBuilderUtils;
import org.grobid.core.engines.Engine;
import org.grobid.core.engines.config.GrobidAnalysisConfig;
import org.grobid.core.engines.counters.TableRejectionCounters;
import org.grobid.core.engines.label.TaggingLabel;
import org.grobid.core.engines.label.TaggingLabels;
import org.grobid.core.layout.BoundingBox;
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.tokenization.TaggingTokenCluster;
import org.grobid.core.tokenization.TaggingTokenClusteror;
import org.grobid.core.utilities.BoundingBoxCalculator;
import org.grobid.core.utilities.KeyGen;
import org.grobid.core.utilities.LayoutTokensUtil;
import org.grobid.core.utilities.counters.CntManager;

/* loaded from: input_file:org/grobid/core/data/Table.class */
/**
 * A table found in a document, serialisable as a TEI {@code <figure type="table">} element.
 *
 * <p>On top of the state inherited from {@link Figure} (header, caption, label, content, ...),
 * a table keeps the layout tokens of its content and of its full description, an optional
 * note, and a "good table" flag that is maintained by {@link #firstCheck()} and
 * {@link #secondCheck()}.
 */
public class Table extends Figure {
    /** Entry looked up in the TEI-coordinates configuration to decide whether coordinates are emitted. */
    private static final String FIGURE_COORDS_KEY = "figure";
    /** Prefix (compared case-insensitively) that a valid table header must start with. */
    private static final String HEADER_PREFIX = "table";
    /** Content boxes lower than this (in the unit reported by {@link BoundingBox}) are rejected. */
    private static final double MIN_CONTENT_HEIGHT = 40.0d;
    /** Content boxes narrower than this (in the unit reported by {@link BoundingBox}) are rejected. */
    private static final double MIN_CONTENT_WIDTH = 100.0d;
    /** Tables with fewer content tokens than this are rejected. */
    private static final int MIN_CONTENT_TOKENS = 10;
    /** Tables with fewer description tokens than this are rejected. */
    private static final int MIN_DESCRIPTION_TOKENS = 5;

    private final List<LayoutToken> contentTokens = new ArrayList<>();
    private final List<LayoutToken> fullDescriptionTokens = new ArrayList<>();
    private boolean goodTable = true;
    protected StringBuilder note;

    /** Creates a table whose caption, header, content, label and note buffers are all empty. */
    public Table() {
        this.caption = new StringBuilder();
        this.header = new StringBuilder();
        this.content = new StringBuilder();
        this.label = new StringBuilder();
        this.note = new StringBuilder();
    }

    /**
     * Overrides the "good table" flag.
     *
     * @param z the new value of the flag
     */
    public void setGoodTable(boolean z) {
        this.goodTable = z;
    }

    /**
     * Serialises this table as a TEI {@code <figure type="table">} element containing, in order,
     * {@code <head>}, {@code <label>}, {@code <figDesc>} (only when a caption buffer exists),
     * {@code <table>} and {@code <note>} (only when a note buffer exists).
     *
     * @param grobidAnalysisConfig controls generation of coordinates and xml:ids
     * @param document             supplies the reference-marker matcher used for caption citations
     * @param tEIFormatter         used to turn citation markers of the caption into TEI nodes
     * @return the XML of the element, or {@code null} when both header and caption are empty
     */
    @Override // org.grobid.core.data.Figure
    public String toTEI(GrobidAnalysisConfig grobidAnalysisConfig, Document document, TEIFormatter tEIFormatter) {
        if (StringUtils.isEmpty(this.header) && StringUtils.isEmpty(this.caption)) {
            return null;
        }
        boolean withCoordinates = wantsFigureCoordinates(grobidAnalysisConfig);

        Element tableFigure = XmlBuilderUtils.teiElement("figure");
        tableFigure.addAttribute(new Attribute("type", "table"));
        if (this.id != null) {
            XmlBuilderUtils.addXmlId(tableFigure, "tab_" + this.id);
        }
        if (withCoordinates) {
            XmlBuilderUtils.addCoords(tableFigure, LayoutTokensUtil.getCoordsStringForOneBox(getLayoutTokens()));
        }

        // The guard above lets a null header through when the caption is non-empty (and never
        // looks at the label), so both are normalised through a null-safe helper.
        tableFigure.appendChild(XmlBuilderUtils.teiElement("head", normalizedOrEmpty(this.header)));
        tableFigure.appendChild(XmlBuilderUtils.teiElement("label", normalizedOrEmpty(this.label)));

        if (this.caption != null) {
            tableFigure.appendChild(buildCaptionElement(grobidAnalysisConfig, document, tEIFormatter));
        }

        Element tableContent = XmlBuilderUtils.teiElement("table");
        processTableContent(tableContent, getContentTokens());
        if (withCoordinates) {
            XmlBuilderUtils.addCoords(tableContent, LayoutTokensUtil.getCoordsStringForOneBox(getContentTokens()));
        }
        tableFigure.appendChild(tableContent);

        if (this.note != null) {
            tableFigure.appendChild(XmlBuilderUtils.teiElement("note", LayoutTokensUtil.normalizeText(this.note.toString())));
        }
        return tableFigure.toXML();
    }

    /** Tells whether the configuration lists the "figure" entry among the TEI coordinates to generate. */
    private static boolean wantsFigureCoordinates(GrobidAnalysisConfig config) {
        return config.getGenerateTeiCoordinates() != null
                && config.getGenerateTeiCoordinates().contains(FIGURE_COORDS_KEY);
    }

    /** Returns the normalised text of {@code text}, or an empty string when {@code text} is null. */
    private static String normalizedOrEmpty(CharSequence text) {
        return text == null ? "" : LayoutTokensUtil.normalizeText(text.toString());
    }

    /**
     * Builds the {@code <figDesc>} element from the caption. Must only be called when the
     * caption buffer is non-null. Without a labeled caption the normalised caption text is used
     * as is; otherwise the labeled caption is clustered and citation-marker clusters are
     * replaced by the nodes produced by the formatter.
     */
    private Element buildCaptionElement(GrobidAnalysisConfig config, Document document, TEIFormatter formatter) {
        Element figDesc = XmlBuilderUtils.teiElement("figDesc");
        if (config.isGenerateTeiIds()) {
            XmlBuilderUtils.addXmlId(figDesc, "_" + KeyGen.getKey().substring(0, 7));
        }
        if (this.labeledCaption == null || this.labeledCaption.length() <= 0) {
            figDesc.appendChild(LayoutTokensUtil.normalizeText(this.caption.toString()).trim());
            return figDesc;
        }

        TaggingTokenClusteror clusteror =
                new TaggingTokenClusteror(GrobidModels.FULLTEXT, this.labeledCaption, this.captionLayoutTokens);
        for (TaggingTokenCluster cluster : clusteror.cluster()) {
            if (cluster == null) {
                continue;
            }
            TaggingLabel clusterLabel = cluster.getTaggingLabel();
            if (clusterLabel.equals(TaggingLabels.CITATION_MARKER)) {
                try {
                    List<Node> referenceNodes = formatter.markReferencesTEILuceneBased(
                            cluster.concatTokens(),
                            document.getReferenceMarkerMatcher(),
                            config.isGenerateTeiCoordinates("ref"),
                            false);
                    if (referenceNodes != null) {
                        for (Node referenceNode : referenceNodes) {
                            figDesc.appendChild(referenceNode);
                        }
                    }
                } catch (Exception e) {
                    // Best effort: a citation marker that cannot be resolved is dropped from the
                    // caption rather than failing the serialisation of the whole table.
                    LOGGER.warn("Problem when serializing TEI fragment for table caption", e);
                }
            } else {
                // The de-hyphenated text is only needed for plain (non-citation) clusters.
                figDesc.appendChild(XmlBuilderUtils.textNode(
                        LayoutTokensUtil.normalizeDehyphenizeText(cluster.concatTokens())));
            }
        }
        return figDesc;
    }

    /**
     * Rebuilds rows and cells from the given tokens and appends one {@code <row>} per row and
     * one {@code <cell>} per cell to {@code element}. A cell spanning more than one column
     * carries a {@code cols} attribute holding its span.
     *
     * @param element the TEI element receiving the rows
     * @param list    the layout tokens of the table content
     */
    void processTableContent(Element element, List<LayoutToken> list) {
        List<Row> rows = Row.extractRows(Line.extractLines(Line.extractLineParts(list)));
        Row.insertEmptyCells(rows, Row.columnCount(rows));
        Row.mergeMulticolumnCells(rows);

        for (Row row : rows) {
            Element rowElement = XmlBuilderUtils.teiElement("row");
            element.appendChild(rowElement);
            for (Cell cell : row.getContent()) {
                Element cellElement = XmlBuilderUtils.teiElement("cell");
                rowElement.appendChild(cellElement);
                int colspan = cell.getColspan();
                if (colspan > 1) {
                    cellElement.addAttribute(new Attribute("cols", Integer.toString(colspan)));
                }
                cellElement.appendChild(cell.getText().trim());
            }
        }
    }

    /**
     * Replaces every newline by a space, then every pair of spaces by a single space (one pass
     * only, so longer runs of spaces get shorter but are not necessarily fully collapsed), and
     * trims the result. Not referenced anywhere else in this class.
     */
    private String cleanString(String str) {
        return str.replace("\n", " ").replace("  ", " ").trim();
    }

    /**
     * @return the text of the note, or {@code null} when the note buffer has been set to null
     */
    public String getNote() {
        return this.note == null ? null : this.note.toString();
    }

    /**
     * Replaces the note buffer.
     *
     * @param sb the new buffer; when null, no {@code <note>} element is serialised
     */
    public void setNote(StringBuilder sb) {
        this.note = sb;
    }

    /**
     * Appends text to the note, creating the buffer again if it had been set to null.
     *
     * @param str the text to append; null is ignored so the literal text "null" never ends up in the note
     */
    public void appendNote(String str) {
        if (str == null) {
            return;
        }
        if (this.note == null) {
            this.note = new StringBuilder();
        }
        this.note.append(str);
    }

    /**
     * Runs the basic validation unless the table is already flagged as bad, and stores the outcome.
     *
     * @return the updated "good table" flag
     */
    public boolean firstCheck() {
        // Short-circuit on purpose: an already rejected table must not bump the counters again.
        this.goodTable = this.goodTable && validateTable();
        return this.goodTable;
    }

    /**
     * Runs the layout-based checks unless the table is already flagged as bad, and stores the outcome.
     *
     * @return the updated "good table" flag
     */
    public boolean secondCheck() {
        // Short-circuit on purpose: an already rejected table must not bump the counters again.
        this.goodTable = this.goodTable && !badTableAdvancedCheck();
        return this.goodTable;
    }

    /**
     * Accepts the table when label, header and content are all non-empty, the trimmed label
     * parses as a base-10 integer and the header starts with "table" (case-insensitive).
     * Each rejection increments the matching rejection counter.
     */
    private boolean validateTable() {
        CntManager counters = Engine.getCntManager();
        if (StringUtils.isEmpty(this.label) || StringUtils.isEmpty(this.header) || StringUtils.isEmpty(this.content)) {
            counters.i(TableRejectionCounters.EMPTY_LABEL_OR_HEADER_OR_CONTENT);
            return false;
        }
        try {
            // Only the parse attempt matters, so the boxing Integer.valueOf is not needed.
            Integer.parseInt(getLabel().trim(), 10);
        } catch (NumberFormatException e) {
            counters.i(TableRejectionCounters.CANNOT_PARSE_LABEL_TO_INT);
            return false;
        }
        // regionMatches(ignoreCase) does not depend on the default locale and avoids copying the header.
        if (!getHeader().regionMatches(true, 0, HEADER_PREFIX, 0, HEADER_PREFIX.length())) {
            counters.i(TableRejectionCounters.HEADER_NOT_STARTS_WITH_TABLE_WORD);
            return false;
        }
        return true;
    }

    /**
     * Layout-based rejection tests comparing the bounding box of the content tokens with the one
     * of the description tokens. The first failing test increments its rejection counter.
     *
     * @return {@code true} when the table must be rejected
     */
    private boolean badTableAdvancedCheck() {
        CntManager counters = Engine.getCntManager();
        BoundingBox contentBox = BoundingBoxCalculator.calculateOneBox(this.contentTokens, true);
        BoundingBox descriptionBox = BoundingBoxCalculator.calculateOneBox(this.fullDescriptionTokens, true);

        // Defensive guards: calculateOneBox presumably yields null when it has no usable token
        // (TODO confirm against BoundingBoxCalculator). Reject instead of dereferencing null.
        if (contentBox == null) {
            counters.i(TableRejectionCounters.FEW_TOKENS_IN_CONTENT);
            return true;
        }
        if (descriptionBox == null) {
            counters.i(TableRejectionCounters.FEW_TOKENS_IN_HEADER);
            return true;
        }

        if (contentBox.getPage() != descriptionBox.getPage()) {
            counters.i(TableRejectionCounters.HEADER_AND_CONTENT_DIFFERENT_PAGES);
            return true;
        }
        if (contentBox.intersect(descriptionBox)) {
            counters.i(TableRejectionCounters.HEADER_AND_CONTENT_INTERSECT);
            return true;
        }
        if (descriptionBox.area() > contentBox.area()) {
            counters.i(TableRejectionCounters.HEADER_AREA_BIGGER_THAN_CONTENT);
            return true;
        }
        if (contentBox.getHeight() < MIN_CONTENT_HEIGHT) {
            counters.i(TableRejectionCounters.CONTENT_SIZE_TOO_SMALL);
            return true;
        }
        if (contentBox.getWidth() < MIN_CONTENT_WIDTH) {
            counters.i(TableRejectionCounters.CONTENT_WIDTH_TOO_SMALL);
            return true;
        }
        if (this.contentTokens.size() < MIN_CONTENT_TOKENS) {
            counters.i(TableRejectionCounters.FEW_TOKENS_IN_CONTENT);
            return true;
        }
        if (this.fullDescriptionTokens.size() < MIN_DESCRIPTION_TOKENS) {
            counters.i(TableRejectionCounters.FEW_TOKENS_IN_HEADER);
            return true;
        }
        return false;
    }

    /**
     * @return the internal list of content tokens itself (not a copy)
     */
    public List<LayoutToken> getContentTokens() {
        return this.contentTokens;
    }

    /**
     * @return the internal list of description tokens itself (not a copy)
     */
    public List<LayoutToken> getFullDescriptionTokens() {
        return this.fullDescriptionTokens;
    }

    /**
     * @return the current value of the "good table" flag
     */
    public boolean isGoodTable() {
        return this.goodTable;
    }
}
