package org.grobid.core.visualization;

import com.google.common.collect.Lists;
import java.awt.Desktop;
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import net.sf.saxon.om.Item;
import net.sf.saxon.om.SequenceIterator;
import net.sf.saxon.trans.XPathException;
import org.apache.commons.io.FileUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.grobid.core.data.Figure;
import org.grobid.core.data.Table;
import org.grobid.core.document.Document;
import org.grobid.core.document.DocumentSource;
import org.grobid.core.engines.Engine;
import org.grobid.core.engines.config.GrobidAnalysisConfig;
import org.grobid.core.factory.GrobidFactory;
import org.grobid.core.layout.BoundingBox;
import org.grobid.core.layout.GraphicObject;
import org.grobid.core.layout.GraphicObjectType;
import org.grobid.core.main.LibraryLoader;
import org.grobid.core.utilities.BoundingBoxCalculator;
import org.grobid.core.utilities.GrobidProperties;
import org.grobid.core.utilities.XQueryProcessor;

/* loaded from: input_file:org/grobid/core/visualization/FigureTableVisualizer.class */
/**
 * Developer utility that runs the GROBID full-text pipeline on a PDF and draws the
 * detected figure, table and vector-graphics areas onto a copy of that PDF.
 *
 * <p>All state is kept in static fields, so the class is not safe for concurrent use.
 */
public class FigureTableVisualizer {
    public static final boolean VISUALIZE_VECTOR_BOXES = true;

    /** Input used by {@link #main(String[])} when no path is given on the command line. */
    private static final String DEFAULT_INPUT_PDF = "/Users/zholudev/Downloads/Dkt. 1-1 - Complaint - Exhibit A.pdf";
    /** Counter group under which all statistics of this class are recorded. */
    private static final String COUNTER_GROUP = "TABLES_TEST";

    // Written by processPdfFile; not read anywhere in this class.
    private static Set<Integer> blacklistedPages;
    private static File inputPdf;
    /** Set as soon as any figure text area or good table has been annotated. */
    private static boolean annotated;
    /** Set as soon as any boxed graphic object or good table has been annotated. */
    private static boolean annotatedFigure;
    /** When true, the annotated PDF is opened with the desktop viewer after saving. */
    static boolean singleFile = true;

    /**
     * Command-line entry point.
     *
     * @param strArr optional arguments: {@code [0]} path of the PDF to process (defaults to
     *               a built-in path), {@code [1]} directory that receives a copy of the
     *               annotated PDF (no copy is made when absent)
     */
    public static void main(String[] strArr) {
        try {
            boolean hasInput = strArr != null && strArr.length > 0;
            boolean hasOutputDir = strArr != null && strArr.length > 1;
            File input = new File(hasInput ? strArr[0] : DEFAULT_INPUT_PDF);
            File outputDir = hasOutputDir ? new File(strArr[1]) : null;
            processPdfFile(input, outputDir);
            System.out.println(Engine.getCntManager());
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Returns the pages to be excluded because of vector graphics.
     *
     * <p>Currently a stub: the argument is ignored and an empty, mutable set is returned.
     */
    private static Set<Integer> getVectorGraphicPages(File file) throws XPathException, IOException {
        return new HashSet<>();
    }

    /**
     * Processes one PDF: converts it, runs the full-text model, annotates the result and
     * saves it to {@code /tmp/testFigures.pdf}.
     *
     * @param file  the PDF to process
     * @param file2 directory that receives a copy of the annotated PDF when at least one
     *              annotation was drawn; may be {@code null} to skip the copy
     * @throws Exception on any I/O, conversion or processing failure
     */
    private static void processPdfFile(File file, File file2) throws Exception {
        inputPdf = file;
        annotated = false;
        annotatedFigure = false;

        File annotatedPdf = new File("/tmp/testFigures.pdf");

        // The loaded document holds open resources; close it even if processing fails.
        try (PDDocument pdf = PDDocument.load(file)) {
            Engine engine = setupEngine();

            // Start from a clean working directory on every run.
            File contentDir = new File("/tmp/contentDir");
            FileUtils.deleteDirectory(contentDir);

            GrobidAnalysisConfig config = new GrobidAnalysisConfig.GrobidAnalysisConfigBuilder()
                    .pdfAssetPath(new File(contentDir, "tei"))
                    .withPreprocessImages(false)
                    .generateTeiCoordinates(Lists.newArrayList("figure"))
                    .withProcessVectorGraphics(true)
                    .build();

            DocumentSource source = DocumentSource.fromPdf(file, -1, -1, true, false, false);

            // Keep the input PDF and the intermediate XML (plus its "_data" directory) for inspection.
            File xmlDir = new File(contentDir, "pdf2xml");
            xmlDir.mkdirs();
            FileUtils.copyFileToDirectory(file, contentDir);
            File xmlCopy = new File(xmlDir, "input.xml");
            FileUtils.copyFile(source.getXmlFile(), xmlCopy);
            FileUtils.copyDirectory(
                    new File(source.getXmlFile().getAbsolutePath() + "_data"),
                    new File(xmlDir, source.getXmlFile().getName() + "_data"));
            System.out.println(source.getXmlFile());

            blacklistedPages = getVectorGraphicPages(xmlDir);

            Document teiDoc = engine.fullTextToTEIDoc(source, config);
            PDDocument result = annotateFigureAndTables(pdf, xmlCopy, teiDoc, false, false, true, true, true);
            if (result != null) {
                result.save(annotatedPdf);
                if (singleFile && Desktop.isDesktopSupported()) {
                    Desktop.getDesktop().open(annotatedPdf);
                }
            }
        }

        if (file2 == null || !annotated) {
            return;
        }
        Engine.getCntManager().i(COUNTER_GROUP, "ANNOTATED_PDFS");
        // 'annotated' is known to be true here, so only the figure flag selects the suffix.
        String suffix = annotatedFigure ? "_annotatedFigure.pdf" : "_annotated.pdf";
        FileUtils.copyFile(annotatedPdf, new File(file2, file.getName() + suffix));
    }

    /** Points GROBID at the relative {@code grobid-home} directory, loads native libraries and returns the engine. */
    private static Engine setupEngine() {
        GrobidProperties.set_GROBID_HOME_PATH("grobid-home");
        GrobidProperties.setGrobidPropertiesPath("grobid-home/config/grobid.properties");
        LibraryLoader.load();
        return GrobidFactory.getInstance().getEngine();
    }

    /**
     * Draws the selected annotation layers onto {@code pDDocument}.
     *
     * <p>The integer arguments passed to {@code AnnotationUtil.annotatePage} are kept exactly
     * as they were; NOTE(review): they presumably select colour and line width, confirm
     * against {@code AnnotationUtil}.
     *
     * @param pDDocument the PDF to draw on; it is modified in place
     * @param file       the converter's XML output, queried only when {@code z2} is set
     * @param document   the processed GROBID document supplying TEI, figures, images and tables
     * @param z          annotate the coordinates returned by {@code figure-table-coords.xq} on the TEI
     * @param z2         annotate the coordinates returned by {@code figure-coords-pdfalto.xq} on {@code file}
     * @param z3         annotate figure text areas and boxed graphic objects
     * @param z4         annotate tables and update the table counters
     * @param z5         annotate images of type {@code VECTOR_BOX}
     * @return the same {@code pDDocument} instance that was passed in
     * @throws IOException           if drawing or reading fails
     * @throws XPathException        if a query cannot be evaluated
     * @throws IllegalStateException if the TEI query result does not consist of complete pairs
     */
    public static PDDocument annotateFigureAndTables(PDDocument pDDocument, File file, Document document, boolean z, boolean z2, boolean z3, boolean z4, boolean z5) throws IOException, XPathException {
        // The TEI query is evaluated even when 'z' is false so that a TEI that cannot be
        // queried is reported exactly as before.
        String teiQuery = XQueryProcessor.getQueryFromResources("figure-table-coords.xq");
        String tei = document.getTei();
        SequenceIterator teiItems = new XQueryProcessor(tei).getSequenceIterator(teiQuery);

        if (z) {
            annotateTeiCoordinates(pDDocument, teiItems);
        }
        if (z2) {
            annotatePdfaltoImages(pDDocument, file);
        }
        if (z3) {
            annotateFigures(pDDocument, document);
        }
        if (z5) {
            annotateVectorBoxes(pDDocument, document);
        }
        if (z4) {
            annotateTables(pDDocument, document);
        }
        return pDDocument;
    }

    /** Consumes (coordinates, boolean flag) pairs from the iterator and annotates each coordinate string. */
    private static void annotateTeiCoordinates(PDDocument pdf, SequenceIterator items) throws IOException, XPathException {
        Item coords;
        while ((coords = items.next()) != null) {
            Item flag = items.next();
            if (flag == null) {
                // An odd number of items used to end in a bare NullPointerException.
                throw new IllegalStateException(
                        "figure-table-coords.xq returned coordinates without a matching flag: " + coords.getStringValue());
            }
            int style = Boolean.parseBoolean(flag.getStringValue()) ? 1 : 2;
            AnnotationUtil.annotatePage(pdf, coords.getStringValue(), style);
        }
    }

    /** Annotates every coordinate string returned by {@code figure-coords-pdfalto.xq} on the converter XML. */
    private static void annotatePdfaltoImages(PDDocument pdf, File xmlFile) throws IOException, XPathException {
        String query = XQueryProcessor.getQueryFromResources("figure-coords-pdfalto.xq");
        SequenceIterator items = new XQueryProcessor(xmlFile).getSequenceIterator(query);
        Item coords;
        while ((coords = items.next()) != null) {
            AnnotationUtil.annotatePage(pdf, coords.getStringValue(), 3);
        }
    }

    /** Annotates the text areas and boxed graphic objects of every non-null figure. */
    private static void annotateFigures(PDDocument pdf, Document document) throws IOException {
        if (document.getFigures() == null) {
            return;
        }
        // Incremented per figure so that each figure gets its own value, starting at 11.
        int figureIndex = 10;
        for (Figure figure : document.getFigures()) {
            if (figure == null) {
                continue;
            }
            figureIndex++;
            List<GraphicObject> graphics = figure.getBoxedGraphicObjects();
            if (figure.getTextArea() != null) {
                int textStyle = graphics == null ? 1 : 2;
                for (BoundingBox box : figure.getTextArea()) {
                    annotated = true;
                    AnnotationUtil.annotatePage(pdf, box.toString(), figureIndex, textStyle);
                }
            }
            if (graphics != null) {
                for (GraphicObject graphic : graphics) {
                    annotatedFigure = true;
                    String coords = AnnotationUtil.getCoordString(
                            graphic.getPage(), graphic.getX(), graphic.getY(), graphic.getWidth(), graphic.getHeight());
                    AnnotationUtil.annotatePage(pdf, coords, figureIndex, 2);
                }
            }
        }
    }

    /** Annotates the bounding box of every image whose type is {@code VECTOR_BOX}. */
    private static void annotateVectorBoxes(PDDocument pdf, Document document) throws IOException {
        if (document.getImages() == null) {
            return;
        }
        for (GraphicObject image : document.getImages()) {
            if (image.getType() != GraphicObjectType.VECTOR_BOX) {
                continue;
            }
            BoundingBox box = image.getBoundingBox();
            String coords = AnnotationUtil.getCoordString(
                    box.getPage(), box.getX(), box.getY(), box.getWidth(), box.getHeight());
            AnnotationUtil.annotatePage(pdf, coords, 12, 3);
        }
    }

    /** Annotates every good table, reports bad ones, and records the table counters. */
    private static void annotateTables(PDDocument pdf, Document document) throws IOException {
        boolean sawTable = false;
        if (document.getTables() != null) {
            for (Table table : document.getTables()) {
                if (table == null) {
                    // Mirrors the null check applied to figures.
                    continue;
                }
                sawTable = true;
                if (table.isGoodTable()) {
                    // NOTE(review): assumes calculateOneBox never returns null for a good table - confirm.
                    BoundingBox contentBox = BoundingBoxCalculator.calculateOneBox(table.getContentTokens());
                    BoundingBox descriptionBox = BoundingBoxCalculator.calculateOneBox(table.getFullDescriptionTokens());
                    System.out.println("Annotating TABLE on page: " + contentBox.getPage());
                    AnnotationUtil.annotatePage(pdf, AnnotationUtil.getCoordString(descriptionBox), 100, 2);
                    AnnotationUtil.annotatePage(pdf, AnnotationUtil.getCoordString(contentBox), 101, 2);
                    annotatedFigure = true;
                    annotated = true;
                    Engine.getCntManager().i(COUNTER_GROUP, "ANNOTATED_TABLES");
                } else {
                    // The text area is treated as nullable for figures, so guard it here too.
                    List<BoundingBox> area = table.getTextArea();
                    String page = (area == null || area.isEmpty()) ? "unknown" : String.valueOf(area.get(0).getPage());
                    System.out.println("Skipping bad table on page: " + page);
                    Engine.getCntManager().i(COUNTER_GROUP, "BAD_TABLES");
                }
            }
        }
        if (sawTable) {
            Engine.getCntManager().i(COUNTER_GROUP, "PDF_HAS_SOME_TABLES");
        }
    }
}
