package org.grobid.core.document;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.SystemUtils;
import org.grobid.core.exceptions.GrobidException;
import org.grobid.core.exceptions.GrobidExceptionStatus;
import org.grobid.core.exceptions.GrobidResourceException;
import org.grobid.core.process.ProcessPdfToXml;
import org.grobid.core.process.ProcessRunner;
import org.grobid.core.utilities.GrobidProperties;
import org.grobid.core.utilities.KeyGen;
import org.grobid.core.utilities.Utilities;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/grobid/core/document/DocumentSource.class */
public class DocumentSource {
    /** Logger shared by every instance of this class. */
    private static final Logger LOGGER = LoggerFactory.getLogger(DocumentSource.class);

    /** Exit code 143: the value server-mode conversion reports as a conversion timeout. */
    private static final int KILLED_DUE_2_TIMEOUT = 143;

    /** Exit code 127: the value server-mode conversion reports as "pdfalto cannot be executed" (libxml2 hint). */
    private static final int MISSING_LIBXML2 = 127;

    /** Exit code 126: the value server-mode conversion reports as "pdfalto executable not found". */
    private static final int MISSING_PDFTOXML = 126;

    /** Number of files in the "_data" asset folder above which {@code pdf2xml} logs a warning. */
    public static final int PDFTOXML_FILES_AMOUNT_LIMIT = 5000;

    /** The PDF this source was created from. */
    private File pdfFile;

    /** The temporary XML file that the conversion writes for {@link #pdfFile}. */
    private File xmlFile;

    /** When {@code true}, {@code close(...)} deletes {@link #xmlFile} and its companion files. */
    boolean cleanupXml;

    /** Private so that instances are only created inside this class (the {@code fromPdf} factories do so). */
    private DocumentSource() {
    }

    /**
     * Converts a whole PDF using the default conversion options.
     *
     * @param file the PDF to convert
     * @return a document source holding the PDF and its converted XML file
     */
    public static DocumentSource fromPdf(File file) {
        // Non-positive page numbers are not passed on to the converter, i.e. no page restriction.
        final int unrestricted = -1;
        return fromPdf(file, unrestricted, unrestricted);
    }

    /**
     * Converts a page range of a PDF with annotations extracted, and without images or outline.
     *
     * @param file the PDF to convert
     * @param i    first page to convert; ignored when not positive
     * @param i2   last page to convert; ignored when not positive
     * @return a document source holding the PDF and its converted XML file
     */
    public static DocumentSource fromPdf(File file, int i, int i2) {
        final boolean withImages = false;
        final boolean withAnnotations = true;
        final boolean withOutline = false;
        return fromPdf(file, i, i2, withImages, withAnnotations, withOutline);
    }

    /**
     * Converts a PDF to XML and wraps both files in a new {@code DocumentSource}.
     *
     * <p>If the conversion throws, the temporary files created so far are cleaned up through
     * {@link #close(boolean, boolean, boolean)} and the exception is rethrown unchanged.
     *
     * @param file the PDF to convert; must be an existing regular file
     * @param i    first page to convert; ignored when not positive
     * @param i2   last page to convert; ignored when not positive
     * @param z    whether images are extracted (when {@code false} the converter gets {@code -noImage})
     * @param z2   whether annotations are extracted ({@code -annotation})
     * @param z3   whether the outline is extracted ({@code -outline})
     * @return a document source whose XML file is deleted on {@code close(...)}
     * @throws GrobidException with status {@code BAD_INPUT_DATA} when {@code file} is null, missing or a directory
     */
    public static DocumentSource fromPdf(File file, int i, int i2, boolean z, boolean z2, boolean z3) {
        // A null reference is reported like any other invalid input instead of a bare NullPointerException.
        if (file == null || !file.exists() || file.isDirectory()) {
            throw new GrobidException("Input PDF file " + file + " does not exist or a directory", GrobidExceptionStatus.BAD_INPUT_DATA);
        }
        DocumentSource source = new DocumentSource();
        source.cleanupXml = true;
        try {
            source.xmlFile = source.pdf2xml(null, false, i, i2, file, GrobidProperties.getTempPath(), z, z2, z3);
            source.pdfFile = file;
            return source;
        } catch (Exception e) {
            // Remove whatever the failed conversion left behind before propagating the failure.
            source.close(z, z2, z3);
            throw e;
        }
    }

    /**
     * Builds the pdfalto command line (executable plus options) as a single space-separated string.
     *
     * @param z  whether images are extracted; when {@code false}, {@code -noImage} is added
     * @param z2 whether {@code -annotation} is added
     * @param z3 whether {@code -outline} is added
     * @return the command string, without input and output file arguments
     */
    private String getPdfToXmlCommand(boolean z, boolean z2, boolean z3) {
        StringBuilder command = new StringBuilder(GrobidProperties.getPdfToXMLPath().getAbsolutePath());
        if (SystemUtils.IS_OS_WINDOWS) {
            // On Windows the executable sits one "pdfalto" directory deeper.
            command.append(File.separator).append("pdfalto");
        }

        String executable;
        if (GrobidProperties.isContextExecutionServer().booleanValue()) {
            executable = "pdfalto_server";
        } else {
            executable = "pdfalto";
        }
        command.append(File.separator).append(executable);

        command.append(" -blocks -noImageInline -fullFontName ");
        if (!z) {
            command.append(" -noImage ");
        }
        if (z2) {
            command.append(" -annotation ");
        }
        if (z3) {
            command.append(" -outline ");
        }
        // NOTE(review): this limit (2000) differs from PDFTOXML_FILES_AMOUNT_LIMIT (5000) - confirm it is intended.
        return command.append(" -filesLimit 2000 ").toString();
    }

    /**
     * Runs pdfalto on a PDF and returns the XML file it produced.
     *
     * <p>The output file gets a generated name with the {@code .lxml} extension inside {@code file2}
     * and is remembered in this instance so that {@code close(...)} can delete it.
     *
     * @param num   wait limit for thread-mode conversion (presumably milliseconds); {@code null} uses the configured default
     * @param z     when {@code true} the conversion runs even if the output file already exists
     * @param i     first page to convert; ignored when not positive
     * @param i2    last page to convert; ignored when not positive
     * @param file  the PDF to convert
     * @param file2 directory in which the output file is created
     * @param z2    whether images are extracted
     * @param z3    whether annotations are extracted
     * @param z4    whether the outline is extracted
     * @return the XML file written by the converter
     * @throws GrobidException when the conversion fails, times out or is interrupted
     */
    public File pdf2xml(Integer num, boolean z, int i, int i2, File file, File file2, boolean z2, boolean z3, boolean z4) {
        LOGGER.debug("start pdf to xml sub process");
        long startTime = System.currentTimeMillis();
        String command = getPdfToXmlCommand(z2, z3, z4);
        if (i > 0) {
            command = command + " -f " + i + " ";
        }
        if (i2 > 0) {
            command = command + " -l " + i2 + " ";
        }
        File target = new File(file2, KeyGen.getKey() + ".lxml");
        // Remembered before the conversion starts so that cleanup can find partially written output.
        this.xmlFile = target;
        File result = target;
        if (!target.exists() || z) {
            List<String> arguments = new ArrayList<String>();
            for (String token : command.split(" ")) {
                if (!token.trim().isEmpty()) {
                    arguments.add(token);
                }
            }
            arguments.add(file.getAbsolutePath());
            arguments.add(target.getAbsolutePath());
            if (GrobidProperties.isContextExecutionServer().booleanValue()) {
                result = processPdfToXmlServerMode(file, target, arguments);
            } else {
                if (!SystemUtils.IS_OS_WINDOWS) {
                    // Outside Windows the converter runs through bash so its virtual memory can be capped.
                    // Both paths are shell-quoted: they end up inside a script string and may contain
                    // spaces, quotes or other characters that bash would otherwise interpret.
                    int memoryLimitKb = GrobidProperties.getPdfToXMLMemoryLimitMb().intValue() * 1024;
                    String script = "ulimit -Sv " + memoryLimitKb + " && " + command
                            + " " + shellQuote(file.getPath()) + " " + shellQuote(target.getPath());
                    arguments = Arrays.asList("bash", "-c", script);
                }
                LOGGER.debug("Executing command: " + arguments);
                result = processPdfToXmlThreadMode(num, file, target, arguments);
            }
            if (result == null) {
                // Thread-mode conversion yields null when the wait was interrupted; fail explicitly
                // instead of dereferencing the null result below.
                close(true, true, true);
                throw new GrobidException("PDF to XML conversion was interrupted on pdf file " + file, GrobidExceptionStatus.PDFTOXML_CONVERSION_FAILURE);
            }
            File dataDir = new File(result.getAbsolutePath() + "_data");
            File[] assets = dataDir.listFiles();
            if (assets != null && assets.length > PDFTOXML_FILES_AMOUNT_LIMIT) {
                LOGGER.warn("The temp folder " + dataDir + " contains " + assets.length + " files and exceeds the limit, only the first " + PDFTOXML_FILES_AMOUNT_LIMIT + " asset files will be kept.");
            }
        }
        LOGGER.debug("pdf to xml sub process process finished. Time to process:" + (System.currentTimeMillis() - startTime) + "ms");
        return result;
    }

    /** Wraps a value in single quotes for bash, escaping any embedded single quote as {@code '\''}. */
    private static String shellQuote(String value) {
        return "'" + value.replace("'", "'\\''") + "'";
    }

    /**
     * Runs the conversion command through a {@code ProcessRunner} and waits for it to finish.
     *
     * <p>Every failure path deletes the temporary files via {@code close(true, true, true)} before
     * throwing, and the runner is always interrupted on the way out.
     *
     * @param num   wait limit handed to {@code join} (presumably milliseconds); {@code null} uses
     *              {@code GrobidProperties.getPdfToXMLTimeoutMs()}
     * @param file  the PDF being converted (used for the runner name and error messages)
     * @param file2 the output file the command writes to
     * @param list  the full command to execute
     * @return {@code file2} when the process exited with status 0
     * @throws GrobidException with status {@code TIMEOUT} when no exit status is available after
     *         waiting, or {@code PDFTOXML_CONVERSION_FAILURE} when the exit status is non-zero or
     *         the wait was interrupted
     */
    private File processPdfToXmlThreadMode(Integer num, File file, File file2, List<String> list) {
        LOGGER.debug("Executing: " + list.toString());
        ProcessRunner runner = new ProcessRunner(list, "pdfalto[" + file + "]", true);
        runner.start();
        try {
            boolean interrupted = false;
            try {
                int waitLimit = num != null ? num.intValue() : GrobidProperties.getPdfToXMLTimeoutMs().intValue();
                runner.join(waitLimit);
            } catch (InterruptedException e) {
                interrupted = true;
                runner.interrupt();
                // Keep the interruption visible to the calling code.
                Thread.currentThread().interrupt();
            }

            // Read once so that every check below sees the same value.
            Integer exitStatus = runner.getExitStatus();
            if (exitStatus == null) {
                runner.killProcess();
                close(true, true, true);
                throw new GrobidException("PDF to XML conversion timed out", GrobidExceptionStatus.TIMEOUT);
            }
            if (exitStatus.intValue() != 0) {
                String errorStreamContents = runner.getErrorStreamContents();
                close(true, true, true);
                throw new GrobidException("PDF to XML conversion failed on pdf file " + file + " " + (StringUtils.isEmpty(errorStreamContents) ? "" : "due to: " + errorStreamContents), GrobidExceptionStatus.PDFTOXML_CONVERSION_FAILURE);
            }
            if (interrupted) {
                // Previously this path returned null, which the caller dereferences; report the
                // interruption explicitly and do not leave the temporary files behind.
                close(true, true, true);
                throw new GrobidException("PDF to XML conversion was interrupted on pdf file " + file, GrobidExceptionStatus.PDFTOXML_CONVERSION_FAILURE);
            }
            return file2;
        } finally {
            runner.interrupt();
        }
    }

    /**
     * Runs the conversion command through {@code ProcessPdfToXml} and maps its exit code to a result.
     *
     * @param file  the PDF being converted (used in error messages)
     * @param file2 the output file the command writes to
     * @param list  the full command to execute
     * @return {@code file2} when the exit code is 0
     * @throws GrobidException for a missing exit code or any non-zero exit code
     */
    private File processPdfToXmlServerMode(File file, File file2, List<String> list) {
        LOGGER.debug("Executing: " + list.toString());
        Integer exitCode = ProcessPdfToXml.process(list);
        if (exitCode == null) {
            throw new GrobidException("An error occurred while converting pdf " + file, GrobidExceptionStatus.BAD_INPUT_DATA);
        }
        switch (exitCode.intValue()) {
            case 0:
                return file2;
            case 143: // same value as KILLED_DUE_2_TIMEOUT
                throw new GrobidException("PDF to XML conversion timed out", GrobidExceptionStatus.TIMEOUT);
            case 126: // same value as MISSING_PDFTOXML
                throw new GrobidException("PDF to XML conversion failed. Cannot find pdfalto executable", GrobidExceptionStatus.PDFTOXML_CONVERSION_FAILURE);
            case 127: // same value as MISSING_LIBXML2
                throw new GrobidException("PDF to XML conversion failed. pdfalto cannot be executed correctly. Has libxml2 been installed in the system? More information can be found in the logs. ", GrobidExceptionStatus.PDFTOXML_CONVERSION_FAILURE);
            default:
                throw new GrobidException("PDF to XML conversion failed with error code: " + exitCode, GrobidExceptionStatus.BAD_INPUT_DATA);
        }
    }

    /**
     * Deletes a temporary XML file and the companion files derived from its name.
     *
     * <p>The "_metadata.xml" companion is only looked at when the XML file itself exists. The
     * "_data" folder, "_annot.xml" and "_outline.xml" companions are handled independently of that,
     * each only when its flag is set.
     *
     * @param file the temporary XML file; {@code null} means there is nothing to clean
     * @param z    whether to delete the "_data" folder
     * @param z2   whether to delete the "_annot.xml" file
     * @param z3   whether to delete the "_outline.xml" file
     * @return the outcome of the last deletion performed, or {@code false} if nothing was deleted
     * @throws GrobidResourceException when a deletion fails or raises an exception
     */
    private boolean cleanXmlFile(File file, boolean z, boolean z2, boolean z3) {
        if (file == null) {
            return false;
        }

        boolean deleted = false;

        // The XML file itself, plus its metadata companion.
        try {
            if (file.exists()) {
                deleted = file.delete();
                if (!deleted) {
                    throw new GrobidResourceException("Deletion of a temporary XML file failed for file '" + file.getAbsolutePath() + "'");
                }
                File metadata = new File(file + "_metadata.xml");
                if (metadata.exists()) {
                    deleted = Utilities.deleteDir(metadata);
                    if (!deleted) {
                        throw new GrobidResourceException("Deletion of temporary metadata file failed for file '" + metadata.getAbsolutePath() + "'");
                    }
                }
            }
        } catch (GrobidResourceException failure) {
            throw failure;
        } catch (Exception cause) {
            throw new GrobidResourceException("An exception occurred while deleting an XML file '" + file + "'.", cause);
        }

        // Folder holding the extracted assets.
        if (z) {
            try {
                File dataDir = new File(file + "_data");
                if (dataDir.exists() && dataDir.isDirectory()) {
                    deleted = Utilities.deleteDir(dataDir);
                    if (!deleted) {
                        throw new GrobidResourceException("Deletion of temporary image files failed for file '" + dataDir.getAbsolutePath() + "'");
                    }
                }
            } catch (GrobidResourceException failure) {
                throw failure;
            } catch (Exception cause) {
                throw new GrobidResourceException("An exception occurred while deleting an XML file '" + file + "'.", cause);
            }
        }

        // Annotation companion file.
        if (z2) {
            try {
                File annotations = new File(file + "_annot.xml");
                if (annotations.exists()) {
                    deleted = annotations.delete();
                    if (!deleted) {
                        throw new GrobidResourceException("Deletion of temporary annotation file failed for file '" + annotations.getAbsolutePath() + "'");
                    }
                }
            } catch (GrobidResourceException failure) {
                throw failure;
            } catch (Exception cause) {
                throw new GrobidResourceException("An exception occurred while deleting an XML file '" + file + "'.", cause);
            }
        }

        // Outline companion file.
        if (z3) {
            try {
                File outline = new File(file + "_outline.xml");
                if (outline.exists()) {
                    deleted = outline.delete();
                    if (!deleted) {
                        throw new GrobidResourceException("Deletion of temporary outline file failed for file '" + outline.getAbsolutePath() + "'");
                    }
                }
            } catch (GrobidResourceException failure) {
                throw failure;
            } catch (Exception cause) {
                throw new GrobidResourceException("An exception occurred while deleting an XML file '" + file + "'.", cause);
            }
        }

        return deleted;
    }

    /**
     * Deletes the temporary XML file and its companions if this source is flagged for cleanup.
     * Cleanup failures are logged and never propagated.
     *
     * @param z  whether to delete the "_data" folder
     * @param z2 whether to delete the "_annot.xml" file
     * @param z3 whether to delete the "_outline.xml" file
     */
    public void close(boolean z, boolean z2, boolean z3) {
        if (!cleanupXml) {
            return;
        }
        try {
            cleanXmlFile(xmlFile, z, z2, z3);
        } catch (Exception failure) {
            LOGGER.error("Cannot cleanup resources (just printing exception):", failure);
        }
    }

    /**
     * Null-safe variant of {@link #close(boolean, boolean, boolean)}.
     *
     * @param documentSource the source to close; {@code null} is ignored
     * @param z              whether to delete the "_data" folder
     * @param z2             whether to delete the "_annot.xml" file
     * @param z3             whether to delete the "_outline.xml" file
     */
    public static void close(DocumentSource documentSource, boolean z, boolean z2, boolean z3) {
        if (documentSource == null) {
            return;
        }
        documentSource.close(z, z2, z3);
    }

    /**
     * @return the PDF file held by this source, or {@code null} if none has been set
     */
    public File getPdfFile() {
        return pdfFile;
    }

    /**
     * Replaces the PDF file held by this source.
     *
     * @param file the new PDF file
     */
    public void setPdfFile(File file) {
        pdfFile = file;
    }

    /**
     * @return the XML file held by this source, or {@code null} if none has been set
     */
    public File getXmlFile() {
        return xmlFile;
    }

    /**
     * Replaces the XML file held by this source; this is also the file that {@code close(...)} cleans up.
     *
     * @param file the new XML file
     */
    public void setXmlFile(File file) {
        xmlFile = file;
    }
}
