package org.grobid.core.engines.patent;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;
import java.util.zip.GZIPInputStream;
import javax.xml.parsers.SAXParserFactory;
import org.apache.commons.text.StringSubstitutor;
import org.grobid.core.GrobidModels;
import org.grobid.core.analyzers.GrobidAnalyzer;
import org.grobid.core.data.BibDataSet;
import org.grobid.core.data.BiblioItem;
import org.grobid.core.data.BiblioSet;
import org.grobid.core.data.PatentItem;
import org.grobid.core.document.Document;
import org.grobid.core.document.DocumentSource;
import org.grobid.core.document.OPSService;
import org.grobid.core.document.PatentDocument;
import org.grobid.core.engines.EngineParsers;
import org.grobid.core.engines.config.GrobidAnalysisConfig;
import org.grobid.core.engines.label.TaggingLabels;
import org.grobid.core.engines.tagging.GenericTagger;
import org.grobid.core.engines.tagging.TaggerFactory;
import org.grobid.core.exceptions.GrobidException;
import org.grobid.core.exceptions.GrobidResourceException;
import org.grobid.core.features.FeaturesVectorReference;
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.layout.Page;
import org.grobid.core.lexicon.Lexicon;
import org.grobid.core.sax.PatentAnnotationSaxParser;
import org.grobid.core.sax.TextSaxParser;
import org.grobid.core.utilities.BoundingBoxCalculator;
import org.grobid.core.utilities.Consolidation;
import org.grobid.core.utilities.GrobidProperties;
import org.grobid.core.utilities.KeyGen;
import org.grobid.core.utilities.LanguageUtilities;
import org.grobid.core.utilities.OffsetPosition;
import org.grobid.core.utilities.TextUtilities;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

/* loaded from: input_file:org/grobid/core/engines/patent/ReferenceExtractor.class */
public class ReferenceExtractor implements Closeable {
    // Class-wide SLF4J logger.
    private static final Logger LOGGER = LoggerFactory.getLogger((Class<?>) ReferenceExtractor.class);
    // Sequence labeller; the constructor obtains it from TaggerFactory for GrobidModels.PATENT_ALL.
    private GenericTagger taggerAll;
    // Created lazily by the extraction/annotation methods the first time they need it.
    private PatentRefParser patentParser;
    // Left null by the constructor; not otherwise used in the part of the class reviewed here.
    private Consolidation consolidator;
    // Left null by the constructor; not otherwise used in the part of the class reviewed here.
    private String tmpPath;
    // Initialised to false by the constructor; not read in the part of the class reviewed here.
    public boolean debug;
    // Lexicon singleton, queried for journal, abbreviated-journal, conference and publisher name positions.
    public Lexicon lexicon;
    // Copied from TextSaxParser.currentPatentNumber after a patent XML file has been parsed.
    public String currentPatentNumber;
    // Left null by the constructor; not otherwise used in the part of the class reviewed here.
    public OPSService ops;
    // Text of the most recently loaded document: written by the XML and PDF entry points,
    // read by extractAllReferencesOPS.
    private String description;
    // Left null by the constructor; not otherwise used in the part of the class reviewed here.
    public ArrayList<BibDataSet> resBib;
    // Value handed to setDocumentPath.
    private String path;
    // Parser set supplied at construction; its citation parser processes non-patent references.
    private EngineParsers parsers;
    // Tokenizer singleton used to split the input text into tokens.
    private GrobidAnalyzer analyzer;
    // Language identification singleton; its result is passed to the tokenizer.
    private LanguageUtilities languageUtilities;

    /**
     * Stores the given document path on this extractor.
     *
     * @param str the path value; it is kept as-is, without validation
     */
    public void setDocumentPath(String str) {
        path = str;
    }

    /**
     * Creates an extractor that works with a freshly instantiated {@link EngineParsers}.
     * Equivalent to {@code new ReferenceExtractor(new EngineParsers())}.
     */
    public ReferenceExtractor() {
        this(new EngineParsers());
    }

    /**
     * Creates an extractor that uses the supplied parser set.
     *
     * <p>Loads the lexicon and language-utility singletons, obtains the tagger for
     * {@code GrobidModels.PATENT_ALL} and the analyzer singleton. All other fields
     * (patent parser, consolidator, temporary path, debug flag, current patent number,
     * OPS service, description, result list and document path) keep their Java
     * default values ({@code null} / {@code false}).
     *
     * @param engineParsers parser set stored for later use by the extraction methods
     */
    public ReferenceExtractor(EngineParsers engineParsers) {
        lexicon = Lexicon.getInstance();
        languageUtilities = LanguageUtilities.getInstance();
        parsers = engineParsers;
        taggerAll = TaggerFactory.getTagger(GrobidModels.PATENT_ALL);
        analyzer = GrobidAnalyzer.getInstance();
    }

    /**
     * Runs reference extraction on the description text currently held by this instance.
     *
     * <p>The description is the one stored by a previous XML or PDF extraction call; this
     * method itself does not fetch anything. All arguments are forwarded unchanged to
     * {@link #extractAllReferencesString}.
     *
     * @param z     forwarded as the duplicate-filtering flag
     * @param i     forwarded to the string extraction method
     * @param z2    forwarded to the string extraction method
     * @param list  receives the extracted patent items
     * @param list2 receives the extracted non-patent references
     * @return the result of the string extraction, or {@code null} when no description is stored
     * @throws GrobidException wrapping any exception raised during extraction
     */
    public String extractAllReferencesOPS(boolean z, int i, boolean z2, List<PatentItem> list, List<BibDataSet> list2) {
        final String storedText = this.description;
        if (storedText == null) {
            return null;
        }
        try {
            return extractAllReferencesString(storedText, z, i, z2, list, list2);
        } catch (Exception cause) {
            throw new GrobidException(cause);
        }
    }

    /**
     * Convenience variant of {@link #extractAllReferencesXMLFile} that passes {@code null}
     * as the list for non-patent references.
     *
     * @param str  path of the patent XML file
     * @param z    forwarded unchanged
     * @param i    forwarded unchanged
     * @param z2   forwarded unchanged
     * @param list receives the extracted patent items
     * @return whatever {@link #extractAllReferencesXMLFile} returns
     */
    public String extractPatentReferencesXMLFile(String str, boolean z, int i, boolean z2, List<PatentItem> list) {
        final List<BibDataSet> noArticleList = null;
        return extractAllReferencesXMLFile(str, z, i, z2, list, noArticleList);
    }

    /**
     * Extracts references from the {@code description} part of a patent XML file.
     *
     * <p>The file is read through a SAX reader whose entity resolver answers every external
     * entity with an empty XML document. Paths ending in {@code .gz} are decompressed on
     * the fly. The extracted text and patent number are stored on this instance before the
     * text is handed to {@link #extractAllReferencesString}.
     *
     * <p>Note that {@code z} and {@code i} are not honoured: extraction always runs with
     * duplicate filtering enabled ({@code true}) and with {@code 0} as the int argument.
     *
     * @param str   path (or system identifier) of the XML file; {@code .gz} files are supported
     * @param z     ignored, see above
     * @param i     ignored, see above
     * @param z2    forwarded to the string extraction method
     * @param list  receives the extracted patent items; a warning is logged when it is {@code null}
     * @param list2 receives the extracted non-patent references
     * @return the result of the string extraction, or {@code null} when the file yields no
     *         description text or any error occurs (the error is logged)
     */
    public String extractAllReferencesXMLFile(String str, boolean z, int i, boolean z2, List<PatentItem> list, List<BibDataSet> list2) {
        try {
            if (list == null) {
                LOGGER.warn("Warning patents List is null!");
            }
            TextSaxParser textSaxParser = new TextSaxParser();
            textSaxParser.setFilter("description");

            // NOTE(review): this factory is configured but never used to build the reader below,
            // so these feature settings have no effect on parsing. Kept to avoid changing behaviour.
            SAXParserFactory parserFactory = SAXParserFactory.newInstance();
            parserFactory.setValidating(false);
            parserFactory.setFeature("http://xml.org/sax/features/namespaces", false);
            parserFactory.setFeature("http://xml.org/sax/features/validation", false);

            XMLReader xmlReader = XMLReaderFactory.createXMLReader();
            // Never fetch external entities/DTDs: answer every request with an empty document.
            xmlReader.setEntityResolver(new EntityResolver() {
                @Override
                public InputSource resolveEntity(String str2, String str3) {
                    return new InputSource(new ByteArrayInputStream("<?xml version=\"1.0\" encoding=\"UTF-8\"?>".getBytes()));
                }
            });
            xmlReader.setContentHandler(textSaxParser);

            // Only the streams opened here need closing; for a plain path the reader opens the file itself.
            Closeable openedStream = null;
            try {
                InputSource inputSource;
                if (str.endsWith(".gz")) {
                    FileInputStream fileStream = new FileInputStream(str);
                    openedStream = fileStream;
                    DataInputStream unzipped = new DataInputStream(new GZIPInputStream(fileStream));
                    openedStream = unzipped;
                    inputSource = new InputSource(unzipped);
                } else {
                    inputSource = new InputSource(str);
                }
                inputSource.setEncoding("UTF-8");
                xmlReader.parse(inputSource);
            } finally {
                if (openedStream != null) {
                    try {
                        openedStream.close();
                    } catch (IOException closeFailure) {
                        LOGGER.warn("Could not close input stream for " + str, closeFailure);
                    }
                }
            }

            this.description = textSaxParser.getText();
            this.currentPatentNumber = textSaxParser.currentPatentNumber;
            if (this.description == null) {
                return null;
            }
            // Duplicate filtering is always on and the int argument is always 0 for XML input.
            return extractAllReferencesString(this.description, true, 0, z2, list, list2);
        } catch (Exception e) {
            LOGGER.error("Error while extracting references from XML file " + str, e);
            return null;
        }
    }

    /**
     * Extracts references from a PDF file.
     *
     * <p>The PDF is converted into a {@link PatentDocument}, its cleaned block text is stored
     * as this instance's description, and that text is handed to
     * {@link #extractAllReferencesString}. The document source created for the PDF is always
     * closed before the method returns, whether it succeeds or fails.
     *
     * @param str   path of the PDF file
     * @param z     forwarded to the string extraction method
     * @param i     forwarded to the string extraction method
     * @param z2    forwarded to the string extraction method
     * @param list  receives the extracted patent items
     * @param list2 receives the extracted non-patent references
     * @return the result of the string extraction, or {@code null} when the document has no
     *         blocks or no text could be obtained from them
     */
    public String extractAllReferencesPDFFile(String str, boolean z, int i, boolean z2, List<PatentItem> list, List<BibDataSet> list2) {
        DocumentSource documentSource = null;
        try {
            documentSource = DocumentSource.fromPdf(new File(str));
            PatentDocument patentDocument = new PatentDocument(documentSource);
            patentDocument.addTokenizedDocument(GrobidAnalysisConfig.defaultInstance());
            if (patentDocument.getBlocks() == null) {
                return null;
            }
            this.description = patentDocument.getAllBlocksClean(25, -1);
            if (this.description == null) {
                return null;
            }
            return extractAllReferencesString(this.description, z, i, z2, list, list2);
        } finally {
            // Close the source that was actually opened (stays null if fromPdf itself failed).
            DocumentSource.close(documentSource, true, true, true);
        }
    }

    /**
     * Annotates the references found in a PDF file.
     *
     * <p>The PDF is converted into a {@link PatentDocument} and tokenized; the tokens are
     * passed to {@link #annotateAllReferences}. Any exception is logged and turned into a
     * {@code null} result. The document source created for the PDF is always closed before
     * the method returns.
     *
     * @param str   path of the PDF file
     * @param z     forwarded to the annotation method
     * @param i     forwarded to the annotation method
     * @param z2    forwarded to the annotation method
     * @param list  receives the extracted patent items
     * @param list2 receives the extracted non-patent references
     * @return the result of {@link #annotateAllReferences}, or {@code null} when there are no
     *         tokens or an error occurred
     */
    public String annotateAllReferencesPDFFile(String str, boolean z, int i, boolean z2, List<PatentItem> list, List<BibDataSet> list2) {
        DocumentSource documentSource = null;
        try {
            documentSource = DocumentSource.fromPdf(new File(str));
            PatentDocument patentDocument = new PatentDocument(documentSource);
            List<LayoutToken> tokens = patentDocument.addTokenizedDocument(GrobidAnalysisConfig.defaultInstance());
            if (patentDocument.getBlocks() == null) {
                // Caught below like any other failure: logged, then null is returned.
                throw new GrobidException("PDF parsing resulted in empty content");
            }
            if (tokens == null || tokens.size() <= 0) {
                return null;
            }
            return annotateAllReferences(patentDocument, tokens, z, i, z2, list, list2);
        } catch (Exception e) {
            LOGGER.error("Error in annotateAllReferencesPDFFile", e);
            return null;
        } finally {
            // Close the source that was actually opened (stays null if fromPdf itself failed).
            DocumentSource.close(documentSource, true, true, true);
        }
    }

    /**
     * Extracts patent and non-patent (NPL) references from a text and returns them as TEI.
     *
     * <p>Processing steps:
     * <ol>
     *   <li>newlines and tabs are replaced by spaces and the text is tokenized;</li>
     *   <li>one feature row per non-blank token is built, including whether the token index
     *       falls inside a journal, abbreviated journal, conference or publisher name
     *       position reported by the lexicon;</li>
     *   <li>the rows are labelled by the tagger and the labelled tokens are grouped into raw
     *       patent and NPL reference strings together with their character offsets;</li>
     *   <li>raw patent references are parsed into {@link PatentItem}s, raw NPL references are
     *       processed by the citation parser into {@link BibDataSet}s;</li>
     *   <li>a TEI document containing the encoded text and all items of both lists is built.</li>
     * </ol>
     *
     * @param str   the text to analyse; {@code null} yields {@code null}
     * @param z     when {@code true}, patent items whose EPO doc number was already seen
     *              earlier in {@code list} are removed
     * @param i     forwarded to the citation parser for each NPL reference
     *              (presumably the consolidation mode - confirm against the citation parser)
     * @param z2    forwarded to {@code BibDataSet.toTEI}
     *              (presumably "include raw citation" - confirm against BibDataSet)
     * @param list  receives the extracted patent items; a local list is used when {@code null}
     * @param list2 receives the extracted NPL references; a local list is used when {@code null}
     * @return the TEI document, or {@code null} when the text is {@code null} or produces no tokens
     * @throws GrobidException wrapping any exception raised while processing
     */
    public String extractAllReferencesString(String str, boolean z, int i, boolean z2, List<PatentItem> list, List<BibDataSet> list2) {
        if (str == null) {
            return null;
        }
        try {
            if (list == null) {
                list = new ArrayList<PatentItem>();
            }
            if (list2 == null) {
                list2 = new ArrayList<BibDataSet>();
            }
            if (this.patentParser == null) {
                this.patentParser = new PatentRefParser();
            }

            String text = str.replace("\n", " ").replace("\t", " ");
            List<String> tokens = this.analyzer.tokenize(text, this.languageUtilities.runLanguageId(text, 500));
            if (tokens.size() == 0) {
                return null;
            }

            // ---- 1. feature rows -------------------------------------------------------------
            List<OffsetPosition> journalSpans = this.lexicon.tokenPositionsJournalNames(text);
            List<OffsetPosition> abbrevJournalSpans = this.lexicon.tokenPositionsAbbrevJournalNames(text);
            List<OffsetPosition> conferenceSpans = this.lexicon.tokenPositionsConferenceNames(text);
            List<OffsetPosition> publisherSpans = this.lexicon.tokenPositionsPublisherNames(text);
            int journalCursor = 0;
            int abbrevJournalCursor = 0;
            int conferenceCursor = 0;
            int publisherCursor = 0;

            ArrayList<String> featureRows = new ArrayList<String>();
            int tokenIndex = 0; // counts non-blank tokens only
            for (String token : tokens) {
                if (token.trim().length() == 0) {
                    continue; // blank tokens (space, tab, newline, ...) get no feature row
                }
                journalCursor = advanceLexiconCursor(journalSpans, journalCursor, tokenIndex);
                abbrevJournalCursor = advanceLexiconCursor(abbrevJournalSpans, abbrevJournalCursor, tokenIndex);
                conferenceCursor = advanceLexiconCursor(conferenceSpans, conferenceCursor, tokenIndex);
                publisherCursor = advanceLexiconCursor(publisherSpans, publisherCursor, tokenIndex);

                featureRows.add(FeaturesVectorReference.addFeaturesPatentReferences(
                        token,
                        tokens.size(),
                        tokenIndex,
                        lexiconSpanCovers(journalSpans, journalCursor, tokenIndex),
                        lexiconSpanCovers(abbrevJournalSpans, abbrevJournalCursor, tokenIndex),
                        lexiconSpanCovers(conferenceSpans, conferenceCursor, tokenIndex),
                        lexiconSpanCovers(publisherSpans, publisherCursor, tokenIndex)).printVector());
                tokenIndex++;
            }
            featureRows.add("\n");

            // ---- 2. labelling and grouping into raw references -------------------------------
            StringTokenizer labelledLines = new StringTokenizer(this.taggerAll.label(featureRows), "\n");
            List<String> patentRefs = new ArrayList<String>();
            List<Integer> patentRefOffsets = new ArrayList<Integer>();
            List<Double> patentRefConfs = new ArrayList<Double>();
            List<String> nplRefs = new ArrayList<String>();
            List<Integer> nplRefOffsets = new ArrayList<Integer>();

            String currentRef = null;       // reference being accumulated, null when outside a reference
            boolean currentIsPatent = true; // kind of currentRef; only meaningful while currentRef != null
            double currentConf = 0.0d;      // highest confidence seen within currentRef
            int currentRefOffset = 0;       // character offset at which currentRef started
            int textOffset = 0;             // characters of the tokenized text consumed so far
            int tokenCursor = 0;            // next index into tokens to align with a labelled line

            while (labelledLines.hasMoreTokens()) {
                String line = labelledLines.nextToken();
                if (line.trim().length() == 0) {
                    continue;
                }
                StringTokenizer fields = new StringTokenizer(line, "\t ");
                String token = null;       // first field of the line
                String label = null;       // last field of the line, if there is more than one
                String leadingSpaces = ""; // space tokens skipped while re-aligning with the original tokens
                int consumedChars = 0;
                while (fields.hasMoreTokens()) {
                    String field = fields.nextToken().trim();
                    if (token != null) {
                        label = field;
                        continue;
                    }
                    token = field;
                    // Walk the original token list up to and including the token of this line.
                    boolean matched = false;
                    while (!matched && tokenCursor < tokens.size()) {
                        String originalToken = tokens.get(tokenCursor);
                        consumedChars += originalToken.length();
                        if (originalToken.equals(" ")) {
                            leadingSpaces = leadingSpaces + originalToken;
                        } else if (originalToken.equals(token)) {
                            matched = true;
                        }
                        tokenCursor++;
                    }
                }

                if (label != null) {
                    // A label may carry a trailing "/<confidence>".
                    double conf = 0.0d;
                    int slash = label.lastIndexOf("/");
                    if (slash != -1) {
                        String rawConf = label.substring(slash + 1);
                        try {
                            conf = Double.parseDouble(rawConf);
                        } catch (NumberFormatException e) {
                            LOGGER.debug("{} cannot be parsed.", rawConf);
                        }
                        label = label.substring(0, slash);
                    }

                    boolean patentLabel = label.endsWith("<refPatent>");
                    boolean nplLabel = !patentLabel && label.endsWith("<refNPL>");
                    boolean otherLabel = !patentLabel && !nplLabel && label.equals(TaggingLabels.OTHER_LABEL);
                    // A reference starts when none is open, when the kind changes, or on an explicit "I-" label.
                    boolean startsReference = (patentLabel || nplLabel)
                            && (currentRef == null
                                || currentIsPatent != patentLabel
                                || label.equals(patentLabel ? "I-<refPatent>" : "I-<refNPL>"));

                    if (currentRef != null && (startsReference || otherLabel)) {
                        // Close the reference accumulated so far.
                        if (currentIsPatent) {
                            patentRefs.add(currentRef);
                            patentRefOffsets.add(Integer.valueOf(currentRefOffset));
                            patentRefConfs.add(Double.valueOf(currentConf));
                        } else {
                            nplRefs.add(currentRef);
                            nplRefOffsets.add(Integer.valueOf(currentRefOffset));
                        }
                    }

                    if (startsReference) {
                        currentRef = leadingSpaces + token;
                        currentRefOffset = textOffset;
                        currentIsPatent = patentLabel;
                        currentConf = conf;
                    } else if (patentLabel || nplLabel) {
                        currentRef = currentRef + leadingSpaces + token;
                        if (conf > currentConf) {
                            currentConf = conf;
                        }
                    } else if (otherLabel) {
                        currentRef = null;
                        currentConf = 0.0d;
                    }
                }
                textOffset += consumedChars;
            }
            // NOTE(review): a reference that is still open when the labelled output ends is not
            // added to either list. Left as-is to preserve existing results - confirm whether
            // a final flush is wanted.

            // ---- 3. parse raw references -----------------------------------------------------
            for (int k = 0; k < patentRefs.size(); k++) {
                String rawRef = patentRefs.get(k);
                this.patentParser.setRawRefText(rawRef);
                this.patentParser.setRawRefTextOffset(patentRefOffsets.get(k).intValue());
                for (PatentItem patentItem : this.patentParser.processRawRefText()) {
                    patentItem.setContext(rawRef);
                    patentItem.setConf(patentRefConfs.get(k).doubleValue());
                    list.add(patentItem);
                }
            }

            if (z) {
                // Drop every item whose EPO doc number already appeared earlier in the list.
                List<Object> seenNumbers = new ArrayList<Object>();
                List<PatentItem> duplicates = new ArrayList<PatentItem>();
                for (PatentItem patentItem : list) {
                    if (seenNumbers.contains(patentItem.getNumberEpoDoc())) {
                        duplicates.add(patentItem);
                    } else {
                        seenNumbers.add(patentItem.getNumberEpoDoc());
                    }
                }
                for (PatentItem duplicate : duplicates) {
                    list.remove(duplicate);
                }
            }

            for (int k = 0; k < nplRefs.size(); k++) {
                String rawRef = nplRefs.get(k);
                BiblioItem biblioItem = this.parsers.getCitationParser().processing(rawRef, i);
                BibDataSet bibDataSet = new BibDataSet();
                biblioItem.setReference(rawRef);
                bibDataSet.setResBib(biblioItem);
                bibDataSet.setRawBib(rawRef);
                bibDataSet.addOffset(nplRefOffsets.get(k).intValue());
                list2.add(bibDataSet);
            }

            // ---- 4. TEI output ---------------------------------------------------------------
            String divId = KeyGen.getKey().substring(0, 7);
            StringBuilder tei = new StringBuilder();
            tei.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<TEI xml:space=\"preserve\" xmlns=\"http://www.tei-c.org/ns/1.0\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<teiHeader />\n");
            tei.append("<text>\n");
            tei.append("<div id=\"_").append(divId).append("\">\n");
            tei.append(TextUtilities.HTMLEncode(text));
            tei.append("</div>\n");
            tei.append("<div type=\"references\">\n");
            tei.append("<listBibl>\n");
            for (PatentItem patentItem : list) {
                tei.append(patentItem.toTEI(true, divId)).append("\n");
            }
            for (BibDataSet bibDataSet : list2) {
                tei.append(bibDataSet.toTEI(z2)).append("\n");
            }
            tei.append("</listBibl>\n");
            tei.append("</div>\n");
            tei.append("</text>\n");
            tei.append("</TEI>");
            return tei.toString();
        } catch (Exception e) {
            throw new GrobidException("An exception occured while running Grobid.", e);
        }
    }

    /**
     * Moves a lexicon cursor forward to the first span, at or after {@code cursor}, that either
     * covers {@code tokenIndex} or starts after it. The cursor is returned unchanged when
     * {@code spans} is {@code null} or no such span exists.
     */
    private static int advanceLexiconCursor(List<OffsetPosition> spans, int cursor, int tokenIndex) {
        if (spans == null) {
            return cursor;
        }
        for (int k = cursor; k < spans.size(); k++) {
            OffsetPosition span = spans.get(k);
            if (span.start > tokenIndex || span.end >= tokenIndex) {
                return k;
            }
        }
        return cursor;
    }

    /** Tells whether the span at {@code cursor} exists and contains {@code tokenIndex} (bounds inclusive). */
    private static boolean lexiconSpanCovers(List<OffsetPosition> spans, int cursor, int tokenIndex) {
        if (spans == null || cursor >= spans.size()) {
            return false;
        }
        OffsetPosition span = spans.get(cursor);
        return span.start <= tokenIndex && span.end >= tokenIndex;
    }

    public String annotateAllReferences(Document document, List<LayoutToken> list, boolean z, int i, boolean z2, List<PatentItem> list2, List<BibDataSet> list3) {
        try {
            if (list.size() == 0) {
                return null;
            }
            if (list2 == null) {
                list2 = new ArrayList();
            }
            if (list3 == null) {
                list3 = new ArrayList();
            }
            if (this.patentParser == null) {
                this.patentParser = new PatentRefParser();
            }
            ArrayList arrayList = new ArrayList();
            StringBuilder sb = new StringBuilder();
            int i2 = 0;
            for (int size = list.size() - 1; size > 0; size--) {
                LayoutToken layoutToken = list.get(size);
                if (layoutToken != null && layoutToken.getText() != null) {
                    sb.insert(0, layoutToken.getText());
                    i2 += layoutToken.getText().length();
                }
                if (i2 > 500) {
                    break;
                }
            }
            String replace = sb.toString().replace("\n", " ").replace("\t", " ");
            this.languageUtilities.runLanguageId(replace);
            List<OffsetPosition> list4 = this.lexicon.tokenPositionsJournalNames(replace);
            List<OffsetPosition> list5 = this.lexicon.tokenPositionsAbbrevJournalNames(replace);
            List<OffsetPosition> list6 = this.lexicon.tokenPositionsConferenceNames(replace);
            List<OffsetPosition> list7 = this.lexicon.tokenPositionsPublisherNames(replace);
            int i3 = 0;
            int i4 = 0;
            int i5 = 0;
            int i6 = 0;
            int i7 = 0;
            Iterator<LayoutToken> it = list.iterator();
            while (it.hasNext()) {
                String text = it.next().getText();
                boolean z3 = false;
                boolean z4 = false;
                boolean z5 = false;
                boolean z6 = false;
                boolean z7 = false;
                if (text.trim().length() != 0 && !text.equals(" ") && !text.equals("\t") && !text.equals("\n") && !text.equals("\r")) {
                    if (list4 != null) {
                        if (i3 == list4.size() - 1 && list4.get(i3).end < i7) {
                            z7 = true;
                        }
                        if (!z7) {
                            int i8 = i3;
                            while (true) {
                                if (i8 >= list4.size()) {
                                    break;
                                }
                                if (list4.get(i8).start <= i7 && list4.get(i8).end >= i7) {
                                    z3 = true;
                                    i3 = i8;
                                    break;
                                }
                                if (list4.get(i8).start > i7) {
                                    z3 = false;
                                    i3 = i8;
                                    break;
                                }
                                i8++;
                            }
                        }
                    }
                    boolean z8 = false;
                    if (list5 != null) {
                        if (i4 == list5.size() - 1 && list5.get(i4).end < i7) {
                            z8 = true;
                        }
                        if (!z8) {
                            int i9 = i4;
                            while (true) {
                                if (i9 >= list5.size()) {
                                    break;
                                }
                                if (list5.get(i9).start <= i7 && list5.get(i9).end >= i7) {
                                    z4 = true;
                                    i4 = i9;
                                    break;
                                }
                                if (list5.get(i9).start > i7) {
                                    z4 = false;
                                    i4 = i9;
                                    break;
                                }
                                i9++;
                            }
                        }
                    }
                    boolean z9 = false;
                    if (list6 != null) {
                        if (i5 == list6.size() - 1 && list6.get(i5).end < i7) {
                            z9 = true;
                        }
                        if (!z9) {
                            int i10 = i5;
                            while (true) {
                                if (i10 >= list6.size()) {
                                    break;
                                }
                                if (list6.get(i10).start <= i7 && list6.get(i10).end >= i7) {
                                    z5 = true;
                                    i5 = i10;
                                    break;
                                }
                                if (list6.get(i10).start > i7) {
                                    z5 = false;
                                    i5 = i10;
                                    break;
                                }
                                i10++;
                            }
                        }
                    }
                    boolean z10 = false;
                    if (list7 != null) {
                        if (i6 == list7.size() - 1 && list7.get(i6).end < i7) {
                            z10 = true;
                        }
                        if (!z10) {
                            int i11 = i6;
                            while (true) {
                                if (i11 >= list7.size()) {
                                    break;
                                }
                                if (list7.get(i11).start <= i7 && list7.get(i11).end >= i7) {
                                    z6 = true;
                                    i6 = i11;
                                    break;
                                }
                                if (list7.get(i11).start > i7) {
                                    z6 = false;
                                    i6 = i11;
                                    break;
                                }
                                i11++;
                            }
                        }
                    }
                    arrayList.add(FeaturesVectorReference.addFeaturesPatentReferences(text, list.size(), i7, z3, z4, z5, z6).printVector());
                    i7++;
                }
            }
            arrayList.add("\n");
            StringTokenizer stringTokenizer = new StringTokenizer(this.taggerAll.label(arrayList), "\n");
            ArrayList<String> arrayList2 = new ArrayList();
            ArrayList<String> arrayList3 = new ArrayList();
            ArrayList arrayList4 = new ArrayList();
            ArrayList arrayList5 = new ArrayList();
            ArrayList arrayList6 = new ArrayList();
            ArrayList arrayList7 = new ArrayList();
            boolean z11 = true;
            String str = null;
            double d = 0.0d;
            int i12 = 0;
            int i13 = 0;
            int i14 = 0;
            int i15 = 0;
            while (stringTokenizer.hasMoreTokens()) {
                String nextToken = stringTokenizer.nextToken();
                if (nextToken.trim().length() != 0) {
                    StringTokenizer stringTokenizer2 = new StringTokenizer(nextToken, "\t ");
                    boolean z12 = true;
                    String str2 = "";
                    String str3 = null;
                    String str4 = null;
                    while (stringTokenizer2.hasMoreTokens()) {
                        if (z12) {
                            str4 = stringTokenizer2.nextToken().trim();
                            z12 = false;
                            boolean z13 = false;
                            while (!z13 && i15 < list.size()) {
                                LayoutToken layoutToken2 = list.get(i15);
                                if (layoutToken2 != null && layoutToken2.getText() != null) {
                                    String text2 = layoutToken2.getText();
                                    i14 += text2.length();
                                    if (text2.equals(" ")) {
                                        str2 = str2 + text2;
                                    } else if (text2.equals(str4)) {
                                        z13 = true;
                                    }
                                    i15++;
                                }
                            }
                        } else {
                            str3 = stringTokenizer2.nextToken().trim();
                        }
                    }
                    if (str3 == null) {
                        i12 += i14;
                        i14 = 0;
                    } else {
                        double d2 = 0.0d;
                        int lastIndexOf = str3.lastIndexOf("/");
                        if (lastIndexOf != -1) {
                            String substring = str3.substring(lastIndexOf + 1, str3.length());
                            try {
                                d2 = Double.parseDouble(substring);
                            } catch (Exception e) {
                                LOGGER.debug(substring + " cannot be parsed.");
                            }
                            str3 = str3.substring(0, lastIndexOf);
                        }
                        if (str4 != null) {
                            if (str3.endsWith("<refPatent>")) {
                                if (str == null) {
                                    str = str2 + str4;
                                    i13 = i12;
                                    z11 = true;
                                    d = d2;
                                } else if (!z11) {
                                    arrayList3.add(str);
                                    arrayList5.add(Integer.valueOf(i13));
                                    arrayList7.add(new Double(d));
                                    z11 = true;
                                    str = str2 + str4;
                                    i13 = i12;
                                    d = d2;
                                } else if (str3.equals("I-<refPatent>")) {
                                    arrayList2.add(str);
                                    arrayList4.add(Integer.valueOf(i13));
                                    arrayList6.add(new Double(d));
                                    z11 = true;
                                    str = str2 + str4;
                                    i13 = i12;
                                    d = d2;
                                } else {
                                    str = str + str2 + str4;
                                    if (d2 > d) {
                                        d = d2;
                                    }
                                }
                            } else if (str3.endsWith("<refNPL>")) {
                                if (str == null) {
                                    str = str2 + str4;
                                    i13 = i12;
                                    z11 = false;
                                    d = d2;
                                } else if (z11) {
                                    arrayList2.add(str);
                                    arrayList4.add(Integer.valueOf(i13));
                                    arrayList6.add(new Double(d));
                                    z11 = false;
                                    str = str2 + str4;
                                    i13 = i12;
                                    d = d2;
                                } else if (str3.equals("I-<refNPL>")) {
                                    arrayList3.add(str);
                                    arrayList5.add(Integer.valueOf(i13));
                                    arrayList7.add(new Double(d));
                                    z11 = false;
                                    str = str2 + str4;
                                    i13 = i12;
                                    d = d2;
                                } else {
                                    str = str + str2 + str4;
                                    if (d2 > d) {
                                        d = d2;
                                    }
                                }
                            } else if (str3.equals(TaggingLabels.OTHER_LABEL)) {
                                if (str != null) {
                                    if (z11) {
                                        arrayList2.add(str);
                                        arrayList4.add(Integer.valueOf(i13));
                                        arrayList6.add(new Double(d));
                                    } else {
                                        arrayList3.add(str);
                                        arrayList5.add(Integer.valueOf(i13));
                                        arrayList7.add(new Double(d));
                                    }
                                    z11 = false;
                                }
                                str = null;
                                d = 0.0d;
                            }
                        }
                        i12 += i14;
                        i14 = 0;
                    }
                }
            }
            int i16 = 0;
            for (String str5 : arrayList2) {
                this.patentParser.setRawRefText(str5);
                this.patentParser.setRawRefTextOffset(((Integer) arrayList4.get(i16)).intValue());
                for (PatentItem patentItem : this.patentParser.processRawRefText()) {
                    patentItem.setContext(str5);
                    patentItem.setConf(((Double) arrayList6.get(i16)).doubleValue());
                    list2.add(patentItem);
                    List<LayoutToken> tokens = Document.getTokens(list, patentItem.getOffsetBegin(), patentItem.getOffsetEnd());
                    if (tokens != null && tokens.size() > 0) {
                        patentItem.setCoordinates(BoundingBoxCalculator.calculate(tokens));
                    }
                }
                i16++;
            }
            ArrayList arrayList8 = new ArrayList();
            if (z) {
                ArrayList arrayList9 = new ArrayList();
                for (PatentItem patentItem2 : list2) {
                    if (arrayList8.contains(patentItem2.getNumberEpoDoc())) {
                        arrayList9.add(patentItem2);
                    } else {
                        arrayList8.add(patentItem2.getNumberEpoDoc());
                    }
                }
                Iterator it2 = arrayList9.iterator();
                while (it2.hasNext()) {
                    list2.remove((PatentItem) it2.next());
                }
            }
            if (list3 != null) {
                int i17 = 0;
                for (String str6 : arrayList3) {
                    BiblioItem processing = this.parsers.getCitationParser().processing(str6, i);
                    BibDataSet bibDataSet = new BibDataSet();
                    processing.setReference(str6);
                    bibDataSet.setResBib(processing);
                    bibDataSet.setRawBib(str6);
                    bibDataSet.addOffset(((Integer) arrayList5.get(i17)).intValue());
                    list3.add(bibDataSet);
                    i17++;
                }
            }
            int size2 = list2 != null ? list2.size() : 0;
            if (list3 != null) {
                int size3 = size2 + list3.size();
            }
            StringBuilder sb2 = new StringBuilder();
            sb2.append("{");
            List<Page> pages = document.getPages();
            int i18 = 1;
            sb2.append("\"pages\": [");
            for (Page page : pages) {
                if (i18 > 1) {
                    sb2.append(", ");
                }
                sb2.append("{\"page_height\":" + page.getHeight());
                sb2.append(", \"page_width\":" + page.getWidth() + StringSubstitutor.DEFAULT_VAR_END);
                i18++;
            }
            sb2.append("]");
            if (list2 != null) {
                sb2.append(", \"patents\": [");
                boolean z14 = true;
                for (PatentItem patentItem3 : list2) {
                    if (z14) {
                        z14 = false;
                    } else {
                        sb2.append(", ");
                    }
                    sb2.append(patentItem3.toJson(null, true));
                }
                sb2.append("]");
            }
            if (list3 != null) {
                sb2.append(", \"articles\": [");
                for (BibDataSet bibDataSet2 : list3) {
                }
                sb2.append("]");
            }
            sb2.append(StringSubstitutor.DEFAULT_VAR_END);
            return sb2.toString();
        } catch (Exception e2) {
            throw new GrobidException("An exception occured while running Grobid.", e2);
        }
    }

    /**
     * Serialises all references held in {@code resBib} as a small TEI document.
     *
     * <p>A first pass registers every reference in a shared {@link BiblioSet}
     * (only when {@code path} is set); the set is written first, followed by a
     * {@code <listbibl>} element containing one {@code toTEI2} entry per reference.
     *
     * @return the TEI string, wrapped in {@code <tei>...</tei>}
     */
    public String references2TEI2() {
        BiblioSet sharedSet = new BiblioSet();
        for (BibDataSet entry : this.resBib) {
            BiblioItem item = entry.getResBib();
            if (this.path != null) {
                item.buildBiblioSet(sharedSet, this.path);
            }
        }

        StringBuilder tei = new StringBuilder("<tei>\n");
        tei.append(sharedSet.toTEI());
        tei.append("<listbibl>\n");
        for (BibDataSet entry : this.resBib) {
            tei.append("\n").append(entry.getResBib().toTEI2(sharedSet));
        }
        tei.append("\n</listbibl>\n</tei>\n");
        return tei.toString();
    }

    /**
     * Serialises a single reference of {@code resBib} as TEI.
     *
     * @param i zero-based index of the reference in {@code resBib}
     * @return the TEI produced by {@code toTEI(i)} for that reference, or an empty
     *         string when {@code resBib} is null or {@code i} is out of range
     */
    public String reference2TEI(int i) {
        // Valid indices are 0 .. size()-1; the former "i <= size()" test let
        // i == size() through and get(i) then threw IndexOutOfBoundsException.
        if (this.resBib == null || i < 0 || i >= this.resBib.size()) {
            return "";
        }
        BiblioItem item = this.resBib.get(i).getResBib();
        if (this.path != null) {
            item.setPath(this.path);
        }
        return "" + item.toTEI(i);
    }

    /**
     * Serialises all references held in {@code resBib} as BibTeX.
     *
     * <p>When {@code path} is set it is pushed to each item before serialisation.
     *
     * @return the BibTeX entries, each one preceded by a newline; an empty string
     *         when there is no reference
     */
    public String references2BibTeX() {
        StringBuilder bibtex = new StringBuilder();
        for (BibDataSet entry : this.resBib) {
            BiblioItem item = entry.getResBib();
            if (this.path != null) {
                item.setPath(this.path);
            }
            bibtex.append("\n").append(item.toBibTeX());
        }
        return bibtex.toString();
    }

    /**
     * Serialises all references held in {@code resBib} as a TEI {@code <listbibl>}
     * element.
     *
     * <p>Each reference is serialised with {@code toTEI(n)}, where {@code n} is its
     * zero-based position in {@code resBib}.
     *
     * @return the {@code <listbibl>} string
     */
    public String references2TEI() {
        StringBuilder tei = new StringBuilder("<listbibl>\n");
        int position = 0;
        for (BibDataSet entry : this.resBib) {
            BiblioItem item = entry.getResBib();
            // Propagate the path only when one is set. The previous guard was
            // inverted (path == null), so setPath was only ever called with null
            // and a configured path never reached the items, unlike
            // reference2TEI and references2BibTeX.
            if (this.path != null) {
                item.setPath(this.path);
            }
            tei.append("\n").append(item.toTEI(position));
            position++;
        }
        tei.append("\n</listbibl>\n");
        return tei.toString();
    }

    /**
     * Serialises a single reference of {@code resBib} as BibTeX.
     *
     * @param i zero-based index of the reference in {@code resBib}
     * @return the BibTeX produced by {@code toBibTeX()} for that reference, or an
     *         empty string when {@code resBib} is null or {@code i} is out of range
     */
    public String reference2BibTeX(int i) {
        // Valid indices are 0 .. size()-1; "i <= size()" allowed i == size(),
        // which made get(i) throw IndexOutOfBoundsException.
        if (this.resBib == null || i < 0 || i >= this.resBib.size()) {
            return "";
        }
        BiblioItem item = this.resBib.get(i).getResBib();
        // The guard used to be inverted (path == null), so a configured path was
        // never pushed to the item; aligned with reference2TEI.
        if (this.path != null) {
            item.setPath(this.path);
        }
        return "" + item.toBibTeX();
    }

    /**
     * Prints to standard output a copy of {@code file} in which every extracted
     * patent context is wrapped in {@code <patcit>} and every raw bibliographical
     * string in {@code <nplcit>}.
     *
     * <p>The file is read as UTF-8 and is not modified; the annotated text and each
     * individual substitution are only printed. Substitutions are applied one after
     * the other with {@link String#replace(CharSequence, CharSequence)}, patents
     * first, so every occurrence of a span is replaced.
     *
     * @param file       text file to annotate
     * @param arrayList  patent references whose context is to be marked up
     * @param arrayList2 non-patent references whose raw string is to be marked up
     * @throws GrobidException wrapping any exception raised while reading or
     *                         substituting
     */
    private void annotate(File file, ArrayList<PatentItem> arrayList, ArrayList<BibDataSet> arrayList2) {
        try {
            List<String> rawSpans = new ArrayList<>();
            List<String> taggedSpans = new ArrayList<>();
            for (PatentItem patent : arrayList) {
                String context = patent.getContext();
                String tagged = " <patcit>" + context + "</patcit> ";
                rawSpans.add(context);
                taggedSpans.add(tagged);
                System.out.println(context + " -> " + tagged);
            }
            for (BibDataSet article : arrayList2) {
                String rawBib = article.getRawBib();
                String tagged = " <nplcit>" + rawBib + "</nplcit> ";
                rawSpans.add(rawBib);
                taggedSpans.add(tagged);
                System.out.println(rawBib + " -> " + tagged);
            }

            StringBuilder content = new StringBuilder();
            // try-with-resources: the streams used to stay open when readLine() failed.
            try (FileInputStream in = new FileInputStream(file);
                 BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    content.append(line).append("\n");
                }
            }

            String annotated = content.toString();
            for (int k = 0; k < rawSpans.size(); k++) {
                annotated = annotated.replace(rawSpans.get(k), taggedSpans.get(k));
            }
            System.out.println(annotated);
        } catch (Exception e) {
            throw new GrobidException("An exception occured while running Grobid.", e);
        }
    }

    /**
     * Generates training files from a patent XML document.
     *
     * <p>The description text of the patent is extracted, references are recognised
     * in it with {@code extractAllReferencesString}, and up to two files are written
     * to the output directory, both named after the patent number reported by the
     * text parser:
     * <ul>
     *   <li>{@code <number>.training.xml}, written by a
     *       {@link PatentAnnotationSaxParser} fed with the recognised references
     *       during a second parse of the input;</li>
     *   <li>{@code <number>.training.references.xml}, the citation-parser training
     *       extraction of every non-patent reference longer than one character,
     *       wrapped in {@code <citations>}; only written when not empty.</li>
     * </ul>
     * Nothing is written when the document yields no text.
     *
     * @param str  path to the patent file; must end with {@code .xml} or
     *             {@code .xml.gz}
     * @param str2 output directory, or {@code null} to use the Grobid temp path
     * @throws GrobidResourceException if {@code str} is null, has an unsupported
     *                                 extension, or if the input file or the output
     *                                 directory does not exist
     * @throws GrobidException         wrapping any exception raised during processing
     */
    public void generateTrainingData(String str, String str2) {
        if (str == null) {
            throw new GrobidResourceException("Cannot process the patent file, because the document path is null.");
        }
        if (!str.endsWith(".xml") && !str.endsWith(".xml.gz")) {
            throw new GrobidResourceException("Only patent XML files (ST.36 or Marec) can be processed to generate traning data.");
        }
        File file = new File(str);
        if (!file.exists()) {
            throw new GrobidResourceException("Cannot process the patent file, because path '" + file.getAbsolutePath() + "' does not exists.");
        }
        if (str2 == null) {
            GrobidProperties.getInstance();
            str2 = GrobidProperties.getTempPath().getAbsolutePath();
        }
        File file2 = new File(str2);
        if (!file2.exists()) {
            throw new GrobidResourceException("Cannot process the patent file, because path '" + file2.getAbsolutePath() + "' does not exists.");
        }
        try {
            // First pass: collect the text of the description and the patent number.
            // (The SAXParserFactory that used to be configured here was never used
            // to create a parser, so it has been dropped.)
            TextSaxParser textSaxParser = new TextSaxParser();
            textSaxParser.setFilter("description");
            parseIgnoringExternalEntities(str, textSaxParser);

            String text = textSaxParser.getText();
            String patentNumber = textSaxParser.currentPatentNumber;
            if (text == null) {
                return;
            }

            ArrayList<PatentItem> patents = new ArrayList<>();
            ArrayList<BibDataSet> articles = new ArrayList<>();
            extractAllReferencesString(text, false, 0, false, patents, articles);

            // Second pass: re-read the input and let the annotation parser write the
            // training file. try-with-resources closes the writer even if parsing fails.
            File annotatedFile = new File(str2 + "/" + patentNumber + ".training.xml");
            try (FileOutputStream out = new FileOutputStream(annotatedFile, false);
                 OutputStreamWriter writer = new OutputStreamWriter(out, "UTF-8")) {
                PatentAnnotationSaxParser annotationParser = new PatentAnnotationSaxParser();
                annotationParser.setWriter(writer);
                annotationParser.setPatents(patents);
                annotationParser.setArticles(articles);
                parseIgnoringExternalEntities(str, annotationParser);
            }

            // Citation-level training data, one extraction per non-patent reference.
            StringBuilder citationTraining = new StringBuilder();
            for (BibDataSet article : articles) {
                String rawBib = article.getRawBib().trim();
                if (rawBib.length() <= 1) {
                    continue;
                }
                ArrayList<String> single = new ArrayList<>();
                single.add(rawBib);
                StringBuilder extraction = this.parsers.getCitationParser().trainingExtraction(single);
                if (extraction != null) {
                    citationTraining.append(extraction.toString()).append("\n");
                }
            }
            if (citationTraining.length() > 0) {
                File referencesFile = new File(str2 + "/" + patentNumber + ".training.references.xml");
                try (FileOutputStream out = new FileOutputStream(referencesFile, false);
                     OutputStreamWriter writer = new OutputStreamWriter(out, "UTF-8")) {
                    writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
                    writer.write("<citations>\n");
                    writer.write(citationTraining.toString());
                    writer.write("</citations>\n");
                }
            }
        } catch (Exception e) {
            throw new GrobidException("An exception occured while running Grobid.", e);
        }
    }

    /**
     * Parses the patent file at {@code path} with a fresh XML reader, sending SAX
     * events to {@code handler}.
     *
     * <p>Every external entity is resolved to an empty XML document instead of
     * being fetched. Files whose name ends with {@code .gz} are read through a
     * {@link GZIPInputStream}; the streams opened for them are closed when parsing
     * ends, normally or not. The input is declared as UTF-8.
     */
    private static void parseIgnoringExternalEntities(String path, org.xml.sax.ContentHandler handler) throws Exception {
        XMLReader reader = XMLReaderFactory.createXMLReader();
        reader.setEntityResolver(new EntityResolver() {
            @Override // org.xml.sax.EntityResolver
            public InputSource resolveEntity(String publicId, String systemId) {
                return new InputSource(new ByteArrayInputStream("<?xml version=\"1.0\" encoding=\"UTF-8\"?>".getBytes()));
            }
        });
        reader.setContentHandler(handler);

        boolean gzipped = path.endsWith(".gz");
        // A null resource is legal in try-with-resources and is simply not closed.
        try (FileInputStream raw = gzipped ? new FileInputStream(path) : null;
             DataInputStream in = gzipped ? new DataInputStream(new GZIPInputStream(raw)) : null) {
            InputSource source = gzipped ? new InputSource(in) : new InputSource(path);
            source.setEncoding("UTF-8");
            reader.parse(source);
        }
    }

    /**
     * Retrieves a description through the OPS service and stores it in
     * {@code description}, creating the service on first use.
     *
     * @param str identifier passed to {@code OPSService.descriptionRetrieval}
     * @return {@code true} when a description was retrieved and is at least 600
     *         characters long, {@code false} otherwise
     * @throws GrobidException wrapping any exception raised during retrieval
     */
    public boolean getDocOPS(String str) {
        try {
            if (this.ops == null) {
                this.ops = new OPSService();
            }
            this.description = this.ops.descriptionRetrieval(str);
            return this.description != null && this.description.length() >= 600;
        } catch (Exception e) {
            throw new GrobidException("An exception occured while running Grobid.", e);
        }
    }

    /**
     * Writes an XML report of the extracted citations to {@code file} (UTF-8,
     * overwriting any existing content).
     *
     * <p>After the XML declaration, a {@code <citations>} element is written when
     * at least one list is non-empty. It contains a {@code <patent-citations>}
     * section with one {@code <patcit>} per patent (attribute {@code dnum} is the
     * authority, EPO-doc number and, when present, kind code) and an
     * {@code <npl-citations>} section with one {@code <nplcit>} per article (its
     * TEI followed by the raw string). Empty sections are omitted.
     *
     * <p>NOTE(review): the attribute is emitted as {@code if}, as before; it was
     * presumably meant to be {@code id} - confirm with consumers before renaming.
     * Context and raw strings are inserted as-is, without XML escaping.
     *
     * @param file       destination file
     * @param arrayList  patent citations
     * @param arrayList2 non-patent citations
     * @throws GrobidException wrapping any exception raised while building or
     *                         writing the report
     */
    public void generateXMLReport(File file, ArrayList<PatentItem> arrayList, ArrayList<BibDataSet> arrayList2) {
        try {
            boolean hasPatents = arrayList.size() > 0;
            boolean hasArticles = arrayList2.size() > 0;

            // Build the whole report first so the destination is not truncated
            // when serialising an item fails.
            StringBuilder report = new StringBuilder();
            report.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
            if (hasPatents || hasArticles) {
                report.append("<citations>\n");
            }
            if (hasPatents) {
                report.append("<patent-citations>\n");
                int rank = 0;
                for (PatentItem patent : arrayList) {
                    String dnum = patent.getAuthority() + patent.getNumberEpoDoc();
                    if (patent.getKindCode() != null) {
                        dnum = dnum + patent.getKindCode();
                    }
                    // The closing quote after the rank was missing, which produced
                    // if="pcit0 dnum="..." and made the report ill-formed XML.
                    report.append("<patcit if=\"pcit" + rank + "\" dnum=\"" + dnum + "\"><text>" + patent.getContext() + "</text></patcit>");
                    report.append("\n");
                    rank++;
                }
                report.append("</patent-citations>\n");
            }
            if (hasArticles) {
                report.append("<npl-citations>\n");
                int rank = 0;
                for (BibDataSet article : arrayList2) {
                    report.append("<nplcit if=\"ncit" + rank + "\">");
                    report.append(article.getResBib().toTEI(rank));
                    report.append("<text>" + article.getRawBib() + "</text></nplcit>");
                    report.append("\n");
                    rank++;
                }
                report.append("</npl-citations>\n");
            }
            if (hasPatents || hasArticles) {
                report.append("</citations>\n");
            }

            // try-with-resources: the streams used to leak when write() failed.
            try (FileOutputStream out = new FileOutputStream(file, false);
                 OutputStreamWriter writer = new OutputStreamWriter(out, "UTF-8")) {
                writer.write(report.toString());
            }
        } catch (Exception e) {
            throw new GrobidException("An exception occured while running Grobid.", e);
        }
    }

    /**
     * Tells whether offset {@code i2} falls inside one of the positions of
     * {@code list}, scanning forward from index {@code i}.
     *
     * <p>The scan stops with {@code false} as soon as a position starting after
     * {@code i2} is met, and is skipped altogether when {@code i} is the last
     * index and that position ends before {@code i2}.
     *
     * @param i    index in {@code list} at which the scan starts
     * @param i2   offset to look for
     * @param list positions to scan
     * @return {@code true} if a scanned position satisfies
     *         {@code start <= i2 <= end}, {@code false} otherwise
     */
    private static boolean checkPositionRange(int i, int i2, List<OffsetPosition> list) {
        final int count = list.size();
        // Shortcut: already at the last position and it lies entirely before the offset.
        if (i == count - 1 && list.get(i).end < i2) {
            return false;
        }
        for (int idx = i; idx < count; idx++) {
            OffsetPosition candidate = list.get(idx);
            if (candidate.start <= i2 && candidate.end >= i2) {
                return true;
            }
            if (candidate.start > i2) {
                return false;
            }
        }
        return false;
    }

    /**
     * Closes the underlying tagger and drops the reference to it.
     *
     * <p>Calling this method again once the tagger has been released has no
     * effect, as the {@link Closeable} contract requires; it previously failed
     * with a {@link NullPointerException} on the second call.
     *
     * @throws IOException if closing the tagger fails
     */
    @Override // java.io.Closeable, java.lang.AutoCloseable
    public void close() throws IOException {
        if (this.taggerAll == null) {
            return;
        }
        this.taggerAll.close();
        this.taggerAll = null;
    }
}
