package org.grobid.trainer;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.LinkedList;
import java.util.List;
import java.util.StringTokenizer;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.grobid.core.GrobidModels;
import org.grobid.core.exceptions.GrobidException;
import org.grobid.core.features.FeaturesVectorReference;
import org.grobid.core.sax.MarecSaxParser;
import org.grobid.core.utilities.GrobidProperties;
import org.grobid.core.utilities.OffsetPosition;
import org.grobid.core.utilities.TextUtilities;
import org.grobid.trainer.evaluation.PatentEvaluation;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:org/grobid/trainer/PatentParserTrainer.class */
/**
 * Trainer that builds a CRF training file from a corpus of annotated patent XML documents
 * and trains the combined patent/NPL reference model from it.
 *
 * <p>The corpus is parsed with {@link MarecSaxParser}; the text it accumulates for each document
 * is turned into feature vectors by {@link #addFeatures} and appended to a single output file.</p>
 */
public class PatentParserTrainer extends AbstractTrainer {
    /**
     * Value handed to {@link MarecSaxParser#setN(int)} when a training set is built
     * (presumably the size of the text window kept around each reference — confirm against
     * MarecSaxParser).
     */
    private static final int TRAIN_WINDOW = 200;

    /**
     * Creates the trainer with its CRF settings (epsilon 1e-4, window 20).
     */
    public PatentParserTrainer() {
        // NOTE(review): the trainer is registered for PATENT_PATENT while train() writes the
        // PATENT_ALL model file — confirm this mismatch is intended.
        super(GrobidModels.PATENT_PATENT);
        this.epsilon = 1.0E-4d;
        this.window = 20;
    }

    /**
     * Builds the training file {@code all.train} in the given directory from the patent corpus
     * located under the trainer resources ({@code dataset/patent/corpus/}).
     *
     * @param trainingDataDir directory in which {@code all.train} is written
     * @return always 0
     * @throws GrobidException if the data set cannot be created
     */
    public int createTrainingData(String trainingDataDir) {
        try {
            String corpusPath = new File(getFilePath2Resources(), "dataset/patent/corpus/").getAbsolutePath();
            createDataSet(null, null, corpusPath, trainingDataDir, 0);
            return 0;
        } catch (Exception e) {
            throw new GrobidException("An exception occurred while training Grobid.", e);
        }
    }

    /**
     * Not used by this trainer (data is produced by {@link #createDataSet}); does nothing.
     *
     * @return always 0
     */
    @Override
    public int createCRFPPData(File file, File file2) {
        return 0;
    }

    /**
     * Not used by this trainer (data is produced by {@link #createDataSet}); does nothing.
     *
     * @return always 0
     */
    @Override
    public int createCRFPPData(File file, File file2, File file3, double d) {
        return 0;
    }

    /**
     * Generates the training data in the Grobid temp directory, trains a new model with the
     * {@code text.references.template} template and puts it in place of the PATENT_ALL model.
     */
    @Override
    public void train() {
        File tempDir = GrobidProperties.getTempPath();
        createTrainingData(tempDir.getAbsolutePath());

        File trainingData = new File(tempDir, "all.train");
        File template = new File(getFilePath2Resources(), "dataset/patent/crfpp-templates/text.references.template");

        GenericTrainer trainer = TrainerFactory.getTrainer();
        trainer.setEpsilon(this.epsilon);
        trainer.setWindow(this.window);

        // Train into a temporary "new model" file, then swap it with the current model.
        File currentModel = GrobidProperties.getModelPath(GrobidModels.PATENT_ALL);
        File newModel = new File(currentModel.getAbsolutePath() + AbstractTrainer.NEW_MODEL_EXT);
        trainer.train(template, trainingData, newModel, GrobidProperties.getNBThreads().intValue(), this.model);
        renameModels(GrobidProperties.getModelPath(GrobidModels.PATENT_ALL), newModel);
    }

    /**
     * Walks a corpus directory, parses every {@code .xml} file found (recursively) and appends
     * the feature vectors of each document to one output file, then prints corpus statistics
     * to standard output.
     *
     * <p>Input and output locations depend on {@code setName} and {@code rank}:</p>
     * <ul>
     *   <li>both non-null: corpus is read from {@code corpusPath/<setName>ing<rank>/}; otherwise
     *       from {@code corpusPath};</li>
     *   <li>{@code setName} null or empty: output is {@code outputPath/all.train};</li>
     *   <li>{@code setName} set, {@code rank} null: output is {@code outputPath/all.<setName>};</li>
     *   <li>both set: output is {@code outputPath/<setName>ing<rank>/all.<setName>}.</li>
     * </ul>
     *
     * @param setName    name of the set (e.g. a value such that {@code setName + "ing"} reads
     *                   "training"), or null for the default training set
     * @param rank       suffix identifying a sub-directory of the set, or null
     * @param corpusPath root directory of the corpus
     * @param outputPath directory receiving the generated file
     * @param type       0 configures the parser with {@link #TRAIN_WINDOW}; any other value
     *                   passes -1 to {@link MarecSaxParser#setN(int)}
     * @throws GrobidException if parsing or writing fails
     */
    public void createDataSet(String setName, String rank, String corpusPath, String outputPath, int type) {
        try {
            // Single parser extracting both patent and NPL references, reused for every file.
            MarecSaxParser sax = new MarecSaxParser();
            sax.patentReferences = true;
            sax.nplReferences = true;
            sax.setN(type == 0 ? TRAIN_WINDOW : -1);

            SAXParserFactory factory = SAXParserFactory.newInstance();
            factory.setValidating(false);
            factory.setFeature("http://xml.org/sax/features/namespaces", false);
            factory.setFeature("http://xml.org/sax/features/validation", false);

            File corpusRoot = (setName != null && rank != null)
                    ? new File(corpusPath + File.separator + setName + "ing" + rank + File.separator)
                    : new File(corpusPath);
            File outputFile = resolveOutputFile(setName, rank, outputPath);

            int nbFiles = 0;
            int nbNPLRef = 0;
            int nbPatentRef = 0;
            int maxRef = 0;

            // Breadth-first traversal of the corpus directory tree.
            LinkedList<File> pending = new LinkedList<File>();
            pending.add(corpusRoot);

            // try-with-resources guarantees the trailing output is flushed and the file closed,
            // including when a document fails to parse.
            try (Writer writer = new OutputStreamWriter(new FileOutputStream(outputFile, false), "UTF-8")) {
                while (!pending.isEmpty()) {
                    File file = pending.removeFirst();
                    if (file.isDirectory()) {
                        // listFiles() returns null on I/O error or if access is denied.
                        File[] children = file.listFiles();
                        if (children != null) {
                            for (File child : children) {
                                pending.addLast(child);
                            }
                        }
                    } else if (file.getName().endsWith(".xml")) {
                        nbFiles++;
                        processCorpusFile(file, factory, sax, writer);
                        nbNPLRef += sax.getNbNPLRef();
                        nbPatentRef += sax.getNbPatentRef();
                        if (sax.nbAllRef > maxRef) {
                            maxRef = sax.nbAllRef;
                        }
                    }
                }
            }

            int nbRef = nbNPLRef + nbPatentRef;
            // Avoid a division by zero on an empty corpus and keep the fractional part.
            double averageRef = nbFiles == 0 ? 0.0d : (double) nbRef / nbFiles;

            if (setName != null) {
                System.out.println(setName + "ing on " + nbFiles + " files");
            } else {
                System.out.println("training on " + nbFiles + " files");
            }
            System.out.println("Number of references: " + nbRef);
            System.out.println("Number of patent references: " + nbPatentRef);
            System.out.println("Number of NPL references: " + nbNPLRef);
            System.out.println("Average number of references: " + TextUtilities.formatTwoDecimals(averageRef));
            System.out.println("Max number of references in file: " + maxRef);
            // Report the file that was actually written (also correct for ranked sets).
            System.out.println("common data set under: " + outputFile.getPath());
        } catch (Exception e) {
            throw new GrobidException("An exception occurred while running Grobid.", e);
        }
    }

    /**
     * Computes the output file of {@link #createDataSet} from the set name and rank.
     */
    private static File resolveOutputFile(String setName, String rank, String outputPath) {
        if (setName == null || setName.length() == 0) {
            return new File(outputPath + File.separator + "all.train");
        }
        if (rank == null) {
            return new File(outputPath + File.separator + "all." + setName);
        }
        return new File(outputPath + File.separator + setName + "ing" + rank + File.separator + "all." + setName);
    }

    /**
     * Parses one corpus file with the shared SAX handler and appends its feature vectors,
     * followed by an empty line, to the writer.
     */
    private void processCorpusFile(File file, SAXParserFactory factory, MarecSaxParser sax, Writer writer) {
        try (FileInputStream in = new FileInputStream(file)) {
            SAXParser parser = factory.newSAXParser();
            sax.setFileName(file.toString());
            parser.parse(in, sax);

            if (sax.accumulatedText != null) {
                addFeatures(sax.accumulatedText.toString(), writer,
                        sax.journalsPositions, sax.abbrevJournalsPositions,
                        sax.conferencesPositions, sax.publishersPositions);
                writer.write("\n");
            }
        } catch (Exception e) {
            throw new GrobidException("An exception occured while running Grobid.", e);
        }
    }

    /**
     * Converts the line-oriented text of a document into feature vectors and writes them out.
     *
     * <p>The text is split on {@code '\n'}. A blank line is written through as an empty line and
     * resets the token position to 0. A line ending with {@code "\t<ignore>"} only advances the
     * position. Any other line is passed to
     * {@link FeaturesVectorReference#addFeaturesPatentReferences} together with four flags
     * telling whether the current position falls inside one of the given lexicon spans; the
     * resulting vector is written (and the position advanced) only when its label is non-null.</p>
     *
     * @param text                    text to process, one token per line
     * @param writer                  destination of the feature vectors; flushed before returning
     * @param journalPositions        spans matching journal names, or null
     * @param abbrevJournalPositions  spans matching abbreviated journal names, or null
     * @param conferencePositions     spans matching conference names, or null
     * @param publisherPositions      spans matching publisher names, or null
     * @throws GrobidException if feature generation or writing fails
     */
    public void addFeatures(String text, Writer writer,
                            List<OffsetPosition> journalPositions,
                            List<OffsetPosition> abbrevJournalPositions,
                            List<OffsetPosition> conferencePositions,
                            List<OffsetPosition> publisherPositions) {
        try {
            StringTokenizer lines = new StringTokenizer(text, "\n");
            int totalLines = lines.countTokens();

            LexiconCursor journals = new LexiconCursor(journalPositions);
            LexiconCursor abbrevJournals = new LexiconCursor(abbrevJournalPositions);
            LexiconCursor conferences = new LexiconCursor(conferencePositions);
            LexiconCursor publishers = new LexiconCursor(publisherPositions);

            int position = 0;
            while (lines.hasMoreTokens()) {
                String line = lines.nextToken();
                if (line.trim().length() == 0) {
                    writer.write("\n");
                    position = 0;
                    continue;
                }
                if (line.endsWith("\t<ignore>")) {
                    position++;
                    continue;
                }

                boolean inJournal = journals.covers(position);
                boolean inAbbrevJournal = abbrevJournals.covers(position);
                boolean inConference = conferences.covers(position);
                boolean inPublisher = publishers.covers(position);

                FeaturesVectorReference features = FeaturesVectorReference.addFeaturesPatentReferences(
                        line, totalLines, position, inJournal, inAbbrevJournal, inConference, inPublisher);
                if (features.label != null) {
                    writer.write(features.printVector());
                    position++;
                }
            }
            // One flush per document instead of one per token.
            writer.flush();
        } catch (Exception e) {
            throw new GrobidException("An exception occurred while running Grobid.", e);
        }
    }

    /**
     * Forward-only cursor over a list of spans, answering whether a token position lies inside
     * one of them. The scan restarts from the last remembered index on each query (presumably
     * the span lists are ordered by offset — confirm against MarecSaxParser).
     */
    private static final class LexiconCursor {
        private final List<OffsetPosition> spans;
        private int index = 0;

        LexiconCursor(List<OffsetPosition> spans) {
            this.spans = spans;
        }

        /**
         * Returns true if {@code position} lies within a span at or after the remembered index.
         * The index moves to the matching span, or to the first span starting after
         * {@code position}; it is left unchanged when the scan runs off the end of the list.
         */
        boolean covers(int position) {
            if (spans == null) {
                return false;
            }
            // Cursor sits on the last span and that span ends before the position: skip the scan.
            if (index == spans.size() - 1 && spans.get(index).end < position) {
                return false;
            }
            for (int i = index; i < spans.size(); i++) {
                OffsetPosition span = spans.get(i);
                if (span.start <= position && span.end >= position) {
                    index = i;
                    return true;
                }
                if (span.start > position) {
                    index = i;
                    return false;
                }
            }
            return false;
        }
    }

    /**
     * Runs the patent evaluation and returns its report.
     *
     * @return the result of {@link PatentEvaluation#evaluate()}
     */
    @Override
    public String evaluate() {
        return new PatentEvaluation().evaluate();
    }

    /**
     * Command-line entry point: initialises the Grobid properties, runs the training and exits
     * the JVM with status 0.
     *
     * @param strArr command-line arguments (ignored)
     * @throws Exception if initialisation or training fails
     */
    public static void main(String[] strArr) throws Exception {
        GrobidProperties.getInstance();
        AbstractTrainer.runTraining(new PatentParserTrainer());
        System.exit(0);
    }
}
