package org.grobid.trainer;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.StringTokenizer;
import javax.xml.parsers.SAXParserFactory;
import org.grobid.core.GrobidModels;
import org.grobid.core.exceptions.GrobidException;
import org.grobid.core.utilities.GrobidProperties;
import org.grobid.core.utilities.UnicodeUtil;
import org.grobid.trainer.sax.TEISegmentationSaxParser;

/* loaded from: input_file:org/grobid/trainer/SegmentationTrainer.class */
public class SegmentationTrainer extends AbstractTrainer {
    public SegmentationTrainer() {
        super(GrobidModels.SEGMENTATION);
        this.epsilon = 1.0E-7d;
        this.window = 50;
        this.nbMaxIterations = 1000;
    }

    @Override // org.grobid.trainer.AbstractTrainer, org.grobid.trainer.Trainer
    public int createCRFPPData(File file, File file2) {
        return addFeaturesSegmentation(file.getAbsolutePath() + "/tei", file.getAbsolutePath() + "/raw", file2, null, 1.0d);
    }

    @Override // org.grobid.trainer.Trainer
    public int createCRFPPData(File file, File file2, File file3, double d) {
        return addFeaturesSegmentation(file.getAbsolutePath() + "/tei", file.getAbsolutePath() + "/raw", file2, file3, d);
    }

    public int addFeaturesSegmentation(String str, String str2, File file, File file2, double d) {
        try {
            System.out.println("sourceTEIPathLabel: " + str);
            System.out.println("sourceRawPathLabel: " + str2);
            System.out.println("trainingOutputPath: " + file);
            System.out.println("evalOutputPath: " + file2);
            File[] listFiles = new File(str).listFiles(new FilenameFilter() { // from class: org.grobid.trainer.SegmentationTrainer.1
                @Override // java.io.FilenameFilter
                public boolean accept(File file3, String str3) {
                    return str3.endsWith(".tei.xml") || str3.endsWith(".tei");
                }
            });
            if (listFiles == null) {
                return 0;
            }
            System.out.println(listFiles.length + " tei files");
            FileOutputStream fileOutputStream = null;
            OutputStreamWriter outputStreamWriter = null;
            if (file != null) {
                fileOutputStream = new FileOutputStream(file);
                outputStreamWriter = new OutputStreamWriter(fileOutputStream, "UTF8");
            }
            FileOutputStream fileOutputStream2 = null;
            OutputStreamWriter outputStreamWriter2 = null;
            if (file2 != null) {
                fileOutputStream2 = new FileOutputStream(file2);
                outputStreamWriter2 = new OutputStreamWriter(fileOutputStream2, "UTF8");
            }
            SAXParserFactory newInstance = SAXParserFactory.newInstance();
            for (File file3 : listFiles) {
                String name = file3.getName();
                LOGGER.info("Processing: " + name);
                TEISegmentationSaxParser tEISegmentationSaxParser = new TEISegmentationSaxParser();
                newInstance.newSAXParser().parse(file3, tEISegmentationSaxParser);
                List<String> labeledResult = tEISegmentationSaxParser.getLabeledResult();
                try {
                    File file4 = new File(str2 + File.separator + name.replace(".tei.xml", ""));
                    if (file4.exists()) {
                        int i = 0;
                        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(file4), "UTF8"));
                        StringBuilder sb = new StringBuilder();
                        int i2 = 0;
                        String str3 = null;
                        int i3 = 0;
                        do {
                            String readLine = bufferedReader.readLine();
                            if (readLine == null) {
                                break;
                            }
                            i2++;
                            int indexOf = readLine.indexOf(32);
                            String normaliseTextAndRemoveSpaces = indexOf != -1 ? UnicodeUtil.normaliseTextAndRemoveSpaces(readLine.substring(0, indexOf)) : null;
                            int i4 = i;
                            while (true) {
                                if (i4 >= labeledResult.size()) {
                                    break;
                                }
                                StringTokenizer stringTokenizer = new StringTokenizer(labeledResult.get(i4), " \t");
                                if (stringTokenizer.hasMoreTokens() && UnicodeUtil.normaliseTextAndRemoveSpaces(stringTokenizer.nextToken()).equals(normaliseTextAndRemoveSpaces)) {
                                    String nextToken = stringTokenizer.nextToken();
                                    sb.append(readLine).append(" ").append(nextToken);
                                    str3 = nextToken;
                                    i = i4 + 1;
                                    i3 = 0;
                                    break;
                                }
                                if (i4 - i > 5) {
                                    i3++;
                                    if (str3 != null) {
                                        sb.append(readLine).append(" ").append(str3);
                                    }
                                } else {
                                    i4++;
                                }
                            }
                        } while (i3 <= 20);
                        bufferedReader.close();
                        if (i3 < 10) {
                            if (outputStreamWriter == null && outputStreamWriter2 != null) {
                                outputStreamWriter2.write(sb.toString() + "\n");
                            }
                            if (outputStreamWriter != null && outputStreamWriter2 == null) {
                                outputStreamWriter.write(sb.toString() + "\n");
                            } else if (Math.random() <= d) {
                                outputStreamWriter.write(sb.toString() + "\n");
                            } else {
                                outputStreamWriter2.write(sb.toString() + "\n");
                            }
                        } else {
                            LOGGER.warn(name + " / too many synchronization issues, file not used in training data and to be fixed!");
                        }
                    } else {
                        LOGGER.error("The raw file does not exist: " + file4.getPath());
                    }
                } catch (Exception e) {
                    LOGGER.error("Fail to open or process raw file", e);
                }
            }
            if (outputStreamWriter != null) {
                outputStreamWriter.close();
                fileOutputStream.close();
            }
            if (outputStreamWriter2 != null) {
                outputStreamWriter2.close();
                fileOutputStream2.close();
            }
            return 0;
        } catch (Exception e2) {
            throw new GrobidException("An exception occured while running Grobid.", e2);
        }
    }

    public static void main(String[] strArr) throws Exception {
        GrobidProperties.getInstance();
        AbstractTrainer.runTraining(new SegmentationTrainer());
        System.out.println(AbstractTrainer.runEvaluation(new SegmentationTrainer()));
        System.exit(0);
    }
}
