package org.grobid.trainer;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import javax.xml.parsers.SAXParserFactory;
import org.grobid.core.GrobidModels;
import org.grobid.core.exceptions.GrobidException;
import org.grobid.core.utilities.GrobidProperties;
import org.grobid.core.utilities.UnicodeUtil;
import org.grobid.trainer.sax.TEIHeaderSaxParser;

/* loaded from: input_file:org/grobid/trainer/HeaderTrainer.class */
/**
 * Trainer for the {@link GrobidModels#HEADER} model.
 *
 * <p>Training examples are produced by pairing each annotated TEI file with its header
 * file (one line per token, the token being the text before the first space) and
 * appending the label found in the TEI annotation to each header line.
 */
public class HeaderTrainer extends AbstractTrainer {

    /** Number of labeled entries inspected past the cursor when aligning one header line. */
    private static final int ALIGNMENT_LOOKAHEAD = 6;

    public HeaderTrainer() {
        super(GrobidModels.HEADER);
    }

    /**
     * Builds training data only (no evaluation split).
     *
     * @param file  corpus directory; its {@code tei} and {@code headers} sub-directories are read
     * @param file2 output file receiving every example
     * @return the number of TEI files found
     */
    @Override
    public int createCRFPPData(File file, File file2) {
        return addFeaturesHeaders(file.getAbsolutePath() + "/tei", file.getAbsolutePath() + "/headers", file2, null, 1.0d);
    }

    /**
     * Builds training and evaluation data with a random split.
     *
     * @param file  corpus directory; its {@code tei} and {@code headers} sub-directories are read
     * @param file2 output file for training examples (may be {@code null})
     * @param file3 output file for evaluation examples (may be {@code null})
     * @param d     probability that an example goes to the training output when both outputs are set
     * @return the number of TEI files found
     */
    @Override
    public int createCRFPPData(File file, File file2, File file3, double d) {
        return addFeaturesHeaders(file.getAbsolutePath() + "/tei", file.getAbsolutePath() + "/headers", file2, file3, d);
    }

    /**
     * Pairs every TEI file of {@code str} with its header file in {@code str2}, labels the
     * header lines and writes the resulting examples.
     *
     * <p>Each example goes to exactly one output: the training file when only {@code file}
     * is set, the evaluation file when only {@code file2} is set, and a random choice
     * (training with probability {@code d}) when both are set. When neither is set the
     * files are still parsed but nothing is written.
     *
     * @param str   directory holding the TEI files ({@code *.tei} or {@code *.tei.xml})
     * @param str2  directory holding the header files ({@code <pdfName>.header} or
     *              {@code <pdfName>._*.header})
     * @param file  training output file, or {@code null}
     * @param file2 evaluation output file, or {@code null}
     * @param d     training share used for the random split
     * @return the number of TEI files found (not the number of examples written), or
     *         {@code 0} when {@code str} cannot be listed
     * @throws GrobidException wrapping any parsing or I/O failure
     */
    public int addFeaturesHeaders(String str, String str2, File file, File file2, double d) {
        System.out.println(str);
        System.out.println(str2);
        System.out.println(file);
        System.out.println(file2);
        System.out.println("TEI files: " + str);
        System.out.println("header info files: " + str2);
        if (file != null) {
            System.out.println("outputPath for training data: " + file);
        }
        if (file2 != null) {
            System.out.println("outputPath for evaluation data: " + file2);
        }
        try {
            File[] teiFiles = new File(str).listFiles(new FilenameFilter() {
                @Override
                public boolean accept(File dir, String name) {
                    return name.endsWith(".tei") || name.endsWith(".tei.xml");
                }
            });
            if (teiFiles == null) {
                return 0;
            }
            System.out.println(teiFiles.length + " tei files");

            // The header directory is listed once; listFiles() yields null when the path
            // is not a readable directory, which must not surface as a bare NPE.
            File[] headerFiles = new File(str2).listFiles();
            if (headerFiles == null && teiFiles.length > 0) {
                throw new IllegalStateException("Cannot list header directory: " + str2);
            }

            // NOTE(review): the SAX factory keeps its default settings; this assumes the
            // TEI training files are trusted local data - confirm before parsing foreign input.
            SAXParserFactory parserFactory = SAXParserFactory.newInstance();

            // try-with-resources tolerates null resources, so an absent output is simply skipped.
            try (OutputStreamWriter trainingWriter = openWriter(file);
                 OutputStreamWriter evaluationWriter = openWriter(file2)) {
                for (File teiFile : teiFiles) {
                    TEIHeaderSaxParser teiParser = new TEIHeaderSaxParser();
                    teiParser.setFileName(teiFile.getName());
                    parserFactory.newSAXParser().parse(teiFile, teiParser);
                    List<String> labeled = teiParser.getLabeledResult();

                    String headerName = findHeaderFileName(headerFiles, teiParser.getPDFName());
                    if (headerName == null) {
                        // No header file for this TEI file: it contributes no example.
                        continue;
                    }
                    String aligned = alignLabels(str2 + File.separator + headerName, labeled);
                    String example = keepLabeledLines(aligned) + "\n";
                    writeExample(example, trainingWriter, evaluationWriter, d);
                }
            }
            return teiFiles.length;
        } catch (Exception e) {
            throw new GrobidException("An exception occured while running Grobid.", e);
        }
    }

    /** Opens a UTF-8 writer on {@code target}, or returns {@code null} when no target is given. */
    private static OutputStreamWriter openWriter(File target) throws IOException {
        if (target == null) {
            return null;
        }
        return new OutputStreamWriter(new FileOutputStream(target), StandardCharsets.UTF_8);
    }

    /**
     * Returns the name of the first listed file that is either {@code <pdfName>.header}
     * or starts with {@code <pdfName>._} and ends with {@code .header}; {@code null} if none.
     */
    private static String findHeaderFileName(File[] headerFiles, String pdfName) {
        String exactName = pdfName + ".header";
        String variantPrefix = pdfName + "._";
        for (File candidate : headerFiles) {
            String name = candidate.getName();
            if (name.equals(exactName) || (name.startsWith(variantPrefix) && name.endsWith(".header"))) {
                return name;
            }
        }
        return null;
    }

    /**
     * Reads the header file line by line and appends to each line the label of the
     * matching entry of {@code labeled}, when one is found.
     *
     * <p>Matching compares the normalised first token of the header line with the
     * normalised first token of the labeled entries, looking only at a short window
     * starting at the entry following the previous match, so both sequences are
     * consumed in order.
     *
     * @return the header lines, each terminated by {@code '\n'}, labeled where a match was found
     */
    private static String alignLabels(String headerPath, List<String> labeled) throws IOException {
        StringBuilder aligned = new StringBuilder();
        int cursor = 0; // index of the first labeled entry not yet consumed
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(headerPath), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                aligned.append(line);
                int firstSpace = line.indexOf(' ');
                // A line without any space has no comparable token and never matches.
                String token = firstSpace != -1
                        ? UnicodeUtil.normaliseTextAndRemoveSpaces(line.substring(0, firstSpace))
                        : null;
                int lastCandidate = Math.min(labeled.size() - 1, cursor + ALIGNMENT_LOOKAHEAD);
                for (int i = cursor; i <= lastCandidate; i++) {
                    StringTokenizer entry = new StringTokenizer(labeled.get(i), " ");
                    if (entry.hasMoreTokens()
                            && UnicodeUtil.normaliseTextAndRemoveSpaces(entry.nextToken()).equals(token)) {
                        // The second token of a labeled entry is its label.
                        aligned.append(" ").append(entry.nextToken());
                        cursor = i + 1;
                        break;
                    }
                }
                aligned.append("\n");
            }
        }
        return aligned.toString();
    }

    /**
     * Keeps only the lines that carry a label. A single unlabeled line whose two
     * neighbours (the line before and the line after) share the same label inherits
     * that label instead of being dropped.
     *
     * @param aligned newline-separated lines as produced by {@link #alignLabels}
     * @return the retained lines, each terminated by {@code '\n'}
     */
    private static String keepLabeledLines(String aligned) {
        StringBuilder kept = new StringBuilder();
        StringTokenizer lines = new StringTokenizer(aligned, "\n");
        String previousLine = null;
        String previousLabel = null;
        String labelBeforePrevious = null;
        while (lines.hasMoreTokens()) {
            String line = lines.nextToken();
            String label = labelOf(line);
            if (previousLine != null) {
                if (previousLabel != null) {
                    kept.append(previousLine).append("\n");
                } else if (label != null && label.equals(labelBeforePrevious)) {
                    // Fill the one-line gap between two identically labeled lines.
                    previousLabel = label;
                    kept.append(previousLine).append(" ").append(label).append("\n");
                }
            }
            previousLine = line;
            labelBeforePrevious = previousLabel;
            previousLabel = label;
        }
        if (previousLabel != null) {
            kept.append(previousLine).append("\n");
        }
        return kept.toString();
    }

    /**
     * Returns the last space-separated token of {@code line} when it looks like a label
     * (starts with {@code <} or {@code I-<}); otherwise {@code null}.
     */
    private static String labelOf(String line) {
        String last = null;
        StringTokenizer tokens = new StringTokenizer(line, " ");
        while (tokens.hasMoreTokens()) {
            last = tokens.nextToken();
        }
        if (last != null && (last.charAt(0) == '<' || last.startsWith("I-<"))) {
            return last;
        }
        return null;
    }

    /**
     * Writes {@code example} to exactly one of the available outputs; the random split
     * is only drawn when both outputs exist.
     */
    private static void writeExample(String example, OutputStreamWriter trainingWriter,
                                     OutputStreamWriter evaluationWriter, double trainingShare) throws IOException {
        if (trainingWriter != null && evaluationWriter != null) {
            if (Math.random() <= trainingShare) {
                trainingWriter.write(example);
            } else {
                evaluationWriter.write(example);
            }
        } else if (trainingWriter != null) {
            trainingWriter.write(example);
        } else if (evaluationWriter != null) {
            evaluationWriter.write(example);
        }
    }

    /** Initialises the GROBID properties, trains the header model, then prints its evaluation. */
    public static void main(String[] strArr) throws Exception {
        GrobidProperties.getInstance();
        AbstractTrainer.runTraining(new HeaderTrainer());
        System.out.println(AbstractTrainer.runEvaluation(new HeaderTrainer()));
        System.exit(0);
    }
}
