package org.grobid.trainer;

import java.io.File;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import javax.xml.parsers.SAXParserFactory;
import org.grobid.core.GrobidModels;
import org.grobid.core.exceptions.GrobidException;
import org.grobid.core.features.FeaturesVectorChemicalEntity;
import org.grobid.core.utilities.OffsetPosition;
import org.grobid.trainer.sax.ChemicalClassNamesSaxParser;
import org.grobid.trainer.sax.ChemicalFormulasSaxParser;
import org.grobid.trainer.sax.ChemicalLigandSaxParser;
import org.grobid.trainer.sax.ChemicalNameSaxParser;
import org.grobid.trainer.sax.ChemicalSubstancesSaxParser;
import org.grobid.trainer.sax.ChemicalWordsSaxParser;

/* loaded from: input_file:org/grobid/trainer/ChemicalEntityTrainer.class */
/**
 * Trainer bound to {@link GrobidModels#ENTITIES_CHEMISTRY}.
 *
 * <p>The training data is assembled from a corpus directory that contains, for every document,
 * a tokenised {@code WO*.words.xml} file plus optional stand-off annotation files sharing the
 * same base name ({@code .HC.chemical-names.xml}, {@code .HC.formula-names.xml},
 * {@code .HC.substance-names.xml}, {@code .HC.class-names.xml}, {@code .HC.ligand.xml}).
 */
public class ChemicalEntityTrainer extends AbstractTrainer {
    /** Suffix of the tokenised document files that drive the corpus scan. */
    private static final String WORDS_SUFFIX = ".words.xml";

    /** Message used for every wrapped failure (kept identical to the historical text). */
    private static final String ERROR_MESSAGE = "An exception occured while running Grobid.";

    public ChemicalEntityTrainer() {
        super(GrobidModels.ENTITIES_CHEMISTRY);
    }

    /**
     * Delegates to {@link #createCRFPPData(File, File, File, double)} with a {@code null}
     * third file and a ratio of {@code 1.0}.
     *
     * @return whatever the four-argument variant returns (as implemented in this class: 0)
     */
    public int createCRFPPData2(File file, File file2) {
        return createCRFPPData(file, file2, null, 1.0d);
    }

    /**
     * Not implemented in this trainer: all arguments are ignored.
     *
     * @return always 0
     */
    @Override // org.grobid.trainer.Trainer
    public int createCRFPPData(File file, File file2, File file3, double d) {
        return 0;
    }

    /**
     * Builds the training file from the corpus directory.
     *
     * <p>For each {@code WO*.words.xml} file, the annotation files that exist next to it are
     * parsed, their id ranges are expanded to individual ids, and the words file is then parsed
     * with those ids so that the labelled tokens can be written out through
     * {@link #addFeatures(List, Writer, List, List)}. One blank line separates documents.
     *
     * @param file  corpus directory to scan
     * @param file2 output file receiving the feature vectors (written as UTF-8)
     * @return the sum of the entity counts reported by the annotation parsers, or 0 when the
     *         directory cannot be listed
     * @throws GrobidException on any I/O, parsing or id-range failure
     */
    @Override // org.grobid.trainer.AbstractTrainer, org.grobid.trainer.Trainer
    public int createCRFPPData(File file, File file2) {
        int entityCount = 0;
        try {
            System.out.println("corpusDir: " + file);
            System.out.println("trainingOutputPath: " + file2);
            File[] wordsFiles = file.listFiles(new FilenameFilter() {
                @Override // java.io.FilenameFilter
                public boolean accept(File dir, String name) {
                    return name.endsWith(WORDS_SUFFIX) && name.startsWith("WO");
                }
            });
            if (wordsFiles == null) {
                return 0;
            }
            System.out.println(wordsFiles.length + " tei files");

            OutputStreamWriter output = new OutputStreamWriter(new FileOutputStream(file2), "UTF8");
            try {
                SAXParserFactory parserFactory = SAXParserFactory.newInstance();
                for (File wordsFile : wordsFiles) {
                    // The filter above guarantees the name ends with WORDS_SUFFIX.
                    System.out.println(wordsFile.getName().replace(WORDS_SUFFIX, ""));
                    String wordsPath = wordsFile.getPath();

                    // Annotation lists are scoped to the current document so that a document
                    // lacking one kind of annotation file does not inherit the ids of the
                    // previously processed document.
                    ArrayList<ArrayList<String>> nameRanges = null;
                    ArrayList<ArrayList<String>> formulaRanges = null;
                    ArrayList<ArrayList<String>> substanceRanges = null;
                    ArrayList<ArrayList<String>> classNameRanges = null;
                    ArrayList<ArrayList<String>> ligandRanges = null;

                    File namesFile = new File(wordsPath.replace(WORDS_SUFFIX, ".HC.chemical-names.xml"));
                    if (namesFile.exists()) {
                        ChemicalNameSaxParser namesHandler = new ChemicalNameSaxParser();
                        // Parse the annotation file itself (not the words file), as is done
                        // for the four other annotation kinds below.
                        parserFactory.newSAXParser().parse(namesFile, namesHandler);
                        nameRanges = namesHandler.getChemicalAnnotations();
                        entityCount += namesHandler.getNumberEntities();
                    }
                    File formulasFile = new File(wordsPath.replace(WORDS_SUFFIX, ".HC.formula-names.xml"));
                    if (formulasFile.exists()) {
                        ChemicalFormulasSaxParser formulasHandler = new ChemicalFormulasSaxParser();
                        parserFactory.newSAXParser().parse(formulasFile, formulasHandler);
                        formulaRanges = formulasHandler.getChemicalFormulas();
                        entityCount += formulasHandler.getNumberEntities();
                    }
                    File substancesFile = new File(wordsPath.replace(WORDS_SUFFIX, ".HC.substance-names.xml"));
                    if (substancesFile.exists()) {
                        ChemicalSubstancesSaxParser substancesHandler = new ChemicalSubstancesSaxParser();
                        parserFactory.newSAXParser().parse(substancesFile, substancesHandler);
                        substanceRanges = substancesHandler.getChemicalSubstances();
                        entityCount += substancesHandler.getNumberEntities();
                    }
                    File classNamesFile = new File(wordsPath.replace(WORDS_SUFFIX, ".HC.class-names.xml"));
                    if (classNamesFile.exists()) {
                        ChemicalClassNamesSaxParser classNamesHandler = new ChemicalClassNamesSaxParser();
                        parserFactory.newSAXParser().parse(classNamesFile, classNamesHandler);
                        classNameRanges = classNamesHandler.getChemicalClassNames();
                        entityCount += classNamesHandler.getNumberEntities();
                    }
                    File ligandFile = new File(wordsPath.replace(WORDS_SUFFIX, ".HC.ligand.xml"));
                    if (ligandFile.exists()) {
                        ChemicalLigandSaxParser ligandHandler = new ChemicalLigandSaxParser();
                        parserFactory.newSAXParser().parse(ligandFile, ligandHandler);
                        ligandRanges = ligandHandler.getChemicalLigand();
                        entityCount += ligandHandler.getNumberEntities();
                    }

                    ArrayList<String> nameIds = new ArrayList<String>();
                    ArrayList<String> nameStarts = new ArrayList<String>();
                    expandIdRanges(nameRanges, nameIds, nameStarts);

                    ArrayList<String> formulaIds = new ArrayList<String>();
                    ArrayList<String> formulaStarts = new ArrayList<String>();
                    expandIdRanges(formulaRanges, formulaIds, formulaStarts);

                    ArrayList<String> substanceIds = new ArrayList<String>();
                    ArrayList<String> substanceStarts = new ArrayList<String>();
                    expandIdRanges(substanceRanges, substanceIds, substanceStarts);

                    ArrayList<String> classNameIds = new ArrayList<String>();
                    ArrayList<String> classNameStarts = new ArrayList<String>();
                    expandIdRanges(classNameRanges, classNameIds, classNameStarts);

                    ArrayList<String> ligandIds = new ArrayList<String>();
                    ArrayList<String> ligandStarts = new ArrayList<String>();
                    expandIdRanges(ligandRanges, ligandIds, ligandStarts);

                    ChemicalWordsSaxParser wordsHandler = new ChemicalWordsSaxParser();
                    wordsHandler.setChemicalAnnotations(nameIds, nameStarts);
                    wordsHandler.setChemicalFormulas(formulaIds, formulaStarts);
                    wordsHandler.setChemicalSubstances(substanceIds, substanceStarts);
                    wordsHandler.setChemicalClassNames(classNameIds, classNameStarts);
                    wordsHandler.setChemicalLigand(ligandIds, ligandStarts);

                    parserFactory.newSAXParser().parse(wordsFile, wordsHandler);
                    addFeatures(wordsHandler.getLabeledResult(), output, null, null);
                    output.write("\n");
                }
            } finally {
                // Release the file handle even when a document fails to parse.
                output.close();
            }
            return entityCount;
        } catch (GrobidException e) {
            // Already carries the Grobid wrapper; do not wrap it a second time.
            throw e;
        } catch (Exception e) {
            throw new GrobidException(ERROR_MESSAGE, e);
        }
    }

    /**
     * Expands {@code [startId, endId]} pairs into individual ids.
     *
     * <p>Ids are expected to end with a decimal counter after the last underscore
     * (for instance {@code w_12}); an id without underscore is treated as a bare counter.
     * The two ids of a pair are trimmed and then ordered by length and, for equal lengths,
     * case-insensitively, so that the lower counter comes first.
     *
     * <p>For each pair the first id is appended to {@code startIds}. Every id after the first
     * up to and including the last is appended to {@code expandedIds}, and the last id is
     * appended once more at the end; a multi-token range therefore lists its last id twice,
     * which is what the previous string-increment loop produced.
     *
     * @param ranges      pairs of ids, may be {@code null} (nothing is appended then)
     * @param expandedIds receives the non-initial ids of every range
     * @param startIds    receives the first id of every range
     * @throws GrobidException if a counter is not a valid integer, or if the last id cannot be
     *                         reached by incrementing the counter of the first one
     */
    private static void expandIdRanges(List<? extends List<String>> ranges, List<String> expandedIds,
                                       List<String> startIds) {
        if (ranges == null) {
            return;
        }
        for (List<String> range : ranges) {
            String first = range.get(0).trim();
            String last = range.get(1).trim();
            boolean reversed = first.length() > last.length()
                    || (first.length() == last.length() && first.compareToIgnoreCase(last) > 0);
            if (reversed) {
                // Swap the trimmed values; using the raw ones would reintroduce whitespace
                // and break both the counter parsing and the end-of-range comparison.
                String swap = first;
                first = last;
                last = swap;
            }
            startIds.add(first);
            if (!last.equals(first)) {
                String prefix = first.substring(0, first.lastIndexOf("_") + 1);
                int from;
                int to;
                try {
                    from = Integer.parseInt(first.substring(prefix.length()));
                    if (!last.startsWith(prefix)) {
                        throw unreachableRange(first, last);
                    }
                    to = Integer.parseInt(last.substring(prefix.length()));
                } catch (NumberFormatException e) {
                    throw new GrobidException(ERROR_MESSAGE, e);
                }
                // The generated ids are prefix + counter; if the last id is not spelled that
                // way (leading zeros, lower counter) it would never be reached.
                if (to <= from || !last.equals(prefix + to)) {
                    throw unreachableRange(first, last);
                }
                int counter = from;
                while (counter < to) {
                    counter++;
                    expandedIds.add(prefix + counter);
                }
            }
            expandedIds.add(last);
        }
    }

    /** Builds the failure raised when an id range cannot be enumerated. */
    private static GrobidException unreachableRange(String first, String last) {
        return new GrobidException(ERROR_MESSAGE,
                new IllegalArgumentException("Cannot expand token id range [" + first + ", " + last + "]"));
    }

    /**
     * Writes one feature vector per labelled line.
     *
     * <p>Each entry of {@code list} is turned into a {@link FeaturesVectorChemicalEntity};
     * entries whose resulting {@code label} is {@code null} are skipped and do not advance
     * the position counter passed to the feature builder.
     *
     * @param list   labelled lines to convert
     * @param writer destination of the printed vectors; flushed once all lines are written
     * @param list2  unused by this implementation
     * @param list3  unused by this implementation
     * @throws GrobidException if building or writing a vector fails
     */
    public void addFeatures(List<String> list, Writer writer, List<OffsetPosition> list2, List<OffsetPosition> list3) {
        int total = list.size();
        int position = 0;
        try {
            for (String line : list) {
                FeaturesVectorChemicalEntity vector =
                        FeaturesVectorChemicalEntity.addFeaturesChemicalEntities(line, total, position, false, false);
                if (vector.label != null) {
                    writer.write(vector.printVector());
                    position++;
                }
            }
            writer.flush();
        } catch (Exception e) {
            throw new GrobidException(ERROR_MESSAGE, e);
        }
    }

    /**
     * Command line entry point: runs the training, prints the evaluation report and exits
     * with status 0.
     */
    public static void main(String[] strArr) {
        ChemicalEntityTrainer trainer = new ChemicalEntityTrainer();
        AbstractTrainer.runTraining(trainer);
        System.out.println(AbstractTrainer.runEvaluation(trainer));
        System.exit(0);
    }
}
