package org.grobid.trainer.sax;

import java.util.ArrayList;
import java.util.List;
import org.grobid.core.analyzers.GrobidAnalyzer;
import org.grobid.core.lang.Language;
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.utilities.UnicodeUtil;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:org/grobid/trainer/sax/TEIAuthorSaxParser.class */
/**
 * SAX handler that converts TEI author annotations into aligned token and label sequences.
 *
 * <p>For every {@code <author>} element one list of {@link LayoutToken} and one parallel list of
 * labels are produced. Text inside a name-part element ({@code forename}/{@code firstname},
 * {@code middlename}, {@code surname}/{@code lastname}, {@code title}/{@code roleName},
 * {@code suffix}, {@code marker}) is labeled with the corresponding tag; text found between
 * elements is labeled {@code <other>}. The first labeled token of each written field carries an
 * {@code I-} prefix, and whitespace tokens get a {@code null} label.
 */
public class TEIAuthorSaxParser extends DefaultHandler {
    /** Label given to text that is not enclosed in a name-part element. */
    private static final String OTHER_TAG = "<other>";

    /** One label list per closed {@code <author>} element, parallel to {@link #allTokens}. */
    private final List<List<String>> allLabeled = new ArrayList<List<String>>();
    /** One token list per closed {@code <author>} element, parallel to {@link #allLabeled}. */
    private final List<List<LayoutToken>> allTokens = new ArrayList<List<LayoutToken>>();
    /** Character data received since the last element boundary. */
    private final StringBuilder accumulator = new StringBuilder();
    /** Label applied by the next call to {@link #writeField(String)}. */
    private String currentTag = null;
    /** Labels of the {@code <author>} element currently being read. */
    private List<String> labeled = null;
    /** Tokens of the {@code <author>} element currently being read. */
    private List<LayoutToken> tokens = null;
    /** Number of {@code </author>} end tags seen so far. */
    public int n = 0;

    /** Creates a handler with empty result lists. */
    public TEIAuthorSaxParser() {
    }

    /** Buffers character data until the next element boundary. */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void characters(char[] ch, int start, int length) {
        this.accumulator.append(ch, start, length);
    }

    /**
     * Returns the currently buffered character data with leading and trailing whitespace removed.
     *
     * @return the trimmed buffer content, possibly empty
     */
    public String getText() {
        return this.accumulator.toString().trim();
    }

    /**
     * Returns the label sequences collected so far.
     *
     * @return one list per closed {@code <author>} element; entries are {@code null} for
     *         whitespace tokens
     */
    public List<List<String>> getLabeledResult() {
        return this.allLabeled;
    }

    /**
     * Returns the token sequences collected so far.
     *
     * @return one list per closed {@code <author>} element, parallel to
     *         {@link #getLabeledResult()}
     */
    public List<List<LayoutToken>> getTokensResult() {
        return this.allTokens;
    }

    /**
     * Writes the buffered text when a name-part element or an {@code <author>} element closes,
     * then clears the buffer.
     *
     * @param uri       namespace URI (unused)
     * @param localName local name (unused)
     * @param qName     qualified element name used for dispatching
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (isNamePart(qName) && this.currentTag != null) {
            writeField(getText());
        } else if (qName.equals("author")) {
            // Trailing text after the last name part belongs to the author as <other>.
            String trailing = getText();
            if (trailing.length() > 0) {
                this.currentTag = OTHER_TAG;
                writeField(trailing);
            }
            this.allLabeled.add(this.labeled);
            this.allTokens.add(this.tokens);
            this.n++;
        }
        // Line breaks (lb) and page breaks (pb) contribute no tokens. For every other element,
        // text still pending at this point is discarded by the reset below.
        this.accumulator.setLength(0);
    }

    /**
     * Flushes any pending text as {@code <other>} and selects the label for the element being
     * opened. An {@code <author>} element starts fresh token and label lists. Elements that are
     * neither name parts nor known structural tags are reported on standard output.
     *
     * @param uri        namespace URI (unused)
     * @param localName  local name (unused)
     * @param qName      qualified element name used for dispatching
     * @param attributes element attributes (unused)
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        String pending = getText();
        if (pending.length() > 0) {
            this.currentTag = OTHER_TAG;
            writeField(pending);
        }
        this.accumulator.setLength(0);

        if (qName.equals("title") || qName.equals("roleName")) {
            this.currentTag = "<title>";
        } else if (qName.equals("marker")) {
            this.currentTag = "<marker>";
        } else if (qName.equals("surname") || qName.equals("lastname")) {
            this.currentTag = "<surname>";
        } else if (qName.equals("middlename")) {
            this.currentTag = "<middlename>";
        } else if (qName.equals("forename") || qName.equals("firstname")) {
            this.currentTag = "<forename>";
        } else if (qName.equals("suffix")) {
            this.currentTag = "<suffix>";
        } else if (qName.equals("author")) {
            this.labeled = new ArrayList<String>();
            this.tokens = new ArrayList<LayoutToken>();
        } else if (!isStructural(qName)) {
            System.out.println("Warning, invalid tag: <" + qName + ">");
        }
    }

    /**
     * Tells whether the element name is one whose text content is written with its own label.
     * Covers every name that {@link #startElement} maps to a label, so that each opened field is
     * also written when it closes.
     */
    private static boolean isNamePart(String qName) {
        return qName.equals("forename")
                || qName.equals("firstname")
                || qName.equals("middlename")
                || qName.equals("title")
                || qName.equals("suffix")
                || qName.equals("surname")
                || qName.equals("lastname")
                || qName.equals("marker")
                || qName.equals("roleName");
    }

    /** Tells whether the element name is a known tag that is accepted without a label or warning. */
    private static boolean isStructural(String qName) {
        return qName.equals("analytic")
                || qName.equals("biblStruct")
                || qName.equals("sourceDesc")
                || qName.equals("fileDesc")
                || qName.equals("teiHeader")
                || qName.equals("TEI")
                || qName.equals("persName")
                || qName.equals("tei")
                || qName.equals("lb")
                || qName.equals("pb");
    }

    /**
     * Tokenizes {@code text} and appends the tokens, with labels derived from the current tag,
     * to the lists of the author being read. Nothing is appended when tokenization yields no
     * token.
     *
     * @param text the field content to tokenize
     */
    private void writeField(String text) {
        List<LayoutToken> fieldTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text);
        if (fieldTokens == null || fieldTokens.isEmpty()) {
            // Retry with an explicitly supplied "en" language.
            fieldTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text, new Language("en", 1.0d));
        }
        if (fieldTokens == null || fieldTokens.isEmpty()) {
            return;
        }

        // Text met outside of any <author> element: create the lists on demand and carry on.
        if (this.tokens == null) {
            this.tokens = new ArrayList<LayoutToken>();
            System.out.println("Warning: list of LayoutToken not initialized properly, parsing continue... ");
        }
        if (this.labeled == null) {
            this.labeled = new ArrayList<String>();
            System.out.println("Warning: list of labels not initialized properly, parsing continue... ");
        }

        boolean firstLabel = true;
        for (LayoutToken token : fieldTokens) {
            this.tokens.add(token);
            String tokenText = token.getText();
            if (tokenText.equals(" ") || tokenText.equals("\n")) {
                // Keep the two lists aligned: whitespace tokens carry no label.
                this.labeled.add(null);
                continue;
            }
            String normalised = UnicodeUtil.normaliseTextAndRemoveSpaces(tokenText);
            if (normalised.trim().length() == 0) {
                this.labeled.add(null);
            } else if (firstLabel) {
                this.labeled.add("I-" + this.currentTag);
                firstLabel = false;
            } else {
                this.labeled.add(this.currentTag);
            }
        }
    }
}
