package org.grobid.trainer.sax;

import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
import java.util.StringTokenizer;
import org.grobid.core.utilities.TextUtilities;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:org/grobid/trainer/sax/TEIFulltextSaxParser.class */
/**
 * SAX handler that flattens a TEI full-text training document into a list of labeled tokens.
 *
 * <p>Character data is buffered until an element boundary is reached. At that point the buffered
 * text is tokenized with {@link TextUtilities#delimiters} and every non-blank token is appended to
 * the result list as {@code "token label\n"}; the first token of each flushed segment carries the
 * {@code I-} prefix on its label. The label in effect is tracked with a stack that mirrors the
 * element nesting.
 *
 * <p>Instances hold mutable parsing state and are meant to be used for a single parse.
 */
public class TEIFulltextSaxParser extends DefaultHandler {
    private static final Logger logger = LoggerFactory.getLogger(TEIFulltextSaxParser.class);

    private static final String LABEL_PARAGRAPH = "<paragraph>";
    private static final String LABEL_OTHER = "<other>";
    private static final String LABEL_FIGURE = "<figure>";
    private static final String LABEL_TABLE = "<table>";

    /** Text seen since the last flush. */
    private final StringBuilder accumulator = new StringBuilder();
    /** Labels of the currently open elements, innermost on top. */
    private final Stack<String> currentTags = new Stack<>();
    /** Output lines, each of the form {@code "token label\n"}. */
    private final List<String> labeled = new ArrayList<>();
    /** Label applied to the next flushed text; {@code null} until the first label is pushed. */
    private String currentTag = null;
    /** True while inside a {@code figure} element that is not typed as a table. */
    private boolean figureBlock = false;
    /** True while inside a {@code figure type="table"} element. */
    private boolean tableBlock = false;

    public TEIFulltextSaxParser() {
    }

    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void characters(char[] ch, int start, int length) {
        this.accumulator.append(ch, start, length);
    }

    /**
     * Returns the text buffered since the last flush, with leading and trailing whitespace removed.
     *
     * @return the trimmed pending text, possibly empty
     */
    public String getText() {
        return this.accumulator.toString().trim();
    }

    /**
     * Returns the labeled tokens produced so far.
     *
     * @return the live internal list of {@code "token label\n"} lines
     */
    public List<String> getLabeledResult() {
        return this.labeled;
    }

    /**
     * Flushes the text of the closing element and restores the label of the enclosing element.
     * The empty marker elements {@code lb}, {@code pb} and {@code space} are skipped.
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (!isMarkerElement(qName)) {
            writeData(qName, true);
            if (!this.currentTags.empty()) {
                this.currentTag = this.currentTags.peek();
            }
        }
        if ("figure".equals(qName)) {
            this.figureBlock = false;
            this.tableBlock = false;
        }
    }

    /**
     * Flushes any pending text under the label in effect, then pushes the label associated with
     * the opening element.
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        // Marker elements only contribute to the text buffer; they never touch the label stack.
        if ("lb".equals(qName)) {
            this.accumulator.append(" +LINE+ ");
            return;
        }
        if ("space".equals(qName) || "pb".equals(qName)) {
            // endElement ignores "pb" just like "lb" and "space", so it must not flush text or
            // push a label here either; a plain separator keeps the surrounding words apart.
            this.accumulator.append(" ");
            return;
        }

        String pending = getText();
        if (pending.length() > 0) {
            writeData(qName, false);
        }
        this.accumulator.setLength(0);

        switch (qName) {
            case "div":
                startDiv(attributes);
                break;
            case "p":
            case "item":
                pushLabel(LABEL_PARAGRAPH);
                break;
            case "other":
            case "text":
                pushLabel(LABEL_OTHER);
                break;
            case "ref":
                startRef(attributes);
                break;
            case "formula":
                pushLabel("<equation>");
                break;
            case "label":
                pushLabel("<equation_label>");
                break;
            case "head":
                pushLabel("<section>");
                break;
            case "table":
                pushLabel(LABEL_TABLE);
                break;
            case "figure":
                startFigure(attributes);
                break;
            case "tei":
            case "teiHeader":
            case "fileDesc":
            case "list":
                // Structural wrappers: no label of their own.
                break;
            default:
                // NOTE(review): the label pushed here (and the one pushed for "text") is not popped
                // by endElement, because writeData only pops for the element names it lists.
                logger.error("Invalid element name: {} - it will be mapped to the label <other>", qName);
                pushLabel(LABEL_OTHER);
                break;
        }
    }

    /** Tells whether the element is an empty marker (line break, page break, explicit space). */
    private static boolean isMarkerElement(String qName) {
        return "lb".equals(qName) || "pb".equals(qName) || "space".equals(qName);
    }

    /** Tells whether closing/opening this element triggers a flush of the buffered text. */
    private static boolean isLabeledElement(String qName) {
        switch (qName) {
            case "other":
            case "p":
            case "ref":
            case "head":
            case "figure":
            case "paragraph":
            case "div":
            case "table":
            case "formula":
            case "item":
            case "label":
                return true;
            default:
                return false;
        }
    }

    /** Makes {@code label} the label in effect and records it for the element being opened. */
    private void pushLabel(String label) {
        this.currentTags.push(label);
        this.currentTag = label;
    }

    /**
     * Records the label already in effect for an element that brings no label of its own, so that
     * the pop performed when the element closes removes this entry rather than the entry of the
     * enclosing element.
     */
    private void pushInheritedLabel() {
        if (this.currentTag != null) {
            this.currentTags.push(this.currentTag);
        }
    }

    /** Handles an opening {@code div}: only {@code type="paragraph"} is recognised. */
    private void startDiv(Attributes attributes) {
        boolean pushed = false;
        int count = attributes.getLength();
        for (int i = 0; i < count; i++) {
            String name = attributes.getQName(i);
            String value = attributes.getValue(i);
            if (name == null) {
                continue;
            }
            if (!"type".equals(name)) {
                logger.error("Invalid attribute name for element div: {}", name);
            } else if ("paragraph".equals(value)) {
                pushLabel(LABEL_PARAGRAPH);
                pushed = true;
            } else {
                logger.error("Invalid attribute value for element div: {}={}", name, value);
            }
        }
        if (!pushed) {
            pushInheritedLabel();
        }
    }

    /** Handles an opening {@code ref}: the {@code type} attribute selects the marker label. */
    private void startRef(Attributes attributes) {
        boolean pushed = false;
        int count = attributes.getLength();
        for (int i = 0; i < count; i++) {
            String name = attributes.getQName(i);
            String value = attributes.getValue(i);
            if (name == null) {
                continue;
            }
            if (!"type".equals(name)) {
                logger.error("Invalid attribute name for element ref: {}", name);
                continue;
            }
            String label = refLabel(value);
            if (label != null) {
                pushLabel(label);
                pushed = true;
            } else {
                logger.error("Invalid attribute value for element ref: {}={}", name, value);
            }
        }
        if (!pushed) {
            pushInheritedLabel();
        }
    }

    /** Maps the {@code type} value of a {@code ref} to its label, or {@code null} if unknown. */
    private static String refLabel(String type) {
        if (type == null) {
            return null;
        }
        switch (type) {
            case "biblio":
                return "<citation_marker>";
            case "figure":
                return "<figure_marker>";
            case "table":
                return "<table_marker>";
            case "formula":
            case "equation":
                return "<equation_marker>";
            case "section":
                return "<section_marker>";
            default:
                return null;
        }
    }

    /** Handles an opening {@code figure}; {@code type="table"} turns it into a table block. */
    private void startFigure(Attributes attributes) {
        this.figureBlock = true;
        int count = attributes.getLength();
        for (int i = 0; i < count; i++) {
            String name = attributes.getQName(i);
            String value = attributes.getValue(i);
            if (name == null) {
                continue;
            }
            if (!"type".equals(name)) {
                logger.error("Invalid attribute name for element figure: {}", name);
            } else if ("table".equals(value)) {
                this.tableBlock = true;
            } else {
                logger.error("Invalid attribute value for element figure: {}={}", name, value);
            }
        }
        if (this.tableBlock) {
            this.figureBlock = false;
            pushLabel(LABEL_TABLE);
        } else {
            pushLabel(LABEL_FIGURE);
        }
    }

    /**
     * Tokenizes the buffered text and appends one labeled line per non-blank token, then clears
     * the buffer. Does nothing unless {@code qName} is one of the labeled elements and a label is
     * in effect.
     *
     * @param qName name of the element whose boundary triggered the flush
     * @param pop   {@code true} when called for a closing element, in which case the top of the
     *              label stack is removed first
     */
    private void writeData(String qName, boolean pop) {
        if (!isLabeledElement(qName) || this.currentTag == null) {
            return;
        }
        if (pop && !this.currentTags.empty()) {
            this.currentTags.pop();
        }
        // Inside a table or figure block every token takes the block label, whatever is nested.
        if (this.tableBlock) {
            this.currentTag = LABEL_TABLE;
        } else if (this.figureBlock) {
            this.currentTag = LABEL_FIGURE;
        }
        StringTokenizer tokenizer = new StringTokenizer(getText(), TextUtilities.delimiters, true);
        boolean first = true;
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken().trim();
            if (token.isEmpty()) {
                continue;
            }
            this.labeled.add(token + (first ? " I-" : " ") + this.currentTag + "\n");
            first = false;
        }
        this.accumulator.setLength(0);
    }
}
