package org.grobid.trainer.sax;

import java.io.IOException;
import java.io.Writer;
import java.util.StringTokenizer;
import org.grobid.core.exceptions.GrobidException;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:org/grobid/trainer/sax/TEIHeaderSaxParserOCRTraining.class */
/**
 * SAX handler that extracts the text of selected TEI header elements and writes it,
 * tokenized, to one writer per field (affiliations, addresses, keywords, authors, notes).
 *
 * <p>Character data is accumulated across elements. When one of the "flushing" elements
 * (see {@link #isFlushingElement(String)}) closes, the accumulated text is tokenized on
 * space, newline and tab, written to the writer selected for that element (if any), and
 * the accumulator is cleared. Text of non-flushing elements (for example
 * {@code docAuthor}) therefore stays in the accumulator until the next flushing element
 * closes. {@code lb} and {@code pb} elements insert the markers {@code +L+} and
 * {@code +PAGE+}, which are both rendered as a newline on output.
 *
 * <p>NOTE(review): {@code currentTag} is only ever set in {@link #startElement} and is
 * never cleared, so the choice of the authors writer depends on the most recently opened
 * tracked element, which may belong to an earlier part of the document.
 */
public class TEIHeaderSaxParserOCRTraining extends DefaultHandler {
    /** Characters that are split off into their own token when they end a word. */
    public static String punctuations = ",:;?.!)-\"']";

    /** Marker appended to the accumulated text for an {@code lb} element. */
    private static final String LINE_BREAK_MARKER = "+L+";

    /** Marker appended to the accumulated text for a {@code pb} element. */
    private static final String PAGE_BREAK_MARKER = "+PAGE+";

    /** Characters that are split off into their own token when they start a word. */
    private static final String OPENING_DELIMITERS = "([\"";

    /** Value of {@code currentTag} set when a {@code docAuthor} element opens. */
    private static final String AUTHOR_TAG = "<author>";

    /** Character data collected since the last flush. */
    private final StringBuilder accumulator = new StringBuilder();

    /** Label of the most recently opened tracked element, or {@code null} if none yet. */
    private String currentTag;

    private final Writer affiliationsWriter;
    private final Writer addressesWriter;
    private final Writer keywordsWriter;
    private final Writer authorsWriter;
    private final Writer notesWriter;

    /**
     * Creates a handler without any writer: elements are parsed and the accumulator is
     * maintained, but nothing is written.
     */
    public TEIHeaderSaxParserOCRTraining() {
        this(null, null, null, null, null);
    }

    /**
     * Creates a handler writing each field to its own writer. Any writer may be
     * {@code null}, in which case the corresponding field is skipped.
     *
     * @param affiliationsWriter receives the text flushed at the end of {@code affiliation}
     * @param addressesWriter receives the text flushed at the end of {@code address}
     * @param keywordsWriter receives the text flushed at the end of {@code keywords}
     * @param authorsWriter receives the text flushed while the current tag is the author tag
     * @param notesWriter receives the text flushed at the end of {@code note}
     */
    public TEIHeaderSaxParserOCRTraining(Writer affiliationsWriter, Writer addressesWriter,
            Writer keywordsWriter, Writer authorsWriter, Writer notesWriter) {
        this.affiliationsWriter = affiliationsWriter;
        this.addressesWriter = addressesWriter;
        this.keywordsWriter = keywordsWriter;
        this.authorsWriter = authorsWriter;
        this.notesWriter = notesWriter;
    }

    /** Appends the reported character data to the accumulator. */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void characters(char[] ch, int start, int length) {
        accumulator.append(ch, start, length);
    }

    /**
     * Returns the text accumulated since the last flush, with leading and trailing
     * whitespace removed.
     *
     * @return the trimmed accumulated text, never {@code null}
     */
    public String getText() {
        return accumulator.toString().trim();
    }

    /**
     * Handles the end of an element. {@code lb} and {@code pb} add their marker to the
     * accumulator; flushing elements write the accumulated text to the selected writer
     * (if any) and clear the accumulator; every other element is ignored.
     *
     * @param uri namespace URI (unused)
     * @param localName local name (unused)
     * @param qName qualified name used to identify the element
     * @throws SAXException declared by the overridden method; not thrown by this code
     * @throws GrobidException if writing to the selected writer fails
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (!isFlushingElement(qName)) {
            if ("lb".equals(qName)) {
                accumulator.append(" +L+ ");
            } else if ("pb".equals(qName)) {
                accumulator.append(" +PAGE+ ");
            }
            return;
        }

        Writer target = selectWriter(qName);
        if (target != null) {
            try {
                writeTokens(target, getText());
            } catch (IOException e) {
                throw new GrobidException("An exception occured while running Grobid.", e);
            }
        }
        accumulator.setLength(0);
    }

    /**
     * Records which tracked element was opened last by updating {@code currentTag}.
     * For {@code div}, {@code ptr} and {@code biblScope} the tag is only updated when the
     * {@code type} attribute has a recognised value. Opening a {@code titlePart} also
     * discards any text accumulated so far.
     *
     * @param uri namespace URI (unused)
     * @param localName local name (unused)
     * @param qName qualified name used to identify the element
     * @param attributes attributes of the element; only {@code type} is read
     * @throws SAXException declared by the overridden method; not thrown by this code
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        if ("div".equals(qName)) {
            String type = attributes.getValue("type");
            if ("abstract".equals(type)) {
                currentTag = "<abstract>";
            } else if ("intro".equals(type)) {
                currentTag = "<intro>";
            } else if ("paragraph".equals(type)) {
                currentTag = "<other>";
            }
        } else if ("note".equals(qName)) {
            currentTag = "<note>";
        } else if ("ptr".equals(qName)) {
            if ("web".equals(attributes.getValue("type"))) {
                currentTag = "<web>";
            }
        } else if ("biblScope".equals(qName)) {
            String type = attributes.getValue("type");
            if ("vol".equals(type)) {
                currentTag = "<volume>";
            } else if ("pp".equals(type)) {
                currentTag = "<pages>";
            }
        } else if ("titlePart".equals(qName)) {
            currentTag = "<title>";
            // Drop whatever was collected before the title starts.
            accumulator.setLength(0);
        } else if ("idno".equals(qName)) {
            currentTag = "<pubnum>";
        } else if ("docAuthor".equals(qName)) {
            currentTag = AUTHOR_TAG;
        } else if ("affiliation".equals(qName)) {
            currentTag = "<affiliation>";
        } else if ("address".equals(qName)) {
            currentTag = "<address>";
        } else if ("email".equals(qName)) {
            currentTag = "<email>";
        } else if ("date".equals(qName)) {
            currentTag = "<date>";
        } else if ("keywords".equals(qName)) {
            currentTag = "<keyword>";
        } else if ("title".equals(qName)) {
            currentTag = "<journal>";
        }
    }

    /** Tells whether closing the given element flushes and clears the accumulator. */
    private static boolean isFlushingElement(String qName) {
        return "titlePart".equals(qName)
                || "note".equals(qName)
                || "byline".equals(qName)
                || "affiliation".equals(qName)
                || "address".equals(qName)
                || "email".equals(qName)
                || "idno".equals(qName)
                || "date".equals(qName)
                || "biblScope".equals(qName)
                || "keywords".equals(qName)
                || "ptr".equals(qName)
                || "div".equals(qName)
                || "title".equals(qName);
    }

    /**
     * Picks the writer for a closing flushing element, or {@code null} when its text is
     * to be discarded. The element name takes precedence for affiliation, address and
     * keywords; the author tag is checked before the note element.
     */
    private Writer selectWriter(String qName) {
        if ("affiliation".equals(qName)) {
            return affiliationsWriter;
        }
        if ("address".equals(qName)) {
            return addressesWriter;
        }
        if ("keywords".equals(qName)) {
            return keywordsWriter;
        }
        // Constant-first comparison: currentTag is still null when no tracked element
        // has been opened yet, which must not abort the parse.
        if (AUTHOR_TAG.equals(currentTag)) {
            return authorsWriter;
        }
        if ("note".equals(qName)) {
            return notesWriter;
        }
        return null;
    }

    /**
     * Tokenizes the text on space, newline and tab and writes every token followed by a
     * space; line and page break markers become a newline. A final newline ends the record.
     */
    private static void writeTokens(Writer target, String text) throws IOException {
        StringTokenizer tokens = new StringTokenizer(text, " \n\t");
        while (tokens.hasMoreTokens()) {
            // trim() also strips control characters the tokenizer does not split on.
            String token = tokens.nextToken().trim();
            if (token.length() == 0) {
                continue;
            }
            if (LINE_BREAK_MARKER.equals(token) || PAGE_BREAK_MARKER.equals(token)) {
                target.write("\n");
            } else {
                writeToken(target, token);
            }
        }
        target.write("\n");
    }

    /**
     * Writes one non-empty token, splitting a leading opening delimiter and a trailing
     * punctuation character into separate space-terminated tokens.
     */
    private static void writeToken(Writer target, String token) throws IOException {
        int lastIndex = token.length() - 1;
        char lastChar = token.charAt(lastIndex);
        boolean endsWithPunctuation = punctuations.indexOf(lastChar) >= 0;

        int bodyStart = 0;
        char firstChar = token.charAt(0);
        // A lone delimiter is kept as the token body rather than split off.
        if (token.length() > 1 && OPENING_DELIMITERS.indexOf(firstChar) >= 0) {
            target.write(firstChar + " ");
            bodyStart = 1;
        }

        int bodyEnd = endsWithPunctuation ? lastIndex : token.length();
        String body = token.substring(bodyStart, bodyEnd);
        if (body.length() > 0) {
            target.write(body + " ");
        }
        if (endsWithPunctuation) {
            target.write(lastChar + " ");
        }
    }
}
