package org.grobid.trainer.sax;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.apache.commons.collections4.CollectionUtils;
import org.grobid.core.analyzers.GrobidAnalyzer;
import org.grobid.core.lang.Language;
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.utilities.UnicodeUtil;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:org/grobid/trainer/sax/TEICitationSaxParser.class */
/**
 * SAX handler that converts TEI {@code <bibl>} elements into two parallel sequences per
 * citation: the layout tokens of the citation text and one label per token.
 *
 * <p>For every {@code <bibl>} element a new pair of lists is started. Text found inside a
 * recognised field element is tokenised and labelled with the tag selected when that element
 * was opened; text found between field elements is labelled {@code <other>}. The first
 * non-blank token of each written chunk receives the label prefixed with {@code I-}; blank
 * tokens receive a {@code null} label so that both lists always have the same length.
 *
 * <p>Element names are matched case-insensitively. Instances keep mutable parsing state and
 * are meant to be used for a single parse at a time.
 */
public class TEICitationSaxParser extends DefaultHandler {
    /** Label given to text that is not enclosed in a recognised field element. */
    private static final String OTHER_LABEL = "<other>";

    /** Label sequences of all completed citations, in document order. */
    private final List<List<String>> allLabeled = new ArrayList<>();

    /** Token sequences of all completed citations, parallel to {@link #allLabeled}. */
    private final List<List<LayoutToken>> allTokens = new ArrayList<>();

    /** Character data received since the last start or end tag. */
    private final StringBuilder accumulator = new StringBuilder();

    /** Label that the next written chunk of text will receive. */
    private String currentTag = null;

    /** Labels of the citation being parsed; {@code null} while outside any {@code <bibl>}. */
    private List<String> labeled = null;

    /** Tokens of the citation being parsed; {@code null} while outside any {@code <bibl>}. */
    private List<LayoutToken> tokens = null;

    /** Number of {@code <bibl>} elements completed so far. */
    public int nbCitations = 0;

    /** Creates a parser with empty result lists. */
    public TEICitationSaxParser() {
    }

    /** Buffers the character data until the next tag decides how it must be labelled. */
    @Override
    public void characters(char[] ch, int start, int length) {
        this.accumulator.append(ch, start, length);
    }

    /**
     * Returns the character data buffered since the last tag.
     *
     * @return the buffered text with leading and trailing whitespace trimmed
     */
    public String getText() {
        return this.accumulator.toString().trim();
    }

    /**
     * Returns the label sequences collected so far.
     *
     * @return one list of labels per completed citation (the internal list, not a copy)
     */
    public List<List<String>> getLabeledResult() {
        return this.allLabeled;
    }

    /**
     * Returns the token sequences collected so far.
     *
     * @return one list of tokens per completed citation (the internal list, not a copy)
     */
    public List<List<LayoutToken>> getTokensResult() {
        return this.allTokens;
    }

    /**
     * Writes the buffered text with the current label when a field element closes, and stores
     * the citation when {@code </bibl>} is reached. The buffer is emptied in every case.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        // Locale.ROOT keeps the tag matching independent of the JVM default locale
        // (a Turkish default locale would otherwise lower-case "TITLE" to "tıtle").
        switch (qName.toLowerCase(Locale.ROOT)) {
            case "author":
            case "authors":
            case "orgname":
            case "title":
            case "editor":
            case "editors":
            case "booktitle":
            case "date":
            case "journal":
            case "institution":
            case "tech":
            case "volume":
            case "pages":
            case "page":
            case "pubplace":
            case "note":
            case "web":
            case "publisher":
            case "idno":
            case "issue":
            case "pubnum":
            case "biblscope":
            case "ptr":
            case "keyword":
            case "keywords":
                writeField(getText());
                break;
            case "bibl":
                finishCitation();
                break;
            default:
                // Any other element (including <lb/> and <pb/>) contributes nothing:
                // the buffer is simply discarded below.
                break;
        }
        this.accumulator.setLength(0);
    }

    /**
     * Flushes pending text as {@code <other>}, then selects the label for the element that
     * is being opened. A {@code <bibl>} start tag begins a new citation.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        // Text seen before this start tag was not closed by a field element, so it is
        // written with the <other> label.
        String pending = getText();
        if (!pending.isEmpty()) {
            this.currentTag = OTHER_LABEL;
            writeField(pending);
        }
        this.accumulator.setLength(0);

        switch (qName.toLowerCase(Locale.ROOT)) {
            case "title":
                selectTitleTag(attributes.getValue("level"));
                break;
            case "author":
            case "authors":
                this.currentTag = "<author>";
                break;
            case "editor":
                this.currentTag = "<editor>";
                break;
            case "date":
                this.currentTag = "<date>";
                break;
            case "keywords":
            case "keyword":
                this.currentTag = "<keyword>";
                break;
            case "orgname":
                this.currentTag = "collaboration".equals(attributes.getValue("type"))
                        ? "<collaboration>"
                        : "<institution>";
                break;
            case "note":
                // NOTE(review): a <note> carrying attributes other than type="report"
                // keeps whatever label was active before; preserved as-is.
                if (attributes.getLength() == 0) {
                    this.currentTag = "<note>";
                } else if ("report".equals(attributes.getValue("type"))) {
                    this.currentTag = "<tech>";
                }
                break;
            case "biblscope":
                selectBiblScopeTag(attributes);
                break;
            case "pubplace":
                this.currentTag = "<location>";
                break;
            case "publisher":
                this.currentTag = "<publisher>";
                break;
            case "ptr":
                // NOTE(review): a <ptr> without type="web" keeps the previous label.
                if ("web".equals(attributes.getValue("type"))) {
                    this.currentTag = "<web>";
                }
                break;
            case "idno":
            case "pubnum":
                this.currentTag = "<pubnum>";
                break;
            case "bibl":
                this.labeled = new ArrayList<>();
                this.tokens = new ArrayList<>();
                break;
            default:
                break;
        }
    }

    /**
     * Maps the {@code level} attribute of a {@code <title>} element to a label.
     * NOTE(review): a missing or unrecognised level leaves the current label unchanged.
     */
    private void selectTitleTag(String level) {
        if ("a".equals(level)) {
            this.currentTag = "<title>";
        } else if ("j".equals(level) || "s".equals(level)) {
            this.currentTag = "<journal>";
        } else if ("m".equals(level)) {
            this.currentTag = "<booktitle>";
        }
    }

    /**
     * Maps the {@code type} / {@code unit} attribute of a {@code <biblScope>} element to a
     * label. Attributes are scanned in order, so when both are present and recognised the
     * later one decides.
     */
    private void selectBiblScopeTag(Attributes attributes) {
        int count = attributes.getLength();
        for (int idx = 0; idx < count; idx++) {
            String attrName = attributes.getQName(idx);
            String attrValue = attributes.getValue(idx);
            if (attrName == null || attrValue == null) {
                continue;
            }
            if (!attrName.equals("type") && !attrName.equals("unit")) {
                continue;
            }
            if (attrValue.equals("vol") || attrValue.equals("volume")) {
                this.currentTag = "<volume>";
            } else if (attrValue.equals("issue") || attrValue.equals("number")) {
                this.currentTag = "<issue>";
            } else if (attrValue.equals("pp") || attrValue.equals("page")) {
                this.currentTag = "<pages>";
            }
        }
    }

    /**
     * Completes the citation being parsed: writes trailing text as {@code <other>}, stores
     * the token and label lists, and leaves the "inside a citation" state.
     */
    private void finishCitation() {
        if (this.tokens == null) {
            // No citation is open (e.g. the enclosing <bibl> of a nested one was already
            // closed out); there is nothing to store.
            return;
        }
        String trailing = getText();
        if (!trailing.isEmpty()) {
            this.currentTag = OTHER_LABEL;
            writeField(trailing);
        }
        this.nbCitations++;
        this.allLabeled.add(this.labeled);
        this.allTokens.add(this.tokens);
        // Drop the references so that text appearing after </bibl> is ignored by
        // writeField instead of being appended to the citation that was just stored.
        this.labeled = null;
        this.tokens = null;
    }

    /**
     * Tokenises {@code text} and appends its tokens and labels to the current citation.
     * Text is ignored while no citation is open. If the default tokenisation yields nothing,
     * tokenisation is retried with an explicit English language.
     *
     * @param text the chunk of text to label with {@link #currentTag}
     */
    private void writeField(String text) {
        if (this.tokens == null) {
            return;
        }
        List<LayoutToken> fieldTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text);
        if (CollectionUtils.isEmpty(fieldTokens)) {
            fieldTokens = GrobidAnalyzer.getInstance()
                    .tokenizeWithLayoutToken(text, new Language("en", 1.0d));
        }
        if (CollectionUtils.isEmpty(fieldTokens)) {
            return;
        }

        boolean fieldStart = true;
        for (LayoutToken token : fieldTokens) {
            this.tokens.add(token);
            String tokenText = token.getText();
            boolean blank = tokenText.equals(" ")
                    || tokenText.equals("\n")
                    || UnicodeUtil.normaliseTextAndRemoveSpaces(tokenText).trim().isEmpty();
            if (blank) {
                // Keep the lists aligned: blank tokens are stored but carry no label.
                this.labeled.add(null);
            } else if (fieldStart) {
                this.labeled.add("I-" + this.currentTag);
                fieldStart = false;
            } else {
                this.labeled.add(this.currentTag);
            }
        }
    }
}
