package org.grobid.core.utilities;

import com.google.common.collect.ListMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.MultimapBuilder;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.saxon.om.Item;
import net.sf.saxon.om.SequenceIterator;
import net.sf.saxon.trans.XPathException;
import org.apache.commons.io.IOUtils;
import org.grobid.core.data.BibDataSetContext;

/* loaded from: input_file:org/grobid/core/utilities/BibDataSetContextExtractor.class */
/**
 * Builds {@link BibDataSetContext} entries from the result of running the bundled
 * XQuery ({@code /xq/get-citation-context-from-tei.xq}) against a document string.
 *
 * <p>Each raw context returned by the query is expected to contain a
 * {@code <ref>...</ref>} marker; the marker is used to cut a window of text around
 * the reference and is then stripped so that only its inner text remains.
 */
public class BibDataSetContextExtractor {
    /**
     * Matches a {@code <ref>...</ref>} span and captures its inner text in group 1.
     * {@link Pattern#DOTALL} lets the captured text contain line terminators. The
     * quantifier is greedy, so when several markers are present the match runs from
     * the first {@code <ref>} to the last {@code </ref>}.
     */
    public static final Pattern REF_PATTERN = Pattern.compile("<ref>(.*)</ref>", Pattern.DOTALL);

    /** Number of characters used on each side when cutting the window in {@link #cutContextSimple}. */
    public static final int CUT_DEFAULT_LENGTH = 50;

    /** Classpath location of the XQuery evaluated by {@link #getCitationReferences(String)}. */
    private static final String CONTEXT_EXTRACTION_XQ_PATH = "/xq/get-citation-context-from-tei.xq";

    /** Text of the XQuery, read once when the class is initialised. */
    private static final String CONTEXT_EXTRACTION_XQ = loadContextExtractionQuery();

    /**
     * Creates an empty list multimap with naturally ordered keys and linked-list value
     * collections.
     *
     * @param <K> key type
     * @param <V> value type
     * @return a new, empty multimap
     */
    static <K extends Comparable<? super K>, V> ListMultimap<K, V> multimap() {
        return MultimapBuilder.treeKeys().linkedListValues().build();
    }

    /**
     * Cuts a window of text around the first {@code <ref>} marker found in {@code str}.
     *
     * <p>The window starts up to {@link #CUT_DEFAULT_LENGTH} characters before the opening
     * tag. Its end is measured from the start of the opening tag (tag start + inner text
     * length + {@link #CUT_DEFAULT_LENGTH}); because the two tags themselves take 11
     * characters, the whole marker is always kept and at most 39 characters follow the
     * closing tag. Both bounds are clamped to the string.
     *
     * @param str text containing a {@code <ref>...</ref>} marker
     * @return the substring around the marker, marker tags included
     * @throws IllegalStateException if {@code str} contains no marker
     */
    protected static String cutContextSimple(String str) {
        Matcher matcher = REF_PATTERN.matcher(str);
        if (!matcher.find()) {
            throw new IllegalStateException("Implementation error: no <ref> found in" + str);
        }
        int refStart = matcher.start();
        int refTextLength = matcher.group(1).length();
        int from = Math.max(0, refStart - CUT_DEFAULT_LENGTH);
        int to = Math.min(str.length(), refStart + refTextLength + CUT_DEFAULT_LENGTH);
        return str.substring(from, to);
    }

    /**
     * Runs the context-extraction query on {@code str} and groups the resulting contexts
     * by the second value of each result tuple (stored as the context's TEI id).
     *
     * <p>The query result is consumed as consecutive groups of five items: the raw
     * context, the id, a value that is ignored, a value that must parse as a double, and
     * the document coordinates.
     *
     * @param str document to query
     * @return contexts keyed by id, keys in natural order, values in encounter order
     * @throws XPathException declared for failures of the query iteration
     * @throws IOException declared for failures while preparing the query processor
     * @throws IllegalStateException if the result ends in the middle of a group of five
     *         items, or if a raw context contains no {@code <ref>} marker
     * @throws NumberFormatException if the fourth value of a group is not a valid double
     */
    public static Multimap<String, BibDataSetContext> getCitationReferences(String str) throws XPathException, IOException {
        SequenceIterator results = new XQueryProcessor(str).getSequenceIterator(CONTEXT_EXTRACTION_XQ);
        ListMultimap<String, BibDataSetContext> contexts = multimap();

        Item first;
        while ((first = results.next()) != null) {
            String rawContext = first.getStringValue();
            String teiId = nextStringValue(results, "the id");
            // Third value of the group is not used, but must still be consumed.
            nextStringValue(results, "the third value");
            // Fourth value is not used either; it is still parsed so that malformed
            // numeric output keeps failing fast with a NumberFormatException.
            Double.parseDouble(nextStringValue(results, "the numeric value"));
            String coords = nextStringValue(results, "the coordinates");

            BibDataSetContext context = new BibDataSetContext();
            context.setContext(extractContextSentence(cutContextSimple(rawContext)));
            context.setDocumentCoords(coords);
            context.setTeiId(teiId);
            contexts.put(teiId, context);
        }
        return contexts;
    }

    /**
     * Returns the string value of the next item, failing with a descriptive error
     * instead of a bare NullPointerException when the sequence is exhausted.
     *
     * @param iterator query result iterator positioned inside a group of five items
     * @param what short description of the expected value, used in the error message
     * @return the string value of the next item
     * @throws XPathException declared for failures of the iteration
     * @throws IllegalStateException if the iterator has no further item
     */
    private static String nextStringValue(SequenceIterator iterator, String what) throws XPathException {
        Item item = iterator.next();
        if (item == null) {
            throw new IllegalStateException("Implementation error: query result ended before " + what);
        }
        return item.getStringValue();
    }

    /**
     * Replaces the {@code <ref>...</ref>} marker in {@code str} by its inner text.
     *
     * @param str text containing a marker
     * @return {@code str} with the marker tags removed
     * @throws IllegalStateException if {@code str} contains no marker
     */
    private static String extractContextSentence(String str) {
        Matcher matcher = REF_PATTERN.matcher(str);
        if (!matcher.find()) {
            throw new IllegalStateException("Implementation error: no <ref> found in" + str);
        }
        // quoteReplacement keeps '$' and '\' in the reference text literal.
        String refText = Matcher.quoteReplacement(matcher.group(1));
        return matcher.replaceAll(refText);
    }

    /**
     * Reads the XQuery text from the classpath as UTF-8.
     *
     * <p>try-with-resources guarantees the stream is closed even when reading fails.
     *
     * @return the query text
     * @throws IllegalStateException if the resource is not on the classpath
     * @throws RuntimeException wrapping the {@link IOException} if reading fails
     */
    private static String loadContextExtractionQuery() {
        try (InputStream in = BibDataSetContextExtractor.class.getResourceAsStream(CONTEXT_EXTRACTION_XQ_PATH)) {
            // getResourceAsStream signals a missing resource with null rather than an exception.
            if (in == null) {
                throw new IllegalStateException("Classpath resource not found: " + CONTEXT_EXTRACTION_XQ_PATH);
            }
            return IOUtils.toString(in, StandardCharsets.UTF_8);
        } catch (IOException e) {
            throw new RuntimeException("Cannot read classpath resource " + CONTEXT_EXTRACTION_XQ_PATH, e);
        }
    }
}
