package org.grobid.core.data.util;

import com.google.common.base.Splitter;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import org.grobid.core.utilities.TextUtilities;

/* loaded from: input_file:org/grobid/core/data/util/EmailSanitizer.class */
/**
 * Splits raw e-mail strings into individual addresses, normalises them and
 * drops entries that contain a blacklisted word or no {@code @} sign.
 */
public class EmailSanitizer {
    /** URL-encoded UTF-8 bytes E2 80 90, replaced by an ASCII '-' before URL decoding. */
    private static final Pattern DASHES_PATTERN = Pattern.compile("(%E2%80%90|%e2%80%90)");

    /** An address containing any of these substrings is rejected by {@link #postValidateAddress}. */
    private static final Set<String> BLACKLISTED_EMAIL_WORDS = Sets.newHashSet(
            "firstname", "lastname", "publication", "theses", "thesis", "editor", "press", "contact",
            "info", "feedback", "journal", "please", "pubs", "iza@iza", "admin", "help", "subs", "news",
            "archives", "order", "postmaster@", "informa", "reprint", "comunicacion@", "revista",
            "digitalcommons", "group@", "root@", "deposit@", "studies", "permiss", "print", "paper",
            "report", "support", "pedocs", "investigaciones@", "medicin", "copyright", "rights", "sales@",
            "pacific@", "redaktion", "publicidad", "surface@", "comstat@", "service@", "omnia@", "letter",
            "scholar", "staff", "delivery", "epubs", "office", "technolog", "compute", "elsevier");

    /** Removed from an address, in order: a leading mail label, whitespace, a trailing parenthesised part. */
    private static final Pattern[] EMAIL_STRIP_PATTERNS = {
        Pattern.compile("^(e\\-mail|email|e\\smail|mail):"),
        Pattern.compile("[\\r\\n\\t ]"),
        Pattern.compile("\\(.*\\)$")
    };

    /** Each match is replaced by a plain '@'. */
    private static final Pattern[] AT_SYMBOL_REPLACEMENTS = {
        Pattern.compile("&#64;"),
        Pattern.compile("@\\."),
        Pattern.compile("\\.@")
    };

    private static final Pattern EMAIL_SPLITTER_PATTERN = Pattern.compile("(\\sor\\s|,|;|/)");
    private static final Pattern AT_SPLITTER = Pattern.compile("@");

    /** Labels removed by {@link #initialReplace}, applied in order; precompiled to avoid recompiling per call. */
    private static final Pattern[] INITIAL_LABEL_PATTERNS = {
        Pattern.compile("(E|e)lectronic(\\s)(A|a)ddress(\\:)?"),
        Pattern.compile("^(e|E)?(\\-)?mail(\\:)?(\\s)(A|a)ddress(\\:)?"),
        Pattern.compile("^(e|E)?(\\-)?mail(\\:)?(\\s)?"),
        Pattern.compile("^[A-Z][a-z]+\\s+[A-Z][a-z]+(\\*)?(\\s)*-(\\s)*")
    };

    /**
     * Splits every input string on {@code " or "}, {@code ,}, {@code ;} and {@code /},
     * cleans and validates each piece, and returns the distinct surviving
     * addresses in first-seen order.
     *
     * @param list raw e-mail strings; may be {@code null}; {@code null} elements are skipped
     * @return the cleaned addresses, or {@code null} when {@code list} is {@code null}
     *         or no address survives
     */
    public List<String> splitAndClean(List<String> list) {
        if (list == null) {
            return null;
        }
        List<String> result = new ArrayList<String>();
        Set<String> seen = new HashSet<String>();
        for (String raw : list) {
            if (raw == null) {
                // A null entry carries no address; skipping avoids an NPE in initialReplace.
                continue;
            }
            // Locale.ROOT keeps lower-casing independent of the JVM default locale.
            String lowered = initialReplace(raw).toLowerCase(Locale.ROOT);
            List<String> candidates = Lists.newArrayList(
                    Splitter.on(EMAIL_SPLITTER_PATTERN).omitEmptyStrings().split(lowered));
            appendSharedDomain(candidates, lowered);

            for (String candidate : candidates) {
                String cleaned;
                try {
                    cleaned = cleanEmail(candidate);
                } catch (IllegalArgumentException ignored) {
                    // URLDecoder rejects malformed percent-escapes; such a candidate is dropped (best effort).
                    continue;
                } catch (UnsupportedEncodingException ignored) {
                    // Only thrown if the "UTF-8" charset name is not supported; drop the candidate.
                    continue;
                }
                if (cleaned == null || cleaned.isEmpty()) {
                    continue;
                }
                String validated = postValidateAddress(cleaned);
                // De-duplicate on the value that is actually stored in the result.
                if (validated != null && seen.add(validated)) {
                    result.add(validated);
                }
            }
        }
        return result.isEmpty() ? null : result;
    }

    /**
     * When the whole string splits into exactly two pieces around '@' and that '@'
     * sits in the last candidate, appends the last candidate's domain to every
     * earlier candidate (e.g. {@code "john, mary@example.com"}).
     *
     * @param candidates split pieces of {@code lowered}; modified in place
     * @param lowered    the string the candidates were split from
     */
    private static void appendSharedDomain(List<String> candidates, String lowered) {
        if (candidates.size() < 2) {
            return;
        }
        int atSeparatedParts = Lists.newArrayList(
                Splitter.on(AT_SPLITTER).omitEmptyStrings().split(lowered)).size();
        if (atSeparatedParts != 2) {
            return;
        }
        int lastIndex = candidates.size() - 1;
        String last = candidates.get(lastIndex);
        int at = last.indexOf('@');
        if (at < 0) {
            // The '@' belongs to an earlier candidate, so there is no trailing domain to share.
            return;
        }
        String domain = last.substring(at + 1);
        for (int i = 0; i < lastIndex; i++) {
            candidates.set(i, candidates.get(i) + "@" + domain);
        }
    }

    /**
     * Removes braces, the {@code TextUtilities} bracket constants, surrounding
     * whitespace and the label patterns in {@link #INITIAL_LABEL_PATTERNS}.
     *
     * @param str raw input string, must not be {@code null}
     * @return the string with those parts removed
     */
    private String initialReplace(String str) {
        String text = str.replace("{", "")
                .replace("}", "")
                .replace(TextUtilities.START_BRACKET, "")
                .replace(TextUtilities.END_BRACKET, "")
                .trim();
        for (Pattern label : INITIAL_LABEL_PATTERNS) {
            text = label.matcher(text).replaceAll("");
        }
        return text;
    }

    /**
     * Rejects addresses containing a blacklisted word, applies the strip patterns
     * and requires an '@' in what remains.
     *
     * @param str cleaned address, must not be {@code null}
     * @return the stripped address, or {@code null} when it is rejected
     */
    private static String postValidateAddress(String str) {
        for (String word : BLACKLISTED_EMAIL_WORDS) {
            if (str.contains(word)) {
                return null;
            }
        }
        String address = str;
        for (Pattern pattern : EMAIL_STRIP_PATTERNS) {
            address = pattern.matcher(address).replaceAll("");
        }
        return address.contains("@") ? address : null;
    }

    /**
     * URL-decodes, lower-cases and trims an address, then applies the strip
     * patterns and the '@' replacements.
     *
     * @param str candidate address; may be {@code null}
     * @return the cleaned address, or {@code null} when {@code str} is {@code null}
     * @throws UnsupportedEncodingException if the "UTF-8" charset name is not supported
     * @throws IllegalArgumentException     if {@code str} contains a malformed percent-escape
     */
    private static String cleanEmail(String str) throws UnsupportedEncodingException {
        if (str == null) {
            return null;
        }
        // URLDecoder turns '+' into a space, which the whitespace strip below would then delete;
        // encode it first so a literal '+' in the address survives decoding.
        String encoded = DASHES_PATTERN.matcher(str).replaceAll("-").replace("+", "%2B");
        String cleaned = URLDecoder.decode(encoded, "UTF-8").toLowerCase(Locale.ROOT).trim();
        for (Pattern pattern : EMAIL_STRIP_PATTERNS) {
            cleaned = pattern.matcher(cleaned).replaceAll("");
        }
        for (Pattern pattern : AT_SYMBOL_REPLACEMENTS) {
            cleaned = pattern.matcher(cleaned).replaceAll("@");
        }
        return cleaned;
    }
}
