package com.facebook.browser.liteclient.contentextraction;

import android.text.TextUtils;
import android.util.LruCache;
import com.facebook.browser.lite.util.BrowserURLUtil;
import com.facebook.browser.liteclient.qe.ExperimentsForBrowserLiteQEModule;
import com.facebook.inject.InjectorLike;
import com.facebook.inject.InjectorThreadStack;
import com.facebook.inject.ScopeSet;
import com.facebook.inject.SingletonScope;
import com.facebook.qe.api.QeAccessor;
import com.facebook.qe.module.QeInternalImplMethodAutoProvider;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import javax.inject.Inject;
import javax.inject.Singleton;

@Singleton
/* loaded from: classes2.dex */
public class BrowserLiteHtmlExtractor {
    /** Punctuation marks; g(String) requires a paragraph to contain at least one of them. */
    private static final String[] a = {".", ",", "?", ":", "!"};
    /** Tag prefixes; h(String) reports a paragraph that contains any of them. */
    private static final String[] b = {"<small", "<script"};
    /** Upper-case phrases; i(String) reports text that contains any of them anywhere. */
    private static final String[] c = {"PLEASE CONTACT US", "PLEASE RATE THIS", "WHAT YOU THINK", "YOUR COMMENT", "A COMMENT", "COMMENTER", "READER VIEWS", "READER POST", "READER COMMENT", "THANK YOU FOR", "ALL COMMENTS", "SIGN UP", "SIGNED UP", "YOUR COMMENT", "PRIVACY POLICY", "TERM OF USE", "EMAIL ADDRESS", "E-MAIL ADDRESS", "COPYRIGHT", "TOP STORIES", "ADVERTISEMENT", "OUTDATED BROWSER", "YOUR BROWSER", "UPGRADE YOUR BROWSER", "RELATED:", "WEEKLY UPDATES", "YOU ARE SUBSCRIBED", "SUBSCRIPTION", "SUBSCRIBE TO", "FAQ", "ALL RIGHTS RESERVED", "LIKE US", "ENTERPRISES LLC", "IMPROVE USER EXPERIENCE", "TOPICS:", "UPDATED", "PUBLISHED:", "MODIFIED:", "POSTED", "PHOTO BY", "VIDEO BY", "THIS CONTENT", "THE CONTENT", "THE VIEWS", "FACEBOOK FEED", "SHARE ON FACEBOOK", "JAVASCRIPT", "CSS", "ON THIS PAGE", "PLEASE CLICK", "FIXING THIS ERROR", "A BETTER SITE", "REFRESH", "CREATE AN ACCOUNT", "NEW PASSWORD", "TWEET", "FOLLOW US", "LATEST UPDATES", "TRY AGAIN", "FOR MORE DETAILS", "LOG OUT", "COOKIES", "ALL CONTENT", "YOUR FEEDBACK", "NEWS FEED", "THE SITE", "THIS REPORT", "SIGN IN", "YOUR ACCOUNT", "FIND OUT MORE ABOUT", "INTERNET EXPLORER", "THIS POST", "A POST", "CALL US", "SEND UP TO", "DELIVERS BUSINESS", "MICROSOFT ACCOUNT", "TO SUBSCRIBE", "FACEBOOK MESSAGE", "CLICK ON THE BUTTON", "AD-FREE", "BECOMING A MEMBER", "SINGLE DONATION", "NEXT COMMENT", "FACEBOOK MESSENGER"};
    /** Upper-case phrases; i(String) reports text that starts with any of them. */
    private static final String[] d = {"COMMENTS", "PLEASE HELP US", "CHAT WITH US"};
    /** Lazily created shared instance, published by a(InjectorLike). */
    private static volatile BrowserLiteHtmlExtractor t;
    /** Case-insensitive URL matcher; j(...) applies it to cleaned paragraph text. */
    public Pattern e = Pattern.compile("(?i)\\b((?:[a-z][\\w-]+:(?:/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}/)(?:[^\\s()<>]+|\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\))+(?:\\(([^\\s()<>]+|(\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:'\".,<>?]))");
    /** Slash-separated numeric date matcher (for example 2016/01/31); k(...) applies it to cleaned paragraph text. */
    public Pattern f = Pattern.compile("\\d{2,4}/\\d{1,2}/\\d{2,4}");
    /** Loose matcher for text ending in ".com"; referenced by j(...). */
    public Pattern g = Pattern.compile("(www.)*(.*?)\\.com");
    /** Matches one p element; group 2 is the paragraph body. */
    public Pattern h = Pattern.compile("(<p>|<p[^>]+>)([\\s\\S]*?)</p>");
    /** Matches the title element; group 1 is its body. */
    public Pattern i = Pattern.compile("<title[^>]*>([\\s\\S]*?)</title>");
    /** Matches a lang / data-lang / xml:lang attribute whose value is one of the listed English codes. */
    public Pattern j = Pattern.compile("(data-lang|lang|xml:lang)\\s*(=|:)\\s*\"\\s*(en|en-US|en-CA|en-GB|en-IN)\\s*\"");
    /** Matches a Content-Language http-equiv meta tag whose content is one of the listed English codes. */
    public Pattern k = Pattern.compile("<meta\\s+http-equiv\\s*=\\s*\"\\s*[c|C]ontent-[l|L]anguage\\s*\"\\s+content\\s*=\\s*\"\\s*(en|en-US|en-CA|en-GB|en-IN)\\s*\"\\s*/?>");
    /** Matches an og:locale meta tag whose content is one of the listed English locales. */
    public Pattern l = Pattern.compile("<meta\\s+property\\s*=\\s*\"\\s*og:locale\\s*\"\\s+content\\s*=\\s*\"\\s*(en_US|en_PI|en_GB|en_IN|en_UD)\\s*\"[^>]*?>");
    /** Matches an og:site_name meta tag; group 1 is the content value. */
    public Pattern m = Pattern.compile("<meta\\s+property\\s*=\\s*\"\\s*og:site_name\\s*\"\\s+content\\s*=\\s*\"([^>]*?)\"[^>]*?>");
    /** Matches an apple-touch-icon link tag with rel before href; group 2 is the href value. */
    public Pattern n = Pattern.compile("<link[^>]+?rel\\s*=\\s*\"\\s*(apple-touch-icon|apple-touch-icon-precomposed)\\s*\"[^>]+?href\\s*=\\s*\"([^>]*?)\"[^>]*?>");
    /** Matches an apple-touch-icon link tag with href before rel; group 2 is the href value. */
    public Pattern o = Pattern.compile("<link[^>]+?(href)\\s*=\\s*\"([^>]*?)\"[^>]+?rel\\s*=\\s*\"\\s*(apple-touch-icon|apple-touch-icon-precomposed)\\s*\"[^>]*?>");
    /** Applied by b(String, String) to the value of BrowserURLUtil.b(url); group 1 is the label before one of the listed top-level domains. */
    public Pattern p = Pattern.compile("[^>]*?\\.([^.]*?)\\.(edu|com|net|org|biz|info|tv|cc)");
    /** Cache holding up to 10 entries; a(String, String) keys it by BrowserURLUtil.b(url) and stores icon URLs in it. */
    public LruCache<String, String> q = new LruCache<>(10);
    /** Source of the experiment parameters (thresholds and switches) read throughout this class. */
    public final QeAccessor r;
    /**
     * Paragraph index recorded by a(BrowserLiteHtmlExtractor, String, Pattern, int) and copied into
     * PageInfo.e by a(String, String). It is never reset in this class.
     */
    public int s;

    /* loaded from: classes6.dex */
    /** Outcome of a(String, String): the extracted page data, if any, plus a status string. */
    public class ExtractResult {

        /** Extracted page data; a(String, String) leaves it null on every non-success path. */
        @Nullable
        public PageInfo a;

        /**
         * Status set by a(String, String): "non_english", "empty_title", "content_too_short"
         * or "Extracted".
         */
        @Nullable
        public String b;
    }

    /* loaded from: classes6.dex */
    /** Data that a(String, String) extracts from a page. */
    public class PageInfo {

        /** Page title; prefixed with "[FB-Only] " when experiment flag s is on. */
        @Nullable
        public String a;

        /** Site name as returned by b(String, String). */
        @Nullable
        public String b;

        /** Extracted paragraph text, paragraphs separated by blank lines. */
        @Nullable
        public String c;

        /** Icon URL taken from an apple-touch-icon link tag or from the icon cache. */
        @Nullable
        public String d;
        /** Copy of the extractor's s field at the time the result was built. */
        public int e;
    }

    /**
     * Creates an extractor that reads its experiment parameters from the given accessor.
     *
     * @param qeAccessor accessor stored in field r and queried for every threshold and switch
     */
    @Inject
    public BrowserLiteHtmlExtractor(QeAccessor qeAccessor) {
        this.r = qeAccessor;
    }

    /**
     * Returns the shared extractor, creating it on first use.
     *
     * <p>The instance is created only when none exists yet and {@code injectorLike} is non-null;
     * creation happens while holding the class lock and re-checks the field after acquiring it.
     *
     * @param injectorLike injector used to obtain the SingletonScope and the QeAccessor; may be null
     * @return the shared instance, or null if none has been created and {@code injectorLike} is null
     */
    public static BrowserLiteHtmlExtractor a(@Nullable InjectorLike injectorLike) {
        if (t == null) {
            synchronized (BrowserLiteHtmlExtractor.class) {
                if (t == null && injectorLike != null) {
                    // Save the current ScopeSet value so the outer finally can put it back.
                    ScopeSet a2 = ScopeSet.a();
                    byte b2 = a2.b();
                    try {
                        InjectorThreadStack enterScope = ((SingletonScope) injectorLike.getInstance(SingletonScope.class)).enterScope();
                        try {
                            t = new BrowserLiteHtmlExtractor(QeInternalImplMethodAutoProvider.a(injectorLike.getApplicationInjector()));
                        } finally {
                            // NOTE(review): presumably leaves the scope entered above - confirm against SingletonScope.
                            SingletonScope.a(enterScope);
                        }
                    } finally {
                        // Restore the saved ScopeSet value even if construction failed.
                        a2.a = b2;
                    }
                }
            }
        }
        return t;
    }

    /**
     * Collects up to {@code i} readable paragraphs from {@code str} and joins them with blank lines.
     *
     * <p>Each match of {@code pattern} (group 2 is the paragraph body) is trimmed, filtered by
     * length, punctuation, disallowed tags and phrases, stripped of markup, and then filtered
     * again by newline count, text-to-markup ratio, URLs and dates. A paragraph that survives is
     * appended only if it has enough words (or the previous candidate did) and is not a repeat of
     * the previous candidate.
     *
     * <p>Side effect: when the number of kept paragraphs reaches {@code i - 1}, the zero-based
     * index of the current match is stored in {@code browserLiteHtmlExtractor.s}.
     *
     * @param browserLiteHtmlExtractor instance supplying experiment values and patterns
     * @param str HTML to scan; null yields null
     * @param pattern paragraph pattern whose group 2 is the paragraph body
     * @param i maximum number of paragraphs to keep
     * @return the joined text, or null when the paragraph and word counts miss the thresholds
     */
    @Nullable
    public static String a(BrowserLiteHtmlExtractor browserLiteHtmlExtractor, String str, Pattern pattern, int i) {
        if (str == null) {
            return null;
        }
        // Inline elements whose tags are dropped while their inner text is kept, in application order.
        final String[] unwrapPatterns = {
            "<a[^>]*>([\\s\\S]*?)</a>",
            "<cite[^>]*>([\\s\\S]*?)</cite>",
            "<em[^>]*>([\\s\\S]*?)</em>",
            "<span[\\s\\S]+?>([\\s\\S]*?)</span>",
            "<i[^>]*>([\\s\\S]*?)</i>",
            "<b[^>]*>([\\s\\S]*?)</b>",
            "<time[^>]*>([\\s\\S]*?)</time>",
            "<strong[^>]*>([\\s\\S]*?)</strong>"
        };
        final QeAccessor qe = browserLiteHtmlExtractor.r;
        final StringBuilder out = new StringBuilder();
        final Matcher paragraphs = pattern.matcher(str);
        String previousCandidate = null;
        int seen = 0;
        int kept = 0;
        int wordTotal = 0;
        while (paragraphs.find() && kept < i) {
            String raw = paragraphs.group(2);
            seen++;
            if (raw == null) {
                continue;
            }
            String body = raw.trim();
            // Recorded before any nested "<p>" prefix is cut off.
            boolean hasInlineMarkup = f(body);
            if (body.contains("<p>")) {
                body = body.substring(body.lastIndexOf("<p>") + 3);
            }
            int markupLength = body.length();
            if (markupLength < qe.a(ExperimentsForBrowserLiteQEModule.g, 35) || !g(body) || h(body)) {
                continue;
            }
            String decoded = g(body, "&(#\\d+|#[\\w\\d]+|\\w+);");
            if (i(decoded.toUpperCase(Locale.US))) {
                continue;
            }
            String text = decoded;
            for (String tagPattern : unwrapPatterns) {
                text = a(text, tagPattern, 1);
            }
            text = f(text, "<br\\s*/>");
            text = f(text, "<([\\s\\S]*?)/?>");
            int textLength = text.length();
            if (a(text, qe.a(ExperimentsForBrowserLiteQEModule.d, 6))
                    || a(browserLiteHtmlExtractor, textLength, markupLength, hasInlineMarkup)
                    || j(browserLiteHtmlExtractor, text)
                    || k(browserLiteHtmlExtractor, text)) {
                continue;
            }
            String normalized = text.trim().replace('\n', ' ').replace("\\", "").replaceAll("\\s{2,}", " ");
            if (d(browserLiteHtmlExtractor, previousCandidate, normalized) && !e(previousCandidate, normalized)) {
                out.append(normalized);
                kept++;
                if (kept == i - 1) {
                    browserLiteHtmlExtractor.s = seen - 1;
                }
                wordTotal += e(normalized);
                if (kept < i) {
                    out.append("\n\n");
                }
            }
            // The candidate becomes "previous" whether or not it was appended.
            previousCandidate = normalized;
        }
        boolean enough = (kept > qe.a(ExperimentsForBrowserLiteQEModule.i, 1) || wordTotal > qe.a(ExperimentsForBrowserLiteQEModule.k, 50))
                && wordTotal > qe.a(ExperimentsForBrowserLiteQEModule.j, 50);
        return enough ? out.toString() : null;
    }

    /**
     * Replaces every match of {@code str2} in {@code str} with the text of capture group {@code i}.
     *
     * @param str input text; null yields null
     * @param str2 regular expression to search for
     * @param i index of the capture group that replaces each match
     * @return the rewritten text
     */
    @Nullable
    private static String a(String str, String str2, int i) {
        if (str == null) {
            return null;
        }
        Matcher hits = Pattern.compile(str2).matcher(str);
        StringBuilder rebuilt = new StringBuilder();
        int cursor = 0;
        for (; hits.find(); cursor = hits.end()) {
            String kept = hits.group(i);
            rebuilt.append(str, cursor, hits.start()).append(kept);
        }
        return rebuilt.append(str, cursor, str.length()).toString();
    }

    /**
     * Tells whether the cleaned text is a small fraction of the original markup in a paragraph
     * that had inline markup.
     *
     * @param browserLiteHtmlExtractor instance supplying the ratio threshold (experiment h, default 0.4)
     * @param i length of the cleaned text
     * @param i2 length of the paragraph before cleaning
     * @param z whether the paragraph contained inline markup
     * @return true when {@code i / i2} is below the threshold and {@code z} is set
     */
    private static boolean a(BrowserLiteHtmlExtractor browserLiteHtmlExtractor, int i, int i2, boolean z) {
        float ratio = ((float) i) / ((float) i2);
        // The threshold is read before z is consulted, as in the single-expression form.
        float threshold = browserLiteHtmlExtractor.r.a(ExperimentsForBrowserLiteQEModule.h, 0.4f);
        if (ratio < threshold) {
            return z;
        }
        return false;
    }

    /**
     * Tells whether {@code str} contains more than {@code i} line-feed characters.
     *
     * @param str text to scan
     * @param i largest line-feed count that is still acceptable
     * @return true when the number of line feeds exceeds {@code i}
     */
    private static boolean a(String str, int i) {
        int lineFeeds = 0;
        int at = str.indexOf('\n');
        while (at != -1) {
            lineFeeds++;
            at = str.indexOf('\n', at + 1);
        }
        return lineFeeds > i;
    }

    /**
     * Tells whether a candidate paragraph is long enough to keep, either on its own or because
     * the previous candidate was long.
     *
     * @param browserLiteHtmlExtractor instance supplying the word thresholds (experiments l and m)
     * @param str previous candidate; may be null
     * @param str2 current candidate
     * @return true when {@code str2} has more than l words (default 20) or {@code str} has at
     *     least m words (default 30)
     */
    private static boolean d(BrowserLiteHtmlExtractor browserLiteHtmlExtractor, String str, String str2) {
        int currentWords = e(str2);
        if (currentWords > browserLiteHtmlExtractor.r.a(ExperimentsForBrowserLiteQEModule.l, 20)) {
            return true;
        }
        int previousWords = e(str);
        return previousWords >= browserLiteHtmlExtractor.r.a(ExperimentsForBrowserLiteQEModule.m, 30);
    }

    /**
     * Counts the whitespace-separated pieces of {@code str}.
     *
     * @param str text to count; null or empty counts as zero
     * @return the number of pieces produced by splitting on runs of whitespace
     */
    private static int e(String str) {
        return TextUtils.isEmpty(str) ? 0 : str.split("\\s+").length;
    }

    /**
     * Tells whether two strings are equal once surrounding whitespace is ignored.
     *
     * @param str first string; may be null
     * @param str2 second string; may be null
     * @return true only when both are non-null and their trimmed forms are equal
     */
    private static boolean e(String str, String str2) {
        return str != null && str2 != null && str2.trim().equals(str.trim());
    }

    /**
     * Deletes every match of the regular expression {@code str2} from {@code str}.
     *
     * @param str input text; null yields null
     * @param str2 regular expression whose matches are removed
     * @return {@code str} without the matched spans
     */
    @Nullable
    public static String f(String str, String str2) {
        if (str == null) {
            return null;
        }
        // Replacing each match with the empty string is the same as skipping over it.
        return Pattern.compile(str2).matcher(str).replaceAll("");
    }

    /**
     * Tells whether {@code str} contains any of the substrings "img", "<a" or "<time".
     *
     * @param str paragraph markup to inspect
     * @return true when at least one of the substrings is present
     */
    private static boolean f(String str) {
        for (String marker : new String[] {"img", "<a", "<time"}) {
            if (str.contains(marker)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Replaces every match of {@code str2} (callers pass an HTML-entity pattern) with the plain
     * text that l(String) maps it to.
     *
     * <p>An ampersand entity ("&amp;amp;" or "&amp;#38;") is treated specially: when it is directly
     * followed by '#' and a ';' occurs within the next five characters, the ampersand plus that
     * tail is decoded as a single double-escaped entity; otherwise a literal "&amp;" is emitted.
     *
     * <p>Unlike a naive scan, this handles an ampersand entity at the very end of the input
     * (emitting "&amp;" instead of reading past the end) and ignores matches that begin inside a
     * tail already consumed by the double-escape handling.
     *
     * @param str input text; null yields null
     * @param str2 regular expression that matches the entities to decode
     * @return the decoded text
     */
    @Nullable
    public static String g(String str, String str2) {
        if (str == null) {
            return null;
        }
        StringBuilder sb = new StringBuilder();
        Matcher matcher = Pattern.compile(str2).matcher(str);
        int consumed = 0;
        while (matcher.find()) {
            if (matcher.start() < consumed) {
                // Lies inside a double-escaped tail that was already decoded below.
                continue;
            }
            String entity = matcher.group();
            sb.append(str, consumed, matcher.start());
            consumed = matcher.end();
            sb.append(l(entity));
            if (entity.equals("&amp;") || entity.equals("&#38;")) {
                // Bounds check first: the entity may be the last thing in the input.
                boolean hashFollows = consumed < str.length() && str.charAt(consumed) == '#';
                int semicolon = hashFollows ? str.indexOf(';', consumed) : -1;
                if (semicolon != -1 && semicolon <= consumed + 5) {
                    sb.append(l("&" + str.substring(consumed, semicolon + 1)));
                    consumed = semicolon + 1;
                } else {
                    sb.append("&");
                }
            }
        }
        sb.append(str, consumed, str.length());
        return sb.toString();
    }

    /**
     * Tells whether {@code str} contains at least one entry of the punctuation list {@code a}.
     *
     * @param str paragraph markup to inspect
     * @return true when any listed punctuation mark occurs in {@code str}
     */
    private static boolean g(String str) {
        boolean found = false;
        for (int idx = 0; idx < a.length && !found; idx++) {
            found = str.contains(a[idx]);
        }
        return found;
    }

    /**
     * Tells whether {@code str} contains at least one entry of the tag-prefix list {@code b}.
     *
     * @param str paragraph markup to inspect
     * @return true when any listed tag prefix occurs in {@code str}
     */
    private static boolean h(String str) {
        boolean found = false;
        for (int idx = 0; idx < b.length && !found; idx++) {
            found = str.contains(b[idx]);
        }
        return found;
    }

    /**
     * Tells whether {@code str} contains any phrase from list {@code c} or starts with any phrase
     * from list {@code d}. The comparison is case-sensitive against upper-case phrases.
     *
     * @param str text to inspect
     * @return true when a listed phrase is found
     */
    private static boolean i(String str) {
        for (int idx = 0; idx < c.length; idx++) {
            if (str.contains(c[idx])) {
                return true;
            }
        }
        for (int idx = 0; idx < d.length; idx++) {
            if (str.startsWith(d[idx])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether link-free text mentions a URL or a ".com" domain.
     *
     * <p>Text that still contains "<a" is never reported. Otherwise the text is tested against
     * the URL pattern (field e) and then against the ".com" pattern (field g); each pattern is
     * skipped when its field is null. The second test uses field g itself, so the ".com" pattern
     * is actually applied rather than the URL pattern being run twice.
     *
     * @param browserLiteHtmlExtractor instance supplying the two patterns
     * @param str cleaned paragraph text
     * @return true when either pattern finds a match
     */
    private static boolean j(BrowserLiteHtmlExtractor browserLiteHtmlExtractor, String str) {
        if (str.contains("<a")) {
            return false;
        }
        Pattern urlPattern = browserLiteHtmlExtractor.e;
        if (urlPattern != null && urlPattern.matcher(str).find()) {
            return true;
        }
        Pattern dotComPattern = browserLiteHtmlExtractor.g;
        return dotComPattern != null && dotComPattern.matcher(str).find();
    }

    /**
     * Tells whether {@code str} contains a match of the date pattern held in field f.
     *
     * @param browserLiteHtmlExtractor instance supplying the date pattern
     * @param str cleaned paragraph text
     * @return true when the pattern is set and finds a match; false when the pattern is null
     */
    private static boolean k(BrowserLiteHtmlExtractor browserLiteHtmlExtractor, String str) {
        if (browserLiteHtmlExtractor.f == null) {
            return false;
        }
        return browserLiteHtmlExtractor.f.matcher(str).find();
    }

    /**
     * Maps one HTML entity to its plain-text replacement.
     *
     * @param str the entity text, including the leading ampersand and trailing semicolon
     * @return an apostrophe, hyphen, three dots, double quote or space for the recognised
     *     entities; the empty string for anything else; null when {@code str} is null
     */
    @Nullable
    private static String l(String str) {
        if (str == null) {
            return null;
        }
        switch (str) {
            case "&rsquo;":
            case "&#8217;":
            case "&#8216;":
            case "&#39;":
            case "&#039;":
            case "&#x27;":
            case "&lsquo;":
                return "'";
            case "&ndash;":
            case "&mdash;":
            case "&#8212;":
            case "&#8211;":
                return "-";
            case "&#8230;":
            case "&hellip;":
                return "...";
            case "&ldquo;":
            case "&rdquo;":
            case "&#8220;":
            case "&#8221;":
            case "&quot;":
                return "\"";
            case "&nbsp;":
            case "&#32;":
                return " ";
            default:
                return "";
        }
    }

    /**
     * Extracts title, text, site name and icon from a page.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Unless experiment n is on, the HTML must carry one of the English language markers
     *       (patterns j, k, l); otherwise the status is "non_english".</li>
     *   <li>The first non-blank title element is cleaned and cut at the first '|'; if no title
     *       results, the status is "empty_title".</li>
     *   <li>Paragraph text is collected (experiment o caps the paragraph count, default 3); if
     *       none results, the status is "content_too_short".</li>
     *   <li>Otherwise a PageInfo is filled in and the status is "Extracted".</li>
     * </ol>
     *
     * <p>The title scan stops at the first usable title instead of running to the end of the
     * input and discarding what it found. Null HTML is treated as a page without a title, and a
     * URL without a host simply yields no icon.
     *
     * @param str page URL; may be null, in which case site name and icon are null
     * @param str2 page HTML; may be null
     * @return a result whose status string is always set and whose page data is set on success
     */
    @Nullable
    public final ExtractResult a(String str, String str2) {
        ExtractResult extractResult = new ExtractResult();
        if (!this.r.a(ExperimentsForBrowserLiteQEModule.n, false) && !hasEnglishMarker(str2)) {
            extractResult.a = null;
            extractResult.b = "non_english";
            return extractResult;
        }
        String title = extractTitle(str2);
        if (TextUtils.isEmpty(title)) {
            extractResult.a = null;
            extractResult.b = "empty_title";
            return extractResult;
        }
        String text = str2 == null ? null : a(this, str2, this.h, this.r.a(ExperimentsForBrowserLiteQEModule.o, 3));
        if (TextUtils.isEmpty(text)) {
            extractResult.a = null;
            extractResult.b = "content_too_short";
            return extractResult;
        }
        PageInfo pageInfo = new PageInfo();
        if (this.r.a(ExperimentsForBrowserLiteQEModule.s, false)) {
            pageInfo.a = "[FB-Only] " + title;
        } else {
            pageInfo.a = title;
        }
        pageInfo.c = text;
        pageInfo.b = b(str, str2);
        pageInfo.d = resolveIconUrl(str, str2);
        pageInfo.e = this.s;
        extractResult.a = pageInfo;
        extractResult.b = "Extracted";
        return extractResult;
    }

    /** Tells whether the HTML matches any of the English language patterns j, k or l. */
    private boolean hasEnglishMarker(@Nullable String html) {
        return html != null
                && (this.j.matcher(html).find() || this.k.matcher(html).find() || this.l.matcher(html).find());
    }

    /** Returns the cleaned text of the first non-blank title element, cut at the first '|', or null. */
    @Nullable
    private String extractTitle(@Nullable String html) {
        if (html == null) {
            return null;
        }
        Matcher matcher = this.i.matcher(html);
        while (matcher.find()) {
            String group = matcher.group(1);
            if (TextUtils.isEmpty(group)) {
                continue;
            }
            String trimmed = group.trim();
            if (trimmed.length() == 0) {
                continue;
            }
            String title = f(g(trimmed, "&(#\\d+|#[\\w\\d]+|\\w+);"), "<([\\s\\S]*?)/?>").trim().replace('\n', ' ').replace("\\", "").replaceAll("\\s{2,}", " ");
            String[] parts = title.split("\\|");
            // Stop at the first usable title; later title elements are ignored.
            return parts.length > 1 ? parts[0].trim() : title;
        }
        return null;
    }

    /** Returns the icon URL for the page from the cache or from an apple-touch-icon link tag, or null. */
    @Nullable
    private String resolveIconUrl(@Nullable String pageUrl, String html) {
        if (pageUrl == null) {
            return null;
        }
        String host = BrowserURLUtil.b(pageUrl);
        if (host == null) {
            // LruCache rejects null keys, so a URL without a host cannot be looked up.
            return null;
        }
        String cached = this.q.get(host);
        if (cached != null) {
            return cached;
        }
        String href = firstNonBlankGroup(this.n.matcher(html), 2);
        if (href == null) {
            href = firstNonBlankGroup(this.o.matcher(html), 2);
        }
        if (href == null) {
            return null;
        }
        if (BrowserURLUtil.b(href) == null) {
            if (href.startsWith("//")) {
                href = href.substring(1);
            }
            // NOTE(review): the scheme test looks at the icon href, not the page URL - confirm intent.
            href = href.startsWith("http:") ? "http://" + host + href : "https://" + host + href;
            // NOTE(review): only hrefs rebuilt here are cached; hrefs that already have a host are not.
            this.q.put(host, href);
        }
        return href;
    }

    /** Returns the first trimmed, non-empty value of the given group among the remaining matches, or null. */
    @Nullable
    private static String firstNonBlankGroup(Matcher matcher, int group) {
        while (matcher.find()) {
            String value = matcher.group(group);
            if (TextUtils.isEmpty(value)) {
                continue;
            }
            value = value.trim();
            if (!TextUtils.isEmpty(value)) {
                return value;
            }
        }
        return null;
    }

    /**
     * Derives an upper-case site name for a page.
     *
     * <p>The og:site_name meta tag (pattern m) is preferred. Without it the name is taken from
     * the value of {@code BrowserURLUtil.b(str)}: the label in front of a listed top-level domain
     * (pattern p); otherwise the second label when the value starts with "www.", "m." or "3g.";
     * otherwise the first label. A label found in any of these branches is now returned instead
     * of being overwritten with null.
     *
     * @param str page URL; null yields null
     * @param str2 page HTML; may be null, in which case only the URL is used
     * @return the site name in upper case (Locale.US), or null when none can be derived
     */
    @Nullable
    public final String b(String str, String str2) {
        if (str == null) {
            return null;
        }
        if (str2 != null) {
            Matcher metaMatcher = this.m.matcher(str2);
            if (metaMatcher.find()) {
                String content = metaMatcher.group(1);
                if (!TextUtils.isEmpty(content)) {
                    String cleaned = f(g(content.trim(), "&(#\\d+|#[\\w\\d]+|\\w+);"), "<([\\s\\S]*?)/?>");
                    if (cleaned != null) {
                        return cleaned.toUpperCase(Locale.US);
                    }
                }
            }
        }
        String host = BrowserURLUtil.b(str);
        if (host == null) {
            return null;
        }
        String name = null;
        Matcher hostMatcher = this.p.matcher(host);
        if (hostMatcher.find()) {
            String label = hostMatcher.group(1);
            if (label != null) {
                name = label.trim();
            }
        } else if (host.startsWith("www.") || host.startsWith("m.") || host.startsWith("3g.")) {
            int firstDot = host.indexOf('.');
            int secondDot = host.indexOf('.', firstDot + 1);
            if (secondDot != -1) {
                name = host.substring(firstDot + 1, secondDot);
            }
        } else {
            int firstDot = host.indexOf('.');
            if (firstDot != -1) {
                name = host.substring(0, firstDot);
            }
        }
        return name == null ? null : name.toUpperCase(Locale.US);
    }
}
