From 5744e7e88fdbf5e0f77db75cce78f0bf10c6c874 Mon Sep 17 00:00:00 2001
From: wood chen
Date: Wed, 18 Sep 2024 03:04:30 +0800
Subject: [PATCH] fix(link_filter.go): refine the link filter and improve link-matching accuracy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md           |  7 ++++++-
 core/link_filter.go | 32 +++++++++++++++++++++++---------
 2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 9a490f0..2b2fc20 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@
 
 1. TeleGuard: a Telegram bot that manages keywords for a group and automatically deletes messages containing those keywords.
 2. Binance price updater: periodically fetches and sends price information for the configured cryptocurrencies.
-3. Link interception: intercepts and deletes the second posting of any link whose domain is not whitelisted.
+3. Link interception: intercepts and deletes the second posting of any link whose domain is not whitelisted. Query parameters are kept during normalization, but the leading http(s):// scheme is stripped.
 
 These features are bundled into a single Docker container and run side by side.
 
@@ -33,6 +33,11 @@
 ### Link interception
 - A link with a non-whitelisted domain is intercepted and deleted the second time it is sent.
 
+### Whitelisted domains
+- The domain of each link is matched against the whitelist, including second- and third-level domains.
+- For example, if the whitelist contains "example.com", it matches "example.com", "sub.example.com", and "sub.sub.example.com".
+- Likewise, if the whitelist contains "sub.example.com", it matches "sub.example.com" and "subsub.sub.example.com", but not "example.com" or "othersub.example.com".
+
 ## Installation and configuration
 
diff --git a/core/link_filter.go b/core/link_filter.go
index c23f99c..8d80ef1 100644
--- a/core/link_filter.go
+++ b/core/link_filter.go
@@ -58,6 +58,9 @@ func (lf *LinkFilter) NormalizeLink(link string) string {
 		return link
 	}
 	normalized := fmt.Sprintf("%s%s", parsedURL.Hostname(), parsedURL.EscapedPath())
+	if parsedURL.RawQuery != "" {
+		normalized += "?" + parsedURL.RawQuery
+	}
 	result := strings.TrimSuffix(normalized, "/")
 	logger.Printf("Normalized link: %s -> %s", link, result)
 	return result
@@ -69,19 +72,30 @@ func (lf *LinkFilter) ExtractDomain(urlStr string) string {
 		logger.Printf("Error parsing URL: %v", err)
 		return urlStr
 	}
-	domain := parsedURL.Hostname()
-	parts := strings.Split(domain, ".")
-	if len(parts) > 2 {
-		domain = strings.Join(parts[len(parts)-2:], ".")
-	}
-	return strings.ToLower(domain)
+	return strings.ToLower(parsedURL.Hostname())
 }
+
+func (lf *LinkFilter) domainMatch(domain, whiteDomain string) bool {
+	domainParts := strings.Split(domain, ".")
+	whiteDomainParts := strings.Split(whiteDomain, ".")
+
+	if len(domainParts) < len(whiteDomainParts) {
+		return false
+	}
+
+	for i := 1; i <= len(whiteDomainParts); i++ {
+		if domainParts[len(domainParts)-i] != whiteDomainParts[len(whiteDomainParts)-i] {
+			return false
+		}
+	}
+
+	return true
+}
 
 func (lf *LinkFilter) IsWhitelisted(link string) bool {
 	domain := lf.ExtractDomain(link)
 	for _, whiteDomain := range lf.whitelist {
-		if domain == whiteDomain {
-			logger.Printf("Whitelist check for %s: Passed", link)
+		if lf.domainMatch(domain, whiteDomain) {
+			logger.Printf("Whitelist check for %s: Passed (matched %s)", link, whiteDomain)
 			return true
 		}
 	}
@@ -142,10 +156,10 @@ func (lf *LinkFilter) ShouldFilter(text string) (bool, []string) {
 
 	links := lf.linkPattern.FindAllString(text, -1)
 	logger.Printf("Found links: %v", links)
 
+	var newNonWhitelistedLinks []string
 	for _, link := range links {
 		normalizedLink := lf.NormalizeLink(link)
-		normalizedLink = strings.TrimPrefix(normalizedLink, "/")
 		if !lf.IsWhitelisted(normalizedLink) {
 			logger.Printf("Link not whitelisted: %s", normalizedLink)
 			found := false
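
Note: the standalone sketch below illustrates the behaviour this patch introduces. The domainMatch body is copied from the diff above; normalizeLink reproduces only the fragment of NormalizeLink visible in the diff, so the surrounding LinkFilter type, logging, error handling, and the package main with its sample inputs are illustrative assumptions rather than repository code.

package main

import (
	"fmt"
	"net/url"
	"strings"
)

// domainMatch mirrors the helper added in this patch: it compares whole DNS
// labels from the right, so a whitelist entry matches itself and any of its
// subdomains, but never a sibling such as "othersub.example.com" for
// "sub.example.com".
func domainMatch(domain, whiteDomain string) bool {
	domainParts := strings.Split(domain, ".")
	whiteDomainParts := strings.Split(whiteDomain, ".")

	if len(domainParts) < len(whiteDomainParts) {
		return false
	}

	for i := 1; i <= len(whiteDomainParts); i++ {
		if domainParts[len(domainParts)-i] != whiteDomainParts[len(whiteDomainParts)-i] {
			return false
		}
	}

	return true
}

// normalizeLink sketches the normalization described in the README change:
// the scheme is dropped, the query string is kept, and a trailing slash is
// trimmed. Error handling is reduced to returning the input unchanged.
func normalizeLink(link string) string {
	parsedURL, err := url.Parse(link)
	if err != nil {
		return link
	}
	normalized := fmt.Sprintf("%s%s", parsedURL.Hostname(), parsedURL.EscapedPath())
	if parsedURL.RawQuery != "" {
		normalized += "?" + parsedURL.RawQuery
	}
	return strings.TrimSuffix(normalized, "/")
}

func main() {
	// Whitelist examples taken from the README section added above.
	fmt.Println(domainMatch("sub.example.com", "example.com"))            // true
	fmt.Println(domainMatch("subsub.sub.example.com", "sub.example.com")) // true
	fmt.Println(domainMatch("example.com", "sub.example.com"))            // false
	fmt.Println(domainMatch("othersub.example.com", "sub.example.com"))   // false

	// Normalization keeps the query string and drops the scheme.
	fmt.Println(normalizeLink("https://example.com/"))            // example.com
	fmt.Println(normalizeLink("https://example.com/watch?v=abc")) // example.com/watch?v=abc
}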