Mirror of https://github.com/woodchen-ink/Q58Bot.git, synced 2025-07-18 13:52:07 +08:00
fix(link_filter.go): refine the link filter and improve the accuracy of link matching.
parent eb111a34da
commit 5744e7e88f
@@ -13,7 +13,7 @@

1. TeleGuard: a Telegram bot that manages a group's keyword list and automatically deletes messages containing those keywords.
2. Binance price updater: periodically fetches and sends price information for the specified cryptocurrencies.
-3. Link interception: intercepts and retracts the second posting of a link whose domain is not on the whitelist.
+3. Link interception: intercepts and retracts the second posting of a link whose domain is not on the whitelist. During normalization the query parameters are kept, but the leading http/https scheme is stripped (see the sketch below).

These features are bundled into a single Docker container and can run at the same time.
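A minimal sketch of the normalization rule described in item 3, using only the standard net/url package. The URL and the variable names are illustrative, not the project's code; the actual implementation is the NormalizeLink hunk further down in this commit.

```go
package main

import (
	"fmt"
	"net/url"
)

func main() {
	// Rule from item 3: strip the scheme, keep the query string.
	link := "https://example.com/page?id=42"

	u, err := url.Parse(link)
	if err != nil {
		panic(err)
	}

	normalized := u.Hostname() + u.EscapedPath()
	if u.RawQuery != "" {
		normalized += "?" + u.RawQuery // query parameters are preserved
	}

	fmt.Println(normalized) // example.com/page?id=42 (no "https://" prefix)
}
```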
@@ -33,6 +33,11 @@

### Link interception

- A link whose domain is not on the whitelist is intercepted and retracted the second time it is sent.

+### Whitelisted domains
+- The domain in the link is matched, including second- and third-level domains (see the sketch after this list).
+- For example, if "example.com" is on the whitelist, it matches "example.com", "sub.example.com", and "sub.sub.example.com".
+- Likewise, if "sub.example.com" is on the whitelist, it matches "sub.example.com" and "subsub.sub.example.com", but it does not match "example.com" or "othersub.example.com".
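To make the matching rule concrete, here is a small, self-contained Go sketch of suffix-based domain matching that reproduces the examples from the list above. It mirrors the domainMatch helper added in this commit (shown in the link_filter.go hunk below) but is written here as a standalone illustration, not copied from the repository.

```go
package main

import (
	"fmt"
	"strings"
)

// matchesWhitelist reports whether domain falls under whiteDomain:
// every label of whiteDomain must equal the corresponding trailing label of domain.
func matchesWhitelist(domain, whiteDomain string) bool {
	d := strings.Split(domain, ".")
	w := strings.Split(whiteDomain, ".")
	if len(d) < len(w) {
		return false
	}
	for i := 1; i <= len(w); i++ {
		if d[len(d)-i] != w[len(w)-i] {
			return false
		}
	}
	return true
}

func main() {
	fmt.Println(matchesWhitelist("example.com", "example.com"))                // true
	fmt.Println(matchesWhitelist("sub.example.com", "example.com"))            // true
	fmt.Println(matchesWhitelist("sub.sub.example.com", "example.com"))        // true
	fmt.Println(matchesWhitelist("subsub.sub.example.com", "sub.example.com")) // true
	fmt.Println(matchesWhitelist("example.com", "sub.example.com"))            // false
	fmt.Println(matchesWhitelist("othersub.example.com", "sub.example.com"))   // false
}
```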

## Installation and configuration
@@ -58,6 +58,9 @@ func (lf *LinkFilter) NormalizeLink(link string) string {
		return link
	}
	normalized := fmt.Sprintf("%s%s", parsedURL.Hostname(), parsedURL.EscapedPath())
+	if parsedURL.RawQuery != "" {
+		normalized += "?" + parsedURL.RawQuery
+	}
	result := strings.TrimSuffix(normalized, "/")
	logger.Printf("Normalized link: %s -> %s", link, result)
	return result
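Two details of the hunk above are easy to miss: EscapedPath() returns the percent-encoded form of the path (the Path field holds the decoded form), and strings.TrimSuffix only drops a trailing "/" when it is the very last character, so a trailing slash in the path survives whenever a query string follows it. A self-contained sketch (not the repository's code) that repeats the same steps so the behaviour can be checked:

```go
package main

import (
	"fmt"
	"net/url"
	"strings"
)

// normalize repeats the steps from the NormalizeLink hunk above
// so the behaviour can be verified in isolation.
func normalize(link string) string {
	u, err := url.Parse(link)
	if err != nil {
		return link
	}
	normalized := fmt.Sprintf("%s%s", u.Hostname(), u.EscapedPath())
	if u.RawQuery != "" {
		normalized += "?" + u.RawQuery
	}
	return strings.TrimSuffix(normalized, "/")
}

func main() {
	fmt.Println(normalize("https://example.com/Foo%20Bar/"))     // example.com/Foo%20Bar      (trailing "/" trimmed)
	fmt.Println(normalize("https://example.com/Foo%20Bar/?q=1")) // example.com/Foo%20Bar/?q=1 (query kept, "/" not at the end)
}
```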
@@ -69,19 +72,30 @@ func (lf *LinkFilter) ExtractDomain(urlStr string) string {
		logger.Printf("Error parsing URL: %v", err)
		return urlStr
	}
-	domain := parsedURL.Hostname()
-	parts := strings.Split(domain, ".")
-	if len(parts) > 2 {
-		domain = strings.Join(parts[len(parts)-2:], ".")
-	}
-	return strings.ToLower(domain)
+	return strings.ToLower(parsedURL.Hostname())
}
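The replacement above removes the old truncation to the last two labels, so ExtractDomain now returns the full lowercased hostname and leaves the comparison to the domainMatch helper added below. A minimal sketch, adapted from the removed and added lines purely for illustration, showing the behavioural difference:

```go
package main

import (
	"fmt"
	"strings"
)

// oldExtract mimics the removed behaviour: keep only the last two labels.
func oldExtract(host string) string {
	parts := strings.Split(host, ".")
	if len(parts) > 2 {
		host = strings.Join(parts[len(parts)-2:], ".")
	}
	return strings.ToLower(host)
}

// newExtract mimics the new behaviour: keep the full hostname, lowercased.
func newExtract(host string) string {
	return strings.ToLower(host)
}

func main() {
	h := "Sub.Example.COM"
	fmt.Println(oldExtract(h)) // example.com     (subdomain information lost)
	fmt.Println(newExtract(h)) // sub.example.com (full host preserved for domainMatch)
}
```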

+func (lf *LinkFilter) domainMatch(domain, whiteDomain string) bool {
+	domainParts := strings.Split(domain, ".")
+	whiteDomainParts := strings.Split(whiteDomain, ".")
+
+	if len(domainParts) < len(whiteDomainParts) {
+		return false
+	}
+
+	for i := 1; i <= len(whiteDomainParts); i++ {
+		if domainParts[len(domainParts)-i] != whiteDomainParts[len(whiteDomainParts)-i] {
+			return false
+		}
+	}
+
+	return true
+}
func (lf *LinkFilter) IsWhitelisted(link string) bool {
	domain := lf.ExtractDomain(link)
	for _, whiteDomain := range lf.whitelist {
-		if domain == whiteDomain {
-			logger.Printf("Whitelist check for %s: Passed", link)
+		if lf.domainMatch(domain, whiteDomain) {
+			logger.Printf("Whitelist check for %s: Passed (matched %s)", link, whiteDomain)
			return true
		}
	}
@@ -142,10 +156,10 @@ func (lf *LinkFilter) ShouldFilter(text string) (bool, []string) {

	links := lf.linkPattern.FindAllString(text, -1)
	logger.Printf("Found links: %v", links)

	var newNonWhitelistedLinks []string
	for _, link := range links {
		normalizedLink := lf.NormalizeLink(link)
		normalizedLink = strings.TrimPrefix(normalizedLink, "/")
		if !lf.IsWhitelisted(normalizedLink) {
			logger.Printf("Link not whitelisted: %s", normalizedLink)
			found := false