mirror of
https://github.com/woodchen-ink/Q58Bot.git
synced 2025-07-18 05:42:06 +08:00
优化链接过滤器中的链接标准化函数
链接过滤器功能已增强，可去除链接中不必要的协议部分和开头的斜杠。该更新有助于更清晰地处理链接，同时保持其基本标识不变。
This commit is contained in:
parent
a1b9371afb
commit
39fe41eb83
@ -54,13 +54,20 @@ class LinkFilter:
|
||||
logger.info(f"Reloaded {len(self.keywords)} keywords and {len(self.whitelist)} whitelist entries")
|
||||
|
||||
def normalize_link(self, link):
    """Return a canonical, scheme-less form of *link* for comparison.

    The scheme (``http://`` / ``https://``, matched case-insensitively),
    any leading slashes, the fragment, and trailing slashes are removed.
    The host, path, params and query are kept as given.

    Args:
        link: The raw link text, with or without a scheme.

    Returns:
        The normalized link, e.g. ``"example.com/path?x=1"``. An empty
        input yields an empty string.
    """
    # URI scheme names are case-insensitive, so "HTTP://" must be stripped
    # as well; otherwise it would be mis-parsed as the host below.
    stripped = re.sub(r'^https?://', '', link, flags=re.IGNORECASE)
    # Drop leading slashes left by protocol-relative links ("//example.com").
    stripped = stripped.lstrip('/')

    # Re-attach a dummy scheme so urlparse reliably separates host from path.
    parsed = urllib.parse.urlparse(f"http://{stripped}")
    # Rebuild without the scheme and without the fragment.
    normalized = urllib.parse.urlunparse(
        ('', parsed.netloc, parsed.path, parsed.params, parsed.query, '')
    )
    # urlunparse prefixes "//" whenever a netloc is present, so strip slashes
    # on both ends: the result must not start with "//" nor end with "/".
    result = normalized.strip('/')

    # Log the caller's original input so the transformation is traceable.
    logger.debug(f"Normalized link: {link} -> {result}")
    return result
|
||||
|
||||
|
||||
def is_whitelisted(self, link):
|
||||
extracted = tldextract.extract(link)
|
||||
domain = f"{extracted.domain}.{extracted.suffix}"
|
||||
|
Loading…
x
Reference in New Issue
Block a user