refactor(bot_commands, guard, link_filter): 重构链接过滤和消息处理逻辑

重构链接过滤逻辑以规范化链接并改进关键词管理。删除未使用的JSON加载/保存函数以减少冗余。更新消息处理逻辑以使用新的过滤器类方法。在link_filter.py中实现新的链接标准化方法。
This commit is contained in:
wood 2024-09-04 17:33:16 +08:00
parent f6fc01c26a
commit 7aac6c3a23
3 changed files with 82 additions and 65 deletions

View File

@ -1,13 +1,16 @@
import os import os
import json
from telethon.tl.types import InputPeerUser from telethon.tl.types import InputPeerUser
from telethon.tl.functions.bots import SetBotCommandsRequest from telethon.tl.functions.bots import SetBotCommandsRequest
from telethon.tl.types import BotCommand from telethon.tl.types import BotCommand
from link_filter import LinkFilter
KEYWORDS_FILE = '/app/data/keywords.json' KEYWORDS_FILE = '/app/data/keywords.json'
WHITELIST_FILE = '/app/data/whitelist.json' WHITELIST_FILE = '/app/data/whitelist.json'
ADMIN_ID = int(os.environ.get('ADMIN_ID')) ADMIN_ID = int(os.environ.get('ADMIN_ID'))
# 创建 LinkFilter 实例
link_filter = LinkFilter(KEYWORDS_FILE, WHITELIST_FILE)
async def register_commands(client, admin_id): async def register_commands(client, admin_id):
commands = [ commands = [
BotCommand('add', '添加新的关键词'), BotCommand('add', '添加新的关键词'),
@ -28,17 +31,6 @@ async def register_commands(client, admin_id):
except Exception as e: except Exception as e:
print(f"Failed to register bot commands: {str(e)}") print(f"Failed to register bot commands: {str(e)}")
def load_json(file_path):
try:
with open(file_path, 'r') as f:
return json.load(f)
except FileNotFoundError:
return []
def save_json(file_path, data):
with open(file_path, 'w') as f:
json.dump(data, f)
async def handle_command(event, client): async def handle_command(event, client):
sender = await event.get_sender() sender = await event.get_sender()
if sender.id != ADMIN_ID: if sender.id != ADMIN_ID:
@ -53,23 +45,21 @@ async def handle_command(event, client):
await handle_whitelist_command(event, command, args) await handle_whitelist_command(event, command, args)
async def handle_keyword_command(event, command, args): async def handle_keyword_command(event, command, args):
keywords = load_json(KEYWORDS_FILE)
if command == '/list': if command == '/list':
keywords = link_filter.keywords
await event.reply("当前关键词列表:\n" + "\n".join(keywords) if keywords else "关键词列表为空。") await event.reply("当前关键词列表:\n" + "\n".join(keywords) if keywords else "关键词列表为空。")
elif command == '/add' and args: elif command == '/add' and args:
keyword = args[0].lower() keyword = args[0]
if keyword not in keywords: normalized_keyword = link_filter.normalize_link(keyword) if link_filter.link_pattern.match(keyword) else keyword.lower()
keywords.append(keyword) if normalized_keyword not in link_filter.keywords:
save_json(KEYWORDS_FILE, keywords) link_filter.add_keyword(normalized_keyword)
await event.reply(f"关键词 '{keyword}' 已添加。") await event.reply(f"关键词 '{keyword}' 已添加。")
else: else:
await event.reply(f"关键词 '{keyword}' 已存在。") await event.reply(f"关键词 '{keyword}' 已存在。")
elif command == '/delete' and args: elif command == '/delete' and args:
keyword = args[0].lower() keyword = args[0]
if keyword in keywords: normalized_keyword = link_filter.normalize_link(keyword) if link_filter.link_pattern.match(keyword) else keyword.lower()
keywords.remove(keyword) if link_filter.remove_keyword(normalized_keyword):
save_json(KEYWORDS_FILE, keywords)
await event.reply(f"关键词 '{keyword}' 已删除。") await event.reply(f"关键词 '{keyword}' 已删除。")
else: else:
await event.reply(f"关键词 '{keyword}' 不存在。") await event.reply(f"关键词 '{keyword}' 不存在。")
@ -77,23 +67,22 @@ async def handle_keyword_command(event, command, args):
await event.reply("无效的命令或参数。") await event.reply("无效的命令或参数。")
async def handle_whitelist_command(event, command, args): async def handle_whitelist_command(event, command, args):
whitelist = load_json(WHITELIST_FILE)
if command == '/listwhite': if command == '/listwhite':
whitelist = link_filter.whitelist
await event.reply("白名单域名列表:\n" + "\n".join(whitelist) if whitelist else "白名单为空。") await event.reply("白名单域名列表:\n" + "\n".join(whitelist) if whitelist else "白名单为空。")
elif command == '/addwhite' and args: elif command == '/addwhite' and args:
domain = args[0].lower() domain = args[0].lower()
if domain not in whitelist: if domain not in link_filter.whitelist:
whitelist.append(domain) link_filter.whitelist.append(domain)
save_json(WHITELIST_FILE, whitelist) link_filter.save_whitelist()
await event.reply(f"域名 '{domain}' 已添加到白名单。") await event.reply(f"域名 '{domain}' 已添加到白名单。")
else: else:
await event.reply(f"域名 '{domain}' 已在白名单中。") await event.reply(f"域名 '{domain}' 已在白名单中。")
elif command == '/delwhite' and args: elif command == '/delwhite' and args:
domain = args[0].lower() domain = args[0].lower()
if domain in whitelist: if domain in link_filter.whitelist:
whitelist.remove(domain) link_filter.whitelist.remove(domain)
save_json(WHITELIST_FILE, whitelist) link_filter.save_whitelist()
await event.reply(f"域名 '{domain}' 已从白名单中删除。") await event.reply(f"域名 '{domain}' 已从白名单中删除。")
else: else:
await event.reply(f"域名 '{domain}' 不在白名单中。") await event.reply(f"域名 '{domain}' 不在白名单中。")
@ -101,12 +90,9 @@ async def handle_whitelist_command(event, command, args):
await event.reply("无效的命令或参数。") await event.reply("无效的命令或参数。")
def get_keywords(): def get_keywords():
return load_json(KEYWORDS_FILE) return link_filter.keywords
def get_whitelist(): def get_whitelist():
return load_json(WHITELIST_FILE) return link_filter.whitelist
__all__ = ['handle_command', 'get_keywords', 'get_whitelist', 'register_commands'] __all__ = ['handle_command', 'get_keywords', 'get_whitelist', 'register_commands']

View File

@ -60,21 +60,23 @@ async def delete_message_after_delay(client, chat, message, delay):
# 处理消息函数 # 处理消息函数
async def process_message(event, client): async def process_message(event, client):
if not event.is_private: if not event.is_private:
# 检查消息是否包含已知的关键词(包括之前添加的非白名单链接) # 检查消息是否应该被过滤
if any(keyword in event.message.text for keyword in link_filter.keywords): should_filter, new_links = link_filter.should_filter(event.message.text)
if should_filter:
if event.sender_id != ADMIN_ID: if event.sender_id != ADMIN_ID:
await event.delete() await event.delete()
notification = await event.respond("已撤回该消息。注:重复发送的推广链接会被自动撤回。") notification = await event.respond("已撤回该消息。注:包含关键词或重复发送的非白名单链接会被自动撤回。")
asyncio.create_task(delete_message_after_delay(client, event.chat_id, notification, 3 * 60)) asyncio.create_task(delete_message_after_delay(client, event.chat_id, notification, 3 * 60))
return return
# 检查是否有新的非白名单链接
new_links = link_filter.should_filter(event.message.text)
if new_links: if new_links:
# 这是第一次发送这些非白名单链接,我们允许消息通过,不发送任何警告 # 这是第一次发送这些非白名单链接,我们允许消息通过,不发送任何警告
# 如果需要,可以在这里添加日志记录或其他操作
pass pass
async def command_handler(event): async def command_handler(event):
if event.is_private and event.sender_id == ADMIN_ID: if event.is_private and event.sender_id == ADMIN_ID:
await handle_command(event, event.client) await handle_command(event, event.client)

View File

@ -1,6 +1,7 @@
import re import re
import json import json
import tldextract import tldextract
import urllib.parse
class LinkFilter: class LinkFilter:
def __init__(self, keywords_file, whitelist_file): def __init__(self, keywords_file, whitelist_file):
@ -38,42 +39,70 @@ class LinkFilter:
def is_whitelisted(self, link): def is_whitelisted(self, link):
extracted = tldextract.extract(link) extracted = tldextract.extract(link)
full_domain = '.'.join(part for part in [extracted.subdomain, extracted.domain, extracted.suffix] if part) domain = f"{extracted.domain}.{extracted.suffix}"
main_domain = f"{extracted.domain}.{extracted.suffix}" return domain in self.whitelist
# 检查完整域名(包括子域名)
if full_domain in self.whitelist:
return True
# 检查主域名
if main_domain in self.whitelist:
return True
# 检查是否有通配符匹配
wildcard_domain = f"*.{main_domain}"
if wildcard_domain in self.whitelist:
return True
return False
def add_keyword(self, link): def normalize_link(self, link):
if link not in self.keywords: # 解析链接
self.keywords.append(link) parsed = urllib.parse.urlparse(link)
# 如果没有 scheme,则添加 'https://'
if not parsed.scheme:
link = 'https://' + link
parsed = urllib.parse.urlparse(link)
# 重新组合链接,去除路径参数(params)、查询字符串(query)和片段(fragment)
normalized = urllib.parse.urlunparse((
parsed.scheme,
parsed.netloc,
parsed.path,
'',
'',
''
))
return normalized.rstrip('/') # 移除尾部的斜杠
def add_keyword(self, keyword):
if self.link_pattern.match(keyword):
keyword = self.normalize_link(keyword)
if keyword not in self.keywords:
self.keywords.append(keyword)
self.save_keywords() self.save_keywords()
def remove_keyword(self, keyword):
if self.link_pattern.match(keyword):
keyword = self.normalize_link(keyword)
if keyword in self.keywords:
self.keywords.remove(keyword)
self.save_keywords()
return True
return False
def should_filter(self, text): def should_filter(self, text):
# 检查是否包含关键词
if any(keyword.lower() in text.lower() for keyword in self.keywords if not self.link_pattern.match(keyword)):
return True, []
links = self.link_pattern.findall(text) links = self.link_pattern.findall(text)
new_non_whitelisted_links = [] new_non_whitelisted_links = []
for link in links: for link in links:
if not self.is_whitelisted(link): normalized_link = self.normalize_link(link)
if link not in self.keywords: if not self.is_whitelisted(normalized_link):
new_non_whitelisted_links.append(link) if normalized_link not in self.keywords:
self.add_keyword(link) new_non_whitelisted_links.append(normalized_link)
return new_non_whitelisted_links self.add_keyword(normalized_link)
else:
return True, [] # 如果找到已存在的非白名单链接,应该过滤
return False, new_non_whitelisted_links
def reload_keywords(self): def reload_keywords(self):
self.keywords = self.load_json(self.keywords_file) self.keywords = self.load_json(self.keywords_file)
def reload_whitelist(self): def reload_whitelist(self):
self.whitelist = self.load_json(self.whitelist_file) self.whitelist = self.load_json(self.whitelist_file)
def save_whitelist(self):
with open(self.whitelist_file, 'w') as f:
json.dump(self.whitelist, f)