From f6fc01c26aab9fce8c1f2c0d8677a087a737a0d8 Mon Sep 17 00:00:00 2001 From: wood Date: Wed, 4 Sep 2024 17:08:28 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B6=88=E6=81=AF=E5=A4=84=E7=90=86=E5=92=8C?= =?UTF-8?q?=E9=93=BE=E6=8E=A5=E8=BF=87=E6=BB=A4=E5=A2=9E=E5=BC=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 优化了消息处理逻辑,以检查并删除包含已知关键词或新非白名单链接的消息。 - 扩展了链接过滤器,以支持子域名、主域名和通配符匹配,提高了过滤器的灵活性和有效性。 - 修正了should_filter方法,首次发送的非白名单链接不会触发消息删除,仅记录链接以供未来检查。 - 调整了代码以使用环境变量加载关键词和白名单文件路径,提高了配置的灵活性。 - 限速器和消息删除逻辑进行了优化,调整了通知消息的自动删除时间间隔。 --- src/guard.py | 25 ++++++++++++------------- src/link_filter.py | 28 ++++++++++++++++++++++------ 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/src/guard.py b/src/guard.py index a01ccc9..e02282d 100644 --- a/src/guard.py +++ b/src/guard.py @@ -5,7 +5,7 @@ import time from telethon import TelegramClient, events from collections import deque from link_filter import LinkFilter -from bot_commands import handle_command, get_keywords +from bot_commands import handle_command # 环境变量 @@ -19,7 +19,7 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(level logger = logging.getLogger('TeleGuard') # 创建 LinkFilter 实例 -link_filter = LinkFilter('/app/data/keywords.json', '/app/data/whitelist.json') +link_filter = LinkFilter(KEYWORDS_FILE, WHITELIST_FILE) # 限速器 class RateLimiter: @@ -60,21 +60,20 @@ async def delete_message_after_delay(client, chat, message, delay): # 处理消息函数 async def process_message(event, client): if not event.is_private: - # 检查消息是否包含需要过滤的链接 - if link_filter.should_filter(event.message.text): + # 检查消息是否包含已知的关键词(包括之前添加的非白名单链接) + if any(keyword in event.message.text for keyword in link_filter.keywords): if event.sender_id != ADMIN_ID: await event.delete() - notification = await event.respond("已撤回该消息。注:重复发送的链接会被自动撤回。") - asyncio.create_task(delete_message_after_delay(client, event.chat_id, notification, 30 * 60)) + notification = await event.respond("已撤回该消息。注:重复发送的推广链接会被自动撤回。") + asyncio.create_task(delete_message_after_delay(client, event.chat_id, notification, 3 * 60)) return - # 检查关键词 - keywords = get_keywords() - if any(keyword in event.message.text.lower() for keyword in keywords): - if event.sender_id != ADMIN_ID: - await event.delete() - notification = await event.respond("已撤回该消息。注:已发送的推广链接不要多次发送,置顶已有项目的推广链接也会自动撤回。") - asyncio.create_task(delete_message_after_delay(client, event.chat_id, notification, 30 * 60)) + # 检查是否有新的非白名单链接 + new_links = link_filter.should_filter(event.message.text) + if new_links: + # 这是第一次发送这些非白名单链接,我们允许消息通过,不发送任何警告 + pass + async def command_handler(event): if event.is_private and event.sender_id == ADMIN_ID: diff --git a/src/link_filter.py b/src/link_filter.py index c197972..7eaf91e 100644 --- a/src/link_filter.py +++ b/src/link_filter.py @@ -38,8 +38,24 @@ class LinkFilter: def is_whitelisted(self, link): extracted = tldextract.extract(link) - domain = f"{extracted.domain}.{extracted.suffix}" - return domain in self.whitelist + full_domain = '.'.join(part for part in [extracted.subdomain, extracted.domain, extracted.suffix] if part) + main_domain = f"{extracted.domain}.{extracted.suffix}" + + # 检查完整域名(包括子域名) + if full_domain in self.whitelist: + return True + + # 检查主域名 + if main_domain in self.whitelist: + return True + + # 检查是否有通配符匹配 + wildcard_domain = f"*.{main_domain}" + if wildcard_domain in self.whitelist: + return True + + return False + def add_keyword(self, link): if link not in self.keywords: @@ -48,13 +64,13 @@ class LinkFilter: def should_filter(self, text): links = self.link_pattern.findall(text) + new_non_whitelisted_links = [] for link in links: if not self.is_whitelisted(link): - if link in self.keywords: - return True - else: + if link not in self.keywords: + new_non_whitelisted_links.append(link) self.add_keyword(link) - return False + return new_non_whitelisted_links def reload_keywords(self): self.keywords = self.load_json(self.keywords_file)