From 8642737fa4f474c80a40b712eb15145a8f3e2e13 Mon Sep 17 00:00:00 2001
From: wood
Date: Wed, 4 Sep 2024 20:26:20 +0800
Subject: [PATCH] Implement logging to enhance debugging and monitoring
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Integrate logging into the `guard.py` and `link_filter.py` modules to
support better debugging and runtime monitoring. A `logger` instance is
added to each module, and log entries are placed around key operations,
covering activities such as message processing, link-filtering decisions,
and configuration loading.
---
 src/guard.py       | 11 +++++++++--
 src/link_filter.py | 33 +++++++++++++++++++++++++++------
 2 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/src/guard.py b/src/guard.py
index e58285b..9f32812 100644
--- a/src/guard.py
+++ b/src/guard.py
@@ -7,6 +7,11 @@ from collections import deque
 import time
 from link_filter import LinkFilter
 from bot_commands import handle_command
+import logging
+
+# 设置日志
+logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger('TeleGuard')
 
 # 环境变量
 BOT_TOKEN = os.environ.get('BOT_TOKEN')
@@ -50,16 +55,18 @@ async def delete_message_after_delay(client, chat, message, delay):
 
 async def process_message(event, client):
     if not event.is_private:
+        logger.debug(f"Processing message: {event.message.text}")
         should_filter, new_links = link_filter.should_filter(event.message.text)
         if should_filter:
+            logger.info(f"Message should be filtered: {event.message.text}")
             if event.sender_id != ADMIN_ID:
                 await event.delete()
                 notification = await event.respond("已撤回该消息。注:包含关键词或重复发送的非白名单链接会被自动撤回。")
                 asyncio.create_task(delete_message_after_delay(client, event.chat_id, notification, 3 * 60))
             return
         if new_links:
-            # 可以在这里添加日志记录或其他操作
-            pass
+            logger.info(f"New non-whitelisted links found: {new_links}")
+
 
 async def command_handler(event, link_filter):
     if event.is_private and event.sender_id == ADMIN_ID:
diff --git a/src/link_filter.py b/src/link_filter.py
index c33c4a4..646aaa8 100644
--- a/src/link_filter.py
+++ b/src/link_filter.py
@@ -2,6 +2,10 @@ import re
 import json
 import tldextract
 import urllib.parse
+import logging
+
+logger = logging.getLogger('TeleGuard.LinkFilter')
+
 
 class LinkFilter:
     def __init__(self, keywords_file, whitelist_file):
@@ -16,15 +20,17 @@ class LinkFilter:
             (?:
                 (?:https?://)?              # http:// or https:// (optional)
                 (?:(?:www\.)?               # www. (optional)
-                (?:[a-zA-Z0-9-]+\.)+    # domain
-                [a-zA-Z]{2,}            # TLD
-                |                       # or
-                (?:t\.me|telegram\.me)  # Telegram links
+                (?:[a-zA-Z0-9-]+\.)+        # domain
+                [a-zA-Z]{2,}                # TLD
+                |                           # or
+                (?:t\.me|telegram\.me)      # Telegram links
                 )
                 (?:/[^\s]*)?                # optional path and query string
             )
             \b
         ''', re.VERBOSE | re.IGNORECASE)
+        logger.info(f"LinkFilter initialized with keywords file: {keywords_file} and whitelist file: {whitelist_file}")
+
 
     def load_json(self, file_path):
         try:
@@ -50,12 +56,17 @@ class LinkFilter:
     def normalize_link(self, link):
         link = re.sub(r'^https?://', '', link)
         parsed = urllib.parse.urlparse(f"http://{link}")
-        return urllib.parse.urlunparse(('', parsed.netloc, parsed.path, parsed.params, parsed.query, '')).rstrip('/')
+        normalized = urllib.parse.urlunparse(('', parsed.netloc, parsed.path, parsed.params, parsed.query, ''))
+        result = normalized.rstrip('/')
+        logger.debug(f"Normalized link: {link} -> {result}")
+        return result
 
     def is_whitelisted(self, link):
         extracted = tldextract.extract(link)
         domain = f"{extracted.domain}.{extracted.suffix}"
-        return domain in self.whitelist
+        result = domain in self.whitelist
+        logger.debug(f"Whitelist check for {link}: {'Passed' if result else 'Failed'}")
+        return result
 
     def add_keyword(self, keyword):
         if self.link_pattern.match(keyword):
@@ -63,6 +74,9 @@
         if keyword not in self.keywords:
             self.keywords.append(keyword)
             self.save_keywords()
+            logger.info(f"New keyword added: {keyword}")
+        else:
+            logger.debug(f"Keyword already exists: {keyword}")
 
     def remove_keyword(self, keyword):
         if self.link_pattern.match(keyword):
@@ -74,18 +88,25 @@
         return False
 
     def should_filter(self, text):
+        logger.debug(f"Checking text: {text}")
         if any(keyword.lower() in text.lower() for keyword in self.keywords):
+            logger.info(f"Text contains keyword: {text}")
             return True, []
 
         links = self.link_pattern.findall(text)
+        logger.debug(f"Found links: {links}")
         new_non_whitelisted_links = []
         for link in links:
             normalized_link = self.normalize_link(link)
             if not self.is_whitelisted(normalized_link):
+                logger.debug(f"Link not whitelisted: {normalized_link}")
                 if normalized_link not in self.keywords:
                     new_non_whitelisted_links.append(normalized_link)
                     self.add_keyword(normalized_link)
                 else:
+                    logger.info(f"Existing keyword found: {normalized_link}")
                     return True, []
+        if new_non_whitelisted_links:
+            logger.info(f"New non-whitelisted links found: {new_non_whitelisted_links}")
 
         return False, new_non_whitelisted_links
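
A minimal, self-contained sketch of the logger hierarchy this patch relies
on: guard.py calls logging.basicConfig() and logs under 'TeleGuard', while
link_filter.py logs under the child logger 'TeleGuard.LinkFilter', which has
no handler of its own and propagates records to the root handler, so both
modules share one format and level. The LOG_LEVEL environment variable at
the end is illustrative only and is not part of this patch.

    import logging
    import os

    # Same handler configuration that the patch adds to guard.py.
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    # guard.py logs as 'TeleGuard'; link_filter.py logs as 'TeleGuard.LinkFilter'.
    # The child logger has no handler of its own, so its records propagate up
    # to the root handler configured above and share its format.
    guard_logger = logging.getLogger('TeleGuard')
    filter_logger = logging.getLogger('TeleGuard.LinkFilter')

    guard_logger.info("Processing message: example text")
    filter_logger.debug("Whitelist check for t.me/example: Failed")

    # Hypothetical follow-up (not in this patch): read the level from an
    # environment variable instead of hard-coding DEBUG.
    level = getattr(logging, os.environ.get('LOG_LEVEL', 'DEBUG').upper(), logging.DEBUG)
    guard_logger.setLevel(level)

One observation on the new entries: both the DEBUG lines and some INFO lines
include the full message text, which may be worth keeping in mind for log
retention in production.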