diff --git a/src/guard.py b/src/guard.py
index e58285b..9f32812 100644
--- a/src/guard.py
+++ b/src/guard.py
@@ -7,6 +7,11 @@ from collections import deque
 import time
 from link_filter import LinkFilter
 from bot_commands import handle_command
+import logging
+
+# Set up logging
+logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger('TeleGuard')
 
 # Environment variables
 BOT_TOKEN = os.environ.get('BOT_TOKEN')
@@ -50,16 +55,18 @@ async def delete_message_after_delay(client, chat, message, delay):
 
 async def process_message(event, client):
     if not event.is_private:
+        logger.debug(f"Processing message: {event.message.text}")
         should_filter, new_links = link_filter.should_filter(event.message.text)
         if should_filter:
+            logger.info(f"Message should be filtered: {event.message.text}")
             if event.sender_id != ADMIN_ID:
                 await event.delete()
                 notification = await event.respond("This message has been deleted. Note: messages containing keywords or repeatedly posted non-whitelisted links are removed automatically.")
                 asyncio.create_task(delete_message_after_delay(client, event.chat_id, notification, 3 * 60))
             return
         if new_links:
-            # Logging or other handling could be added here
-            pass
+            logger.info(f"New non-whitelisted links found: {new_links}")
+
 
 async def command_handler(event, link_filter):
     if event.is_private and event.sender_id == ADMIN_ID:
diff --git a/src/link_filter.py b/src/link_filter.py
index c33c4a4..646aaa8 100644
--- a/src/link_filter.py
+++ b/src/link_filter.py
@@ -2,6 +2,10 @@ import re
 import json
 import tldextract
 import urllib.parse
+import logging
+
+logger = logging.getLogger('TeleGuard.LinkFilter')
+
 
 class LinkFilter:
     def __init__(self, keywords_file, whitelist_file):
@@ -16,15 +20,17 @@
             (?:
                 (?:https?://)?          # http:// or https:// (optional)
                 (?:(?:www\.)?           # www. (optional)
-                (?:[a-zA-Z0-9-]+\.)+ # domain
-                [a-zA-Z]{2,} # TLD
-                | # or
-                (?:t\.me|telegram\.me) # Telegram links
+                (?:[a-zA-Z0-9-]+\.)+    # domain
+                [a-zA-Z]{2,}            # TLD
+                |                       # or
+                (?:t\.me|telegram\.me)  # Telegram links
                 )
                 (?:/[^\s]*)?            # optional path and query string
             )
             \b
         ''', re.VERBOSE | re.IGNORECASE)
+        logger.info(f"LinkFilter initialized with keywords file: {keywords_file} and whitelist file: {whitelist_file}")
+
 
     def load_json(self, file_path):
         try:
@@ -50,12 +56,17 @@
     def normalize_link(self, link):
         link = re.sub(r'^https?://', '', link)
         parsed = urllib.parse.urlparse(f"http://{link}")
-        return urllib.parse.urlunparse(('', parsed.netloc, parsed.path, parsed.params, parsed.query, '')).rstrip('/')
+        normalized = urllib.parse.urlunparse(('', parsed.netloc, parsed.path, parsed.params, parsed.query, ''))
+        result = normalized.rstrip('/')
+        logger.debug(f"Normalized link: {link} -> {result}")
+        return result
 
     def is_whitelisted(self, link):
         extracted = tldextract.extract(link)
         domain = f"{extracted.domain}.{extracted.suffix}"
-        return domain in self.whitelist
+        result = domain in self.whitelist
+        logger.debug(f"Whitelist check for {link}: {'Passed' if result else 'Failed'}")
+        return result
 
     def add_keyword(self, keyword):
         if self.link_pattern.match(keyword):
@@ -63,6 +74,9 @@
             if keyword not in self.keywords:
                 self.keywords.append(keyword)
                 self.save_keywords()
+                logger.info(f"New keyword added: {keyword}")
+            else:
+                logger.debug(f"Keyword already exists: {keyword}")
 
     def remove_keyword(self, keyword):
         if self.link_pattern.match(keyword):
@@ -74,18 +88,25 @@
         return False
 
     def should_filter(self, text):
+        logger.debug(f"Checking text: {text}")
         if any(keyword.lower() in text.lower() for keyword in self.keywords):
+            logger.info(f"Text contains keyword: {text}")
             return True, []
 
         links = self.link_pattern.findall(text)
+        logger.debug(f"Found links: {links}")
         new_non_whitelisted_links = []
         for link in links:
             normalized_link = self.normalize_link(link)
             if not self.is_whitelisted(normalized_link):
+                logger.debug(f"Link not whitelisted: {normalized_link}")
                 if normalized_link not in self.keywords:
                     new_non_whitelisted_links.append(normalized_link)
                     self.add_keyword(normalized_link)
                 else:
+                    logger.info(f"Existing keyword found: {normalized_link}")
                     return True, []
 
+        if new_non_whitelisted_links:
+            logger.info(f"New non-whitelisted links found: {new_non_whitelisted_links}")
         return False, new_non_whitelisted_links
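A quick way to eyeball the new log output without running the bot is a small driver script. The sketch below is not part of the patch: smoke_test.py, the JSON file paths, and the sample message are all placeholders. It simply mirrors guard.py's basicConfig so that records from the TeleGuard.LinkFilter child logger propagate to a visible handler.

# smoke_test.py - hypothetical driver, not included in this patch.
import logging

# Mirror guard.py's root logging setup so that the 'TeleGuard.LinkFilter'
# child logger's DEBUG/INFO records propagate to a visible handler.
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

from link_filter import LinkFilter

# Assumed paths; point these at the real keyword/whitelist JSON files.
# Note that should_filter() has a side effect: new non-whitelisted links
# are persisted to the keywords file via add_keyword().
link_filter = LinkFilter('keywords.json', 'whitelist.json')

# Expected output: the "LinkFilter initialized" INFO line, per-link DEBUG
# lines from normalize_link and is_whitelisted, then the returned
# (should_filter, new_links) tuple.
print(link_filter.should_filter('spam at https://example.com/offer and t.me/somechannel'))

One caveat on the configuration itself: with level=logging.DEBUG set at import time in guard.py, every group message is logged in full. In production it may be worth raising the level to INFO, or reading it from an environment variable, to keep log volume and stored message content down.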