实现日志记录以增强调试和监控

在`guard.py`和`link_filter.py`模块中集成日志记录功能,以促进更好的调试和运行时监控。添加了`logger`实例,并在关键操作中散布日志条目,记录消息处理、链接过滤决策和配置加载等活动。
This commit is contained in:
wood 2024-09-04 20:26:20 +08:00
parent b1cd9b3a2a
commit 8642737fa4
2 changed files with 36 additions and 8 deletions

View File

@ -7,6 +7,11 @@ from collections import deque
import time
from link_filter import LinkFilter
from bot_commands import handle_command
import logging
# 设置日志
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('TeleGuard')
# 环境变量
BOT_TOKEN = os.environ.get('BOT_TOKEN')
@ -50,16 +55,18 @@ async def delete_message_after_delay(client, chat, message, delay):
async def process_message(event, client):
    """Filter one incoming group message.

    Non-private messages are checked by the module-level ``link_filter``;
    a flagged message from anyone but the admin is deleted and replaced by
    a notice that self-destructs after 3 minutes.

    Args:
        event: Telethon message event (``.message.text``, ``.sender_id``,
            ``.chat_id``, ``.is_private`` are used).
        client: Telethon client, forwarded to the delayed-delete task.
    """
    if event.is_private:
        return  # only group/channel traffic is filtered
    logger.debug("Processing message: %s", event.message.text)
    should_filter, new_links = link_filter.should_filter(event.message.text)
    if should_filter:
        logger.info("Message should be filtered: %s", event.message.text)
        if event.sender_id != ADMIN_ID:
            await event.delete()
            notification = await event.respond("已撤回该消息。注:包含关键词或重复发送的非白名单链接会被自动撤回。")
            # Fire-and-forget: remove the notice after 3 minutes.
            asyncio.create_task(delete_message_after_delay(client, event.chat_id, notification, 3 * 60))
        # Filtered either way; skip the new-links bookkeeping below.
        return
    if new_links:
        logger.info("New non-whitelisted links found: %s", new_links)
async def command_handler(event, link_filter):
if event.is_private and event.sender_id == ADMIN_ID:

View File

@ -2,6 +2,10 @@ import re
import json
import tldextract
import urllib.parse
import logging
logger = logging.getLogger('TeleGuard.LinkFilter')
class LinkFilter:
def __init__(self, keywords_file, whitelist_file):
@ -16,15 +20,17 @@ class LinkFilter:
(?:
(?:https?://)? # http:// or https:// (optional)
(?:(?:www\.)? # www. (optional)
(?:[a-zA-Z0-9-]+\.)+ # domain
[a-zA-Z]{2,} # TLD
| # or
(?:t\.me|telegram\.me) # Telegram links
(?:[a-zA-Z0-9-]+\.)+ # domain
[a-zA-Z]{2,} # TLD
| # or
(?:t\.me|telegram\.me) # Telegram links
)
(?:/[^\s]*)? # optional path and query string
)
\b
''', re.VERBOSE | re.IGNORECASE)
logger.info(f"LinkFilter initialized with keywords file: {keywords_file} and whitelist file: {whitelist_file}")
def load_json(self, file_path):
try:
@ -50,12 +56,17 @@ class LinkFilter:
def normalize_link(self, link):
    """Normalize a URL for comparison/storage.

    Strips the ``http(s)://`` scheme, re-parses the remainder to keep only
    netloc + path + params + query (fragment and scheme dropped), and trims
    any trailing slash so equivalent links compare equal.

    Args:
        link: Raw link text as matched by ``self.link_pattern``.

    Returns:
        The scheme-less, slash-trimmed form, e.g.
        ``"example.com/a?q=1"`` for ``"https://example.com/a/?q=1"``.
    """
    link = re.sub(r'^https?://', '', link)
    # Re-add a dummy scheme so urlparse fills netloc correctly.
    parsed = urllib.parse.urlparse(f"http://{link}")
    normalized = urllib.parse.urlunparse(
        ('', parsed.netloc, parsed.path, parsed.params, parsed.query, '')
    )
    result = normalized.rstrip('/')
    logger.debug("Normalized link: %s -> %s", link, result)
    return result
def is_whitelisted(self, link):
    """Return True if the link's registered domain is whitelisted.

    Uses ``tldextract`` to reduce the link to ``domain.suffix`` (so any
    subdomain of a whitelisted domain passes) and checks membership in
    ``self.whitelist``.

    Args:
        link: A (normalized) link string.

    Returns:
        bool: whether the domain is in the whitelist.
    """
    extracted = tldextract.extract(link)
    domain = f"{extracted.domain}.{extracted.suffix}"
    result = domain in self.whitelist
    logger.debug("Whitelist check for %s: %s", link, 'Passed' if result else 'Failed')
    return result
def add_keyword(self, keyword):
if self.link_pattern.match(keyword):
@ -63,6 +74,9 @@ class LinkFilter:
if keyword not in self.keywords:
self.keywords.append(keyword)
self.save_keywords()
logger.info(f"New keyword added: {keyword}")
else:
logger.debug(f"Keyword already exists: {keyword}")
def remove_keyword(self, keyword):
if self.link_pattern.match(keyword):
@ -74,18 +88,25 @@ class LinkFilter:
return False
def should_filter(self, text):
    """Decide whether a message should be filtered.

    A message is filtered if it contains any configured keyword
    (case-insensitive substring match) or a non-whitelisted link that was
    already recorded as a keyword (i.e. seen before). Links that are
    non-whitelisted but new are NOT filtered on first sight; instead they
    are recorded via ``add_keyword`` (side effect: mutates and persists
    ``self.keywords``) and reported to the caller.

    Args:
        text: Message text; may be ``None`` for media-only messages.

    Returns:
        tuple[bool, list[str]]: ``(should_filter, new_links)`` where
        ``new_links`` are newly recorded non-whitelisted links (empty
        whenever ``should_filter`` is True).
    """
    # Media-only messages have no text; nothing to filter.
    if not text:
        return False, []
    logger.debug("Checking text: %s", text)
    lowered = text.lower()  # hoisted out of the per-keyword loop
    if any(keyword.lower() in lowered for keyword in self.keywords):
        logger.info("Text contains keyword: %s", text)
        return True, []
    links = self.link_pattern.findall(text)
    logger.debug("Found links: %s", links)
    new_non_whitelisted_links = []
    for link in links:
        normalized_link = self.normalize_link(link)
        if not self.is_whitelisted(normalized_link):
            logger.debug("Link not whitelisted: %s", normalized_link)
            if normalized_link not in self.keywords:
                # First sighting: remember it but let the message through.
                new_non_whitelisted_links.append(normalized_link)
                self.add_keyword(normalized_link)
            else:
                # Repeat offender: filter the message.
                logger.info("Existing keyword found: %s", normalized_link)
                return True, []
    if new_non_whitelisted_links:
        logger.info("New non-whitelisted links found: %s", new_non_whitelisted_links)
    return False, new_non_whitelisted_links