From 6b5d0dd1fedda72b66ad7e7401d8d7946a5c4653 Mon Sep 17 00:00:00 2001 From: klemek Date: Wed, 6 Jan 2021 18:58:19 +0100 Subject: [PATCH] working logs query --- bot.py | 7 ++ emotes.py | 137 ++++++++++++----------------------- log_manager.py | 189 ++++++++++++++++++++++++++++++++----------------- utils.py | 27 +++---- 4 files changed, 188 insertions(+), 172 deletions(-) diff --git a/bot.py b/bot.py index e497c99..f012335 100755 --- a/bot.py +++ b/bot.py @@ -1,7 +1,14 @@ from miniscord import Bot +import logging import emotes +logging.basicConfig( + format="[%(asctime)s][%(levelname)s][%(module)s] %(message)s", level=logging.INFO +) + +emotes.load_emojis() + bot = Bot( "Discord Analyst", # name "1.4", # version diff --git a/emotes.py b/emotes.py index 8b06ce0..1b4e27f 100644 --- a/emotes.py +++ b/emotes.py @@ -4,9 +4,11 @@ from collections import defaultdict import discord import re import json +import logging # Custom libs from utils import debug, aggregate, no_duplicate +from log_manager import GuildLogs, ChannelLogs, MessageLog # CONSTANTS @@ -88,11 +90,9 @@ def load_emojis(): GLOBAL_EMOJIS[unicode] = shortcode unicode_list += [unicode_escaped] EMOJI_REGEX = re.compile(f"(|:\\w+:|{'|'.join(unicode_list)})") + logging.info(f"loaded {len(GLOBAL_EMOJIS)} emojis") -load_emojis() -print(f"loaded {len(GLOBAL_EMOJIS)} emojis") - # MAIN HELP = ( @@ -112,6 +112,7 @@ async def compute(client: discord.client, message: discord.Message, *args: str): Computes the %emotes command """ guild = message.guild + logs = GuildLogs(guild) # If "%emotes help" redirect to "%help emotes" if "help" in args: @@ -131,45 +132,27 @@ async def compute(client: discord.client, message: discord.Message, *args: str): # Get selected members members = no_duplicate(message.mentions) + raw_members = no_duplicate(message.raw_mentions) # Start computing data async with message.channel.typing(): - nm = 0 # number of messages treated - nmm = 0 # number of impacted messages - nc = 0 # number of channel treated - t0 = datetime.now() - # Show custom progress message and keep it to update it later - progress = await message.channel.send("```starting analysis...```") - # Analyse every channel selected - for channel in channels: - nm1, nmm1 = await analyse_channel( - channel, - emotes, - members, - progress, - nm, - nc, - all_emojis="all" in args, - analyse_members_reactions="reactions" in args, + progress = await message.channel.send("```Starting analysis...```") + total_msg, total_chan = await logs.load(progress, channels) + for id in logs.channels: + analyse_channel( + logs.channels[id], emotes, raw_members, all_emojis="all" in args ) - # If treatment was successful, increase numbers - if nm1 >= 0: - nm += nm1 - nmm += nmm1 - nc += 1 # Delete custom progress message await progress.delete() # Display results await tell_results( - get_intro(emotes, full, channels, members, nmm, nc), + get_intro(emotes, full, channels, members, total_msg, total_chan), emotes, message.channel, - nmm, + total_msg, allow_unused=full and len(members) == 0, show_life=False, ) - dt = (datetime.now() - t0).total_seconds() - debug(message, f"response sent {dt} s -> {nm / dt} m/s") # CLASSES @@ -217,72 +200,42 @@ class Emote: # ANALYSIS -async def analyse_channel( - channel: discord.TextChannel, +def analyse_channel( + channel: ChannelLogs, emotes: Dict[str, Emote], - members: List[discord.Member], - progress: discord.Message, - nm0: int, # number of already analysed messages - nc: int, # number of already analysed channels, + raw_members: List[int], *, all_emojis: bool, - analyse_members_reactions: bool, -) -> Tuple[int, int]: - nm = 0 - nmm = 0 - try: - last_message = None - done = 0 - while done >= CHUNK_SIZE or last_message is None: - done = 0 - async for m in channel.history( - limit=CHUNK_SIZE, before=last_message, oldest_first=False - ): - done += 1 - last_message = m - # If author is not bot or included in the selection (empty list is all) - if not m.author.bot and (len(members) == 0 or m.author in members): - # Find all emotes un the current message in the form "<:emoji:123456789>" - # Filter for known emotes - found = EMOJI_REGEX.findall(m.content) - # For each emote, update its usage - for name in found: - if name not in emotes: - if not all_emojis or name not in GLOBAL_EMOJIS: - continue - name = GLOBAL_EMOJIS[name] - emotes[name].usages += 1 - emotes[name].update_use(m.created_at) - # Count this message as impacted - nmm += 1 - - # For each reaction of this message, test if known emote and update when it's the case - for reaction in m.reactions: - name = str(reaction.emoji) - if name not in emotes: - if not all_emojis or name not in GLOBAL_EMOJIS: - continue - name = GLOBAL_EMOJIS[name] - if len(members) == 0: - emotes[name].reactions += reaction.count - emotes[name].update_use(m.created_at) - elif analyse_members_reactions: - users = await reaction.users().flatten() - for member in members: - if member in users: - emotes[name].reactions += 1 - emotes[name].update_use(m.created_at) - nm += done - # await progress.edit( - # content=f"```{nm0 + nm:,} messages and {nc} channels analysed```" - # ) - # await progress.edit( - # content=f"```{nm0 + nm:,} messages and {nc+1} channels analysed```" - # ) - return nm, nmm - except discord.errors.HTTPException: - # When an exception occurs (like Forbidden) sent -1 - return -1, -1 +): + for message in channel.messages: + # If author included in the selection (empty list is all) + if len(raw_members) == 0 or message.author in raw_members: + # Find all emotes un the current message in the form "<:emoji:123456789>" + # Filter for known emotes + found = EMOJI_REGEX.findall(message.content) + # For each emote, update its usage + for name in found: + if name not in emotes: + if not all_emojis or name not in GLOBAL_EMOJIS: + continue + name = GLOBAL_EMOJIS[name] + emotes[name].usages += 1 + emotes[name].update_use(message.created_at) + # For each reaction of this message, test if known emote and update when it's the case + for name in message.reactions: + raw_name = name + if name not in emotes: + if not all_emojis or name not in GLOBAL_EMOJIS: + continue + name = GLOBAL_EMOJIS[name] + if len(raw_members) == 0: + emotes[name].reactions += len(message.reactions[raw_name]) + emotes[name].update_use(message.created_at) + else: + for member in raw_members: + if member in message.reactions[raw_name]: + emotes[name].reactions += 1 + emotes[name].update_use(message.created_at) # RESULTS diff --git a/log_manager.py b/log_manager.py index c089973..8b0cebe 100644 --- a/log_manager.py +++ b/log_manager.py @@ -1,7 +1,9 @@ -from typing import Union, List +from typing import Union, List, Tuple import os import discord import json +from datetime import datetime +import logging LOG_DIR = "logs" @@ -12,48 +14,64 @@ if not os.path.exists(LOG_DIR): CHUNK_SIZE = 1000 +class FakeMessage: + def __init__(self, id: int): + self.id = id + + class MessageLog: def __init__(self, message: Union[discord.Message, dict]): if isinstance(message, discord.Message): self.id = message.id self.created_at = message.created_at self.edited_at = message.edited_at - self.author = message.author + self.author = message.author.id self.pinned = message.pinned self.mention_everyone = message.mention_everyone self.tts = message.tts - self.reference = message.reference.id + self.reference = ( + message.reference.id if message.reference is not None else None + ) self.content = message.content self.mentions = message.raw_mentions self.role_mentions = message.raw_role_mentions self.channel_mentions = message.raw_channel_mentions self.reactions = {} elif isinstance(message, dict): - self.id = message["id"] - self.created_at = message["created_at"] - self.edited_at = message["edited_at"] + self.id = int(message["id"]) + self.created_at = datetime.fromisoformat(message["created_at"]) + self.edited_at = ( + datetime.fromisoformat(message["edited_at"]) + if message["edited_at"] is not None + else None + ) self.author = message["author"] self.pinned = message["pinned"] self.mention_everyone = message["mention_everyone"] self.tts = message["tts"] - self.reference = message["reference.id"] + self.reference = message["reference"] self.content = message["content"] - self.mentions = message["raw_mentions"] - self.role_mentions = message["raw_role_mentions"] - self.channel_mentions = message["raw_channel_mentions"] + self.mentions = message["mentions"] + self.role_mentions = message["role_mentions"] + self.channel_mentions = message["channel_mentions"] self.reactions = message["reactions"] async def load(self, message: discord.Message): for reaction in message.reactions: - self.reactions[str(reaction)] = [] + self.reactions[str(reaction.emoji)] = [] async for user in reaction.users(): - self.reactions[str(reaction)] += user.id + self.reactions[str(reaction.emoji)] += [user.id] - def dict(self): - return self.__dict__ + def dict(self) -> dict: + message = dict(self.__dict__) + message["created_at"] = self.created_at.isoformat() + message["edited_at"] = ( + self.edited_at.isoformat() if self.edited_at is not None else None + ) + return message -class ChannelLog: +class ChannelLogs: def __init__(self, channel: Union[discord.TextChannel, dict]): if isinstance(channel, discord.TextChannel): self.id = channel.id @@ -61,82 +79,127 @@ class ChannelLog: self.last_message_id = None self.messages = [] elif isinstance(channel, dict): - self.id = channel["id"] + self.id = int(channel["id"]) self.name = channel["name"] self.last_message_id = channel["last_message_id"] self.messages = [MessageLog(message) for message in channel["messages"]] - async def load(self, channel: discord.TextChannel): + async def load(self, channel: discord.TextChannel) -> Tuple[int, int]: self.name = channel.name - if self.last_message_id is not None: # append - while self.last_message_id != channel.last_message_id: - async for message in channel.history( - limit=CHUNK_SIZE, after=self.last_message_id, oldest_first=True - ): - self.last_message_id = message.id - m = MessageLog(message) - await m.load(message) - self.messages.insert(0, m) - yield len(self.messages), False - else: # first load - last_message_id = None - done = 0 - while done >= CHUNK_SIZE or last_message_id is None: + self.channel = channel + try: + if self.last_message_id is not None: # append + while self.last_message_id != channel.last_message_id: + async for message in channel.history( + limit=CHUNK_SIZE, + after=FakeMessage(self.last_message_id), + oldest_first=True, + ): + self.last_message_id = message.id + if not message.author.bot: + m = MessageLog(message) + await m.load(message) + self.messages.insert(0, m) + yield len(self.messages), False + else: # first load + last_message_id = None done = 0 - async for message in channel.history( - limit=CHUNK_SIZE, after=self.last_message_id, oldest_first=False - ): - done += 1 - last_message_id = message.id - m = MessageLog(message) - await m.load(message) - self.messages += [m] - yield len(self.messages), False - self.last_message_id == channel.last_message_id + while done >= CHUNK_SIZE or last_message_id is None: + done = 0 + async for message in channel.history( + limit=CHUNK_SIZE, + before=FakeMessage(last_message_id) + if last_message_id is not None + else None, + oldest_first=False, + ): + done += 1 + last_message_id = message.id + if not message.author.bot: + m = MessageLog(message) + await m.load(message) + self.messages += [m] + yield len(self.messages), False + self.last_message_id = channel.last_message_id + except discord.errors.HTTPException: + return # When an exception occurs (like Forbidden) yield len(self.messages), True - def dict(self): - tmp = self.__dict__ - tmp["messages"] = [message.dict() for message in self.messages] - return tmp + def dict(self) -> dict: + channel = dict(self.__dict__) + channel.pop("channel", None) + channel["messages"] = [message.dict() for message in self.messages] + return channel class GuildLogs: def __init__(self, guild: discord.Guild): self.guild = guild - self.log_file = os.path.join(LOG_DIR, f"{guild}.logz") + self.log_file = os.path.join(LOG_DIR, f"{guild.id}.logz") self.channels = {} - def dict(self): + def dict(self) -> dict: return {id: self.channels[id].dict() for id in self.channels} - async def load(self, target_channels: List[discord.TextChannel] = []): + async def load( + self, progress: discord.Message, target_channels: List[discord.TextChannel] = [] + ): + await progress.edit( + content=f"```Reading history...\n(this might take a while)```" + ) # read logs + t0 = datetime.now() if os.path.exists(self.log_file): channels = {} - with open(self.log_file, mode="r") as f: - channels = json.loads(f.readline().strip()) - self.channels = {id: ChannelLog(channels[id]) for id in channels} + try: + with open(self.log_file, mode="r") as f: + channels = json.loads(f.readline().strip()) + self.channels = {int(id): ChannelLogs(channels[id]) for id in channels} + dt = (datetime.now() - t0).total_seconds() + logging.info(f"log {self.guild.id} > loaded in {dt} s") + except json.decoder.JSONDecodeError: + logging.error(f"log {self.guild.id} > invalid JSON") + except IOError: + logging.error(f"log {self.guild.id} > cannot read") # load channels + t0 = datetime.now() if len(target_channels) == 0: target_channels = self.guild.text_channels - loading_new = False + loading_new = 0 total_msg = 0 total_chan = 0 for channel in target_channels: if channel.id not in self.channels: - loading_new = True - self.channels[channel.id] = ChannelLog(channel) + loading_new += 1 + self.channels[channel.id] = ChannelLogs(channel) async for count, done in self.channels[channel.id].load(channel): - yield ( - total_msg + count, - total_chan + (1 if done else 0), - loading_new, - False, - ) + if count > 0: + tmp_msg = total_msg + count + warning_msg = "(this might take a while)" + if len(target_channels) > 5 and loading_new > 5: + warning_msg = ( + "(most channels are new, this might take a looong while)" + ) + elif loading_new > 0: + warning_msg = ( + "(some channels are new, this might take a long while)" + ) + dt = (datetime.now() - t0).total_seconds() + await progress.edit( + content=f"```Reading history...\n{tmp_msg} messages in {total_chan + 1} channels ({round(tmp_msg/dt)}m/s)\n{warning_msg}```" + ) + if done: + total_chan += 1 total_msg += len(self.channels[channel.id].messages) - total_chan += 1 - yield total_msg, total_chan, loading_new, True + dt = (datetime.now() - t0).total_seconds() + await progress.edit( + content=f"```Analysing...\n{tmp_msg} messages in {total_chan} channels```" + ) + logging.info(f"log {self.guild.id} > queried in {dt} s -> {total_msg / dt} m/s") # write logs + t0 = datetime.now() with open(self.log_file, mode="w") as f: - f.write(json.dump(self.dict())) + f.write(json.dumps(self.dict())) + dt = (datetime.now() - t0).total_seconds() + logging.info(f"log {self.guild.id} > written in {dt} s") + return total_msg, total_chan diff --git a/utils.py b/utils.py index 66e8a49..b56cfc9 100644 --- a/utils.py +++ b/utils.py @@ -1,21 +1,18 @@ +from typing import List +import logging +import discord + # DISCORD API -def debug(message, txt): - """ - Print a log with the context of the current event - - :param message: message that triggered the event - :type message: discord.Message - :param txt: text of the log - :type txt: str - """ - print(f"{message.guild} > #{message.channel}: {txt}") +def debug(message: discord.Message, txt: str): + logging.info(f"{message.guild} > #{message.channel}: {txt}") # LISTS -def no_duplicate(seq): + +def no_duplicate(seq: list) -> list: """ Remove any duplicates on a list @@ -29,16 +26,12 @@ def no_duplicate(seq): # MESSAGE FORMATTING -def aggregate(names): + +def aggregate(names: List[str]) -> str: """ Aggregate names with , and & Example : "a, b, c & d" - - :param names: list of names - :type names: list[str] - :return: correct aggregation - :rtype: str """ if len(names) == 0: return ""