diff --git a/bot.py b/bot.py index f012335..61d37e7 100755 --- a/bot.py +++ b/bot.py @@ -1,13 +1,14 @@ from miniscord import Bot import logging +import emojis import emotes logging.basicConfig( format="[%(asctime)s][%(levelname)s][%(module)s] %(message)s", level=logging.INFO ) -emotes.load_emojis() +emojis.load_emojis() bot = Bot( "Discord Analyst", # name diff --git a/emojis.py b/emojis.py new file mode 100644 index 0000000..80570f2 --- /dev/null +++ b/emojis.py @@ -0,0 +1,85 @@ +import re +import json +import logging + +EXTRA_EMOJI = { + "thumbup": "1f44d", + "thumbdown": "1f44e", + "timer": "23f2-fe0f", + "cowboy": "1f920", + "clown": "1f921", + "newspaper2": "1f5de-fe0f", + "french_bread": "1f956", + "nerd": "1f913", + "zipper_mouth": "1f910", + "salad": "1f957", + "rolling_eyes": "1f644", + "basketball_player": "26f9-fe0f-200d-2642-fe0f", + "thinking": "1f914", + "e_mail": "2709-fe0f", + "slight_frown": "1f641", + "skull_crossbones": "2620-fe0f", + "hand_splayed": "1f590-fe0f", + "speaking_head": "1f5e3-fe0f", + "cross": "271d-fe0f", + "crayon": "1f58d-fe0f", + "head_bandage": "1f915", + "rofl": "1f923", + "flag_white": "1f3f3-fe0f", + "slight_smile": "1f642", + "fork_knife_plate": "1f37d-fe0f", + "robot": "1f916", + "hugging": "1f917", + "biohazard": "2623-fe0f", + "notepad_spiral": "1f5d2-fe0f", + "lifter": "1f3cb-fe0f-200d-2642-fe0f", + "race_car": "1f3ce-fe0f", + "left_facing_fist": "1f91b", + "right_facing_fist": "1f91c", + "tools": "1f6e0-fe0f", + "umbrella2": "2602-fe0f", + "upside_down": "2b07-fe0f", + "first_place": "1f947", + "dagger": "1f5e1-fe0f", + "fox": "1f98a", + "menorah": "1f54e", + "desktop": "1f5a5-fe0f", + "motorcycle": "1f3cd-fe0f", + "levitate": "1f574-fe0f", + "cheese": "1f9c0", + "fingers_crossed": "1f91e", + "frowning2": "1f626", + "microphone2": "1f399-fe0f", + "flag_black": "1f3f4", + "chair": "1FA91", + "champagne_glass": "1F942", + "raised_hand": "270B", + "knife": "1F52A", + "postal_horn": "1F4EF", + "punch": "1F44A", +} + +global_list = {} +unicode_list = [] +regex = re.compile("(|:[\\w\\-\\~]+:)") + + +def load_emojis(): + global global_list, unicode_list, regex + emoji_list = [] + with open("emoji.json", mode="r") as f: + emoji_list = json.loads(f.readline().strip()) + for emoji in EXTRA_EMOJI: + emoji_list += [{"short_name": emoji, "unified": EXTRA_EMOJI[emoji]}] + unicode_list = [] + for emoji in emoji_list: + shortname = emoji["short_name"] + unified = emoji["unified"] + if unified is not None and shortname is not None: + unicode_escaped = "".join([f"\\U{c:0>8}" for c in unified.split("-")]) + unicode = bytes(unicode_escaped, "ascii").decode("unicode-escape") + shortcode = shortname.replace("-", "_") + global_list[unicode] = f":{shortcode}:" + unicode_list += [unicode_escaped] + regex = re.compile(f"(|:\\w+:|{'|'.join(unicode_list)})") + logging.info(f"loaded {len(unicode_list)} emojis") diff --git a/emotes.py b/emotes.py index 28f3295..72aa812 100644 --- a/emotes.py +++ b/emotes.py @@ -7,92 +7,15 @@ import json import logging # Custom libs -from utils import debug, aggregate, no_duplicate -from log_manager import GuildLogs, ChannelLogs, MessageLog + +from utils import aggregate, no_duplicate +from log_manager import GuildLogs, ChannelLogs +import emojis # CONSTANTS CHUNK_SIZE = 1000 -# preload - -EXTRA_EMOJI = { - "thumbup": "1f44d", - "thumbdown": "1f44e", - "timer": "23f2-fe0f", - "cowboy": "1f920", - "clown": "1f921", - "newspaper2": "1f5de-fe0f", - "french_bread": "1f956", - "nerd": "1f913", - "zipper_mouth": "1f910", - "salad": "1f957", - "rolling_eyes": "1f644", - "basketball_player": "26f9-fe0f-200d-2642-fe0f", - "thinking": "1f914", - "e_mail": "2709-fe0f", - "slight_frown": "1f641", - "skull_crossbones": "2620-fe0f", - "hand_splayed": "1f590-fe0f", - "speaking_head": "1f5e3-fe0f", - "cross": "271d-fe0f", - "crayon": "1f58d-fe0f", - "head_bandage": "1f915", - "rofl": "1f923", - "flag_white": "1f3f3-fe0f", - "slight_smile": "1f642", - "fork_knife_plate": "1f37d-fe0f", - "robot": "1f916", - "hugging": "1f917", - "biohazard": "2623-fe0f", - "notepad_spiral": "1f5d2-fe0f", - "lifter": "1f3cb-fe0f-200d-2642-fe0f", - "race_car": "1f3ce-fe0f", - "left_facing_fist": "1f91b", - "right_facing_fist": "1f91c", - "tools": "1f6e0-fe0f", - "umbrella2": "2602-fe0f", - "upside_down": "2b07-fe0f", - "first_place": "1f947", - "dagger": "1f5e1-fe0f", - "fox": "1f98a", - "menorah": "1f54e", - "desktop": "1f5a5-fe0f", - "motorcycle": "1f3cd-fe0f", - "levitate": "1f574-fe0f", - "cheese": "1f9c0", - "fingers_crossed": "1f91e", - "frowning2": "1f626", - "microphone2": "1f399-fe0f", - "flag_black": "1f3f4", - "chair": "1FA91", -} - -GLOBAL_EMOJIS = {} -EMOJI_REGEX = re.compile("(|:\\w+:)") - - -def load_emojis(): - global GLOBAL_EMOJIS, INV_GLOBAL_EMOJIS, EMOJI_REGEX - emoji_list = [] - with open("emoji.json", mode="r") as f: - emoji_list = json.loads(f.readline().strip()) - for emoji in EXTRA_EMOJI: - emoji_list += [{"short_name": emoji, "unified": EXTRA_EMOJI[emoji]}] - unicode_list = [] - for emoji in emoji_list: - shortcode = emoji["short_name"] - unified = emoji["unified"] - if unified is not None and shortcode is not None: - unicode_escaped = "".join([f"\\U{c:0>8}" for c in unified.split("-")]) - unicode = bytes(unicode_escaped, "ascii").decode("unicode-escape") - shortcode = f":{shortcode.replace('-','_')}:" - GLOBAL_EMOJIS[unicode] = shortcode - unicode_list += [unicode_escaped] - EMOJI_REGEX = re.compile(f"(|:\\w+:|{'|'.join(unicode_list)})") - logging.info(f"loaded {len(GLOBAL_EMOJIS)} emojis") - - # MAIN HELP = ( @@ -130,6 +53,12 @@ async def compute(client: discord.client, message: discord.Message, *args: str): if full: channels = guild.text_channels + # get max emotes to view + top = 20 + for arg in args: + if arg.isdigit(): + top = int(arg) + # Get selected members members = no_duplicate(message.mentions) raw_members = no_duplicate(message.raw_mentions) @@ -139,22 +68,26 @@ async def compute(client: discord.client, message: discord.Message, *args: str): progress = await message.channel.send("```Starting analysis...```") total_msg, total_chan = await logs.load(progress, channels) msg_count = 0 + chan_count = 0 for id in logs.channels: - msg_count += analyse_channel( + count = analyse_channel( logs.channels[id], emotes, raw_members, all_emojis="all" in args ) + msg_count += count + chan_count += 1 if count > 0 else 0 await progress.edit(content=f"```Computing results...```") - # Delete custom progress message - await progress.delete() # Display results await tell_results( - get_intro(emotes, full, channels, members, msg_count, total_chan), + get_intro(emotes, full, channels, members, msg_count, chan_count), emotes, message.channel, total_msg, + top=top, allow_unused=full and len(members) == 0, show_life=False, ) + # Delete custom progress message + await progress.delete() # CLASSES @@ -211,35 +144,32 @@ def analyse_channel( ) -> int: count = 0 for message in channel.messages: - # If author included in the selection (empty list is all) - if len(raw_members) == 0 or message.author in raw_members: + # If author is included in the selection (empty list is all) + if not message.bot and (len(raw_members) == 0 or message.author in raw_members): count += 1 # Find all emotes un the current message in the form "<:emoji:123456789>" # Filter for known emotes - found = EMOJI_REGEX.findall(message.content) + found = emojis.regex.findall(message.content) # For each emote, update its usage for name in found: if name not in emotes: - if not all_emojis or name not in GLOBAL_EMOJIS: + if not all_emojis or name not in emojis.unicode_list: continue - name = GLOBAL_EMOJIS[name] emotes[name].usages += 1 emotes[name].update_use(message.created_at) - # For each reaction of this message, test if known emote and update when it's the case - for name in message.reactions: - raw_name = name - if name not in emotes: - if not all_emojis or name not in GLOBAL_EMOJIS: - continue - name = GLOBAL_EMOJIS[name] - if len(raw_members) == 0: - emotes[name].reactions += len(message.reactions[raw_name]) - emotes[name].update_use(message.created_at) - else: - for member in raw_members: - if member in message.reactions[raw_name]: - emotes[name].reactions += 1 - emotes[name].update_use(message.created_at) + # For each reaction of this message, test if known emote and update when it's the case + for name in message.reactions: + if name not in emotes: + if not all_emojis or name not in emojis.unicode_list: + continue + if len(raw_members) == 0: + emotes[name].reactions += len(message.reactions[name]) + emotes[name].update_use(message.created_at) + else: + for member in raw_members: + if member in message.reactions[name]: + emotes[name].reactions += 1 + emotes[name].update_use(message.created_at) return count @@ -251,12 +181,14 @@ async def tell_results( emotes: Dict[str, Emote], channel: discord.TextChannel, nmm: int, # number of impacted messages + top: int, # top n emojis *, allow_unused: bool, show_life: bool, ): names = [name for name in emotes] names.sort(key=lambda name: emotes[name].score(), reverse=True) + names = names[:top] res = [intro] res += [ f"{get_place(names.index(name))} {name} - " @@ -284,6 +216,7 @@ def get_intro( channels: List[discord.TextChannel], members: List[discord.Member], nmm: int, # number of messages impacted + nc: int, # number of impacted channels ) -> str: """ Get the introduction sentence of the response diff --git a/log_manager.py b/log_manager.py index 0787dd3..0aae03c 100644 --- a/log_manager.py +++ b/log_manager.py @@ -13,6 +13,7 @@ if not os.path.exists(LOG_DIR): CHUNK_SIZE = 1000 +FORMAT = 2 class FakeMessage: @@ -33,6 +34,7 @@ class MessageLog: self.reference = ( message.reference.message_id if message.reference is not None else None ) + self.bot = message.author.bot or message.author.system self.content = message.content self.mentions = message.raw_mentions self.role_mentions = message.raw_role_mentions @@ -53,6 +55,7 @@ class MessageLog: self.reference = ( int(message["reference"]) if message["reference"] is not None else None ) + self.bot = message["bot"] self.content = message["content"] self.mentions = [int(m) for m in message["mentions"]] self.role_mentions = [int(m) for m in message["role_mentions"]] @@ -80,8 +83,12 @@ class ChannelLogs: self.id = channel.id self.name = channel.name self.last_message_id = None + self.format = FORMAT self.messages = [] elif isinstance(channel, dict): + self.format = channel["format"] if "format" in channel else None + if self.format != FORMAT: + return self.id = int(channel["id"]) self.name = channel["name"] self.last_message_id = channel["last_message_id"] @@ -99,10 +106,9 @@ class ChannelLogs: oldest_first=True, ): self.last_message_id = message.id - if not message.author.bot: - m = MessageLog(message) - await m.load(message) - self.messages.insert(0, m) + m = MessageLog(message) + await m.load(message) + self.messages.insert(0, m) yield len(self.messages), False else: # first load last_message_id = None @@ -118,10 +124,9 @@ class ChannelLogs: ): done += 1 last_message_id = message.id - if not message.author.bot: - m = MessageLog(message) - await m.load(message) - self.messages += [m] + m = MessageLog(message) + await m.load(message) + self.messages += [m] yield len(self.messages), False self.last_message_id = channel.last_message_id except discord.errors.HTTPException: @@ -158,6 +163,12 @@ class GuildLogs: with open(self.log_file, mode="rb") as f: channels = json.loads(gzip.decompress(f.read())) self.channels = {int(id): ChannelLogs(channels[id]) for id in channels} + # remove invalid format + self.channels = { + id: self.channels[id] + for id in self.channels + if self.channels[id].format == FORMAT + } dt = (datetime.now() - t0).total_seconds() logging.info(f"log {self.guild.id} > loaded in {dt} s") except json.decoder.JSONDecodeError: @@ -196,7 +207,7 @@ class GuildLogs: total_msg += len(self.channels[channel.id].messages) dt = (datetime.now() - t0).total_seconds() await progress.edit( - content=f"```Analysing...\n{tmp_msg} messages in {total_chan} channels```" + content=f"```Saving...\n{tmp_msg} messages in {total_chan} channels```" ) logging.info(f"log {self.guild.id} > queried in {dt} s -> {total_msg / dt} m/s") # write logs @@ -205,4 +216,7 @@ class GuildLogs: f.write(gzip.compress(bytes(json.dumps(self.dict()), "utf-8"))) dt = (datetime.now() - t0).total_seconds() logging.info(f"log {self.guild.id} > written in {dt} s") + await progress.edit( + content=f"```Analysing...\n{tmp_msg} messages in {total_chan} channels```" + ) return total_msg, total_chan