diff --git a/README.md b/README.md index 32b94a1..d9b73ec 100644 --- a/README.md +++ b/README.md @@ -18,13 +18,24 @@ * %freq - frequency analysis * %compo - composition analysis * %pres - presence analysis -* %first - read first message -* %rand - read a random message -* %last - read last message -* %find - find specific words or phrases * %repeat - repeat last analysis (adding supplied arguments) * %mobile - fix @invalid-user for last command but mentions users * %gdpr - displays GDPR information +* %find - find specific words or phrases (you can use quotes to add spaces in queries, backticks define regexes) + * arguments: + * top - rank users for these queries +* %first - read first message (add text to filter like %find) + * arguments: + * image - pull an image instead of a message + * spoiler:allow/only - allow spoiler images +* %rand - read a random message (add text to filter like %find) + * arguments: + * image - pull an image instead of a message + * spoiler:allow/only - allow spoiler images +* %last - read last message (add text to filter like %find) + * arguments: + * image - pull an image instead of a message + * spoiler:allow/only - allow spoiler images * %emojis - rank emojis by their usage * arguments: * - top emojis, default is 20 @@ -61,6 +72,7 @@ * all/everyone - include bots messages * fast: only read cache * fresh: does not read cache + * nsfw:allow/only - allow messages from nsfw channels * mobile/mention: mentions users (fix @invalid-user bug) (Sample dates: 2020 / 2021-11 / 2021-06-28 / 2020-06-28T23:00 / today / week / 8days / 1y) @@ -113,6 +125,12 @@ python3 src/main.py ## Changelog +* **v1.15** + * `nsfw:allow/only` filters nsfw channels + * `%find` can use regexes + * `%first`, `%rand` and `%last` can be filtered with specific keywords + * `%first`, `%rand` and `%last` can pull images + * bug fix * **v1.14** * `mobile/mention` arg to fix mobile bug * `%repeat`, `%mobile` to repeat commands diff --git a/src/data_types/history.py
b/src/data_types/history.py index 27dff33..8a23925 100644 --- a/src/data_types/history.py +++ b/src/data_types/history.py @@ -3,13 +3,81 @@ import random # Custom libs -from utils import mention, from_now, str_datetime, message_link +from utils import ( + mention, + from_now, + str_datetime, + message_link, + SPLIT_TOKEN, + FilterLevel, + should_allow_spoiler, +) + +MAX_RANDOM_TRIES = 100 class History: def __init__(self): self.messages = [] + async def to_string_image(self, *, type: str, spoiler: FilterLevel) -> List[str]: + if len(self.messages) == 0: + return ["There was no messages matching your filters"] + message = None + intro = None + real_message = None + if type == "first": + self.messages.sort(key=lambda m: m.created_at) + index = 0 + while real_message is None and index < len(self.messages): + message = self.messages[index] + real_message = await message.fetch() + if real_message is not None and not should_allow_spoiler( + real_message, spoiler + ): + real_message = None + index += 1 + intro = f"First image out of {len(self.messages):,}" + elif type == "last": + self.messages.sort(key=lambda m: m.created_at, reverse=True) + index = 0 + while real_message is None and index < len(self.messages): + message = self.messages[index] + real_message = await message.fetch() + if real_message is not None and not should_allow_spoiler( + real_message, spoiler + ): + real_message = None + index += 1 + intro = f"Last image out of {len(self.messages):,}" + elif type == "random": + intro = f"Random image out of {len(self.messages):,}" + tries = 0 + while real_message is None and tries < MAX_RANDOM_TRIES: + message = random.choice(self.messages) + real_message = await message.fetch() + if real_message is not None and not should_allow_spoiler( + real_message, spoiler + ): + real_message = None + tries += 1 + + if real_message is None: + return ["There was no messages matching your filters"] + image = "" + if len(real_message.attachments) > 0: + image = 
real_message.attachments[0].url + elif len(real_message.embeds) > 0: + image = real_message.embeds[0].url + + return [ + intro, + f"{str_datetime(message.created_at)} ({from_now(message.created_at)}) {mention(message.author)} sent:", + f"<{message_link(message)}>", + SPLIT_TOKEN, + image, + ] + def to_string(self, *, type: str) -> List[str]: if len(self.messages) == 0: return ["There was no messages matching your filters"] diff --git a/src/logs/channel_logs.py b/src/logs/channel_logs.py index a5e3857..07bb14e 100644 --- a/src/logs/channel_logs.py +++ b/src/logs/channel_logs.py @@ -1,16 +1,13 @@ from typing import Union, Tuple, Any import discord -from discord import message from datetime import datetime from . import MessageLog -from utils import FakeMessage +from utils import serialize, FakeMessage CHUNK_SIZE = 2000 FORMAT = 3 -NOT_SERIALIZED = ["channel", "guild", "start_date"] - class ChannelLogs: def __init__(self, channel: Union[discord.TextChannel, dict], guild: Any): @@ -50,11 +47,17 @@ class ChannelLogs: def is_format(self): return self.format == FORMAT + def preload(self, channel: discord.TextChannel): + self.name = channel.name + self.channel = channel + + @property + def nsfw(self): + return self.channel.nsfw # without `return` the property always evaluated to None + async def load( self, channel: discord.TextChannel, start_date: datetime, stop_date: datetime ) -> Tuple[int, int]: - self.name = channel.name - self.channel = channel is_empty = self.last_message_id is None try: if is_empty: @@ -110,7 +113,7 @@ class ChannelLogs: tmp_message_id = self.last_message_id async for message in channel.history( limit=CHUNK_SIZE, - after=FakeMessage(self.last_message_id), + after=FakeMessage(self.first_message_id), oldest_first=True, ): last_message_date = message.created_at @@ -119,7 +122,7 @@ class ChannelLogs: await m.load(message) self.messages.insert(0, m) yield len(self.messages), False - except discord.errors.HTTPException: + except discord.errors.HTTPException as e: yield -1, True return # When an exception occurs (like 
Forbidden) self.start_date = ( @@ -128,8 +131,6 @@ class ChannelLogs: yield len(self.messages), True def dict(self) -> dict: - channel = dict(self.__dict__) - for key in NOT_SERIALIZED: - channel.pop(key, None) + channel = serialize(self, not_serialized=["channel", "guild", "start_date"]) channel["messages"] = [message.dict() for message in self.messages] return channel diff --git a/src/logs/guild_logs.py b/src/logs/guild_logs.py index 7600077..8b2a4fc 100644 --- a/src/logs/guild_logs.py +++ b/src/logs/guild_logs.py @@ -215,6 +215,8 @@ class GuildLogs: ] ) total_chan = len(target_channels) + for channel in target_channels: + self.channels[channel.id].preload(channel) else: if not self.locked and not self.lock(): return ALREADY_RUNNING, 0 @@ -231,6 +233,7 @@ class GuildLogs: if channel.id not in self.channels or fresh: loading_new += 1 self.channels[channel.id] = ChannelLogs(channel, self) + self.channels[channel.id].preload(channel) workers += [ Worker(self.channels[channel.id], channel, start_date, stop_date) ] diff --git a/src/logs/message_log.py b/src/logs/message_log.py index 263c245..bd69b4b 100644 --- a/src/logs/message_log.py +++ b/src/logs/message_log.py @@ -1,16 +1,13 @@ -from typing import Union, Any +from typing import Optional, Union, Any import discord from datetime import datetime -from utils import is_extension +from utils import is_extension, serialize IMAGE_FORMAT = [".gif", ".gifv", ".png", ".jpg", ".jpeg", ".bmp"] EMBED_IMAGES = ["image", "gifv"] -NOT_SERIALIZED = ["channel"] - - class MessageLog: def __init__(self, message: Union[discord.Message, dict], channel: Any): self.channel = channel @@ -80,12 +77,13 @@ class MessageLog: async for user in reaction.users(): self.reactions[str(reaction.emoji)] += [user.id] + async def fetch(self) -> Optional[discord.Message]: + try: + return await self.channel.channel.fetch_message(self.id) + except (discord.NotFound, discord.Forbidden, discord.HTTPException): + return None + def dict(self) -> dict: - 
message = dict(self.__dict__) - for key in NOT_SERIALIZED: - message.pop(key, None) - message["created_at"] = self.created_at.isoformat() - message["edited_at"] = ( - self.edited_at.isoformat() if self.edited_at is not None else None + return serialize( + self, not_serialized=["channel"], dates=["created_at", "edited_at"] ) - return message diff --git a/src/main.py b/src/main.py index 97dda5b..3fc6ce9 100644 --- a/src/main.py +++ b/src/main.py @@ -18,7 +18,7 @@ emojis.load_emojis() bot = Bot( "Discord Analyst", - "1.14", + "1.15", alias="%", ) @@ -81,7 +81,7 @@ bot.register_command( scanners.LastScanner.help(), ) bot.register_command( - "rand(om)?", + "(rand(om)?|mood)", lambda *args: scanners.RandomScanner().compute(*args), "rand: read a random message", scanners.RandomScanner.help(), diff --git a/src/scanners/emojis_scanner.py b/src/scanners/emojis_scanner.py index c1c0de2..c96498a 100644 --- a/src/scanners/emojis_scanner.py +++ b/src/scanners/emojis_scanner.py @@ -1,5 +1,4 @@ from typing import Dict, List -from collections import defaultdict import discord diff --git a/src/scanners/find_scanner.py b/src/scanners/find_scanner.py index b1dfd4a..54ece5c 100644 --- a/src/scanners/find_scanner.py +++ b/src/scanners/find_scanner.py @@ -1,6 +1,7 @@ -from typing import Dict, List +from typing import Dict, List, Optional, Tuple from collections import defaultdict import discord +import re # Custom libs @@ -21,7 +22,7 @@ class FindScanner(Scanner): def help() -> str: return generate_help( "find", - "Find specific words or phrases (you can use quotes to add spaces in queries)", + "Find specific words or phrases (you can use quotes to add spaces in queries, backticks define regexes)", args=[ "top - rank users for these queries", "all/everyone - include bots", @@ -43,17 +44,21 @@ class FindScanner(Scanner): self.top = "top" in args or len(self.other_args) == 1 if len(self.other_args) == 0: await message.channel.send( - "You need to add a query to find (you can use quotes to 
add spaces in queries)", + "You need to add a query to find (you can use quotes to add spaces in queries, backticks define regexes)", reference=message, ) return False + self.queries = [ + (query, query.strip("`") if re.match(r"^`.*`$", query) else None) + for query in self.other_args + ] return True def compute_message(self, channel: ChannelLogs, message: MessageLog): return FindScanner.analyse_message( message, self.matches, - self.other_args, + self.queries, self.raw_members, all_messages=self.all_messages, top=self.top, @@ -77,7 +82,9 @@ class FindScanner(Scanner): res += [ self.matches[match].to_string( matches.index(match), - f'"{escape_text(match)}"', + f'"{escape_text(match)}"' + if len(match.strip("`")) == len(match) + else match, total_usage=self.msg_count, ranking=False, transform=lambda id: f" by {mention(id)}", @@ -97,7 +104,7 @@ class FindScanner(Scanner): def analyse_message( message: MessageLog, matches: Dict[str, Counter], - queries: List[str], + queries: List[Tuple[str, Optional[str]]], raw_members: List[int], *, all_messages: bool, @@ -113,10 +120,15 @@ class FindScanner(Scanner): impacted = True content = message.content.lower() for query in queries: - count = content.count(query.lower()) + if query[1] is not None: + count = len(re.findall(query[1], message.content)) + else: + count = content.count(query[0].lower()) if top: if count > 0: matches[message.author].update_use(count, message.created_at) else: - matches[query].update_use(count, message.created_at, message.author) + matches[query[0]].update_use( + count, message.created_at, message.author + ) return impacted diff --git a/src/scanners/first_scanner.py b/src/scanners/first_scanner.py index 1048e2c..ba7622b 100644 --- a/src/scanners/first_scanner.py +++ b/src/scanners/first_scanner.py @@ -9,10 +9,22 @@ from utils import generate_help class FirstScanner(HistoryScanner): @staticmethod def help() -> str: - return generate_help("first", "Read first message") + return generate_help( + "first", 
+ "Read first message (add text to filter like %find)", + args=[ + "image - pull an image instead of a message", + "spoiler:allow/only - allow spoiler images", + ], + ) def __init__(self): super().__init__(help=FirstScanner.help()) - def get_results(self, intro: str) -> List[str]: - return self.history.to_string(type="first") + async def get_results(self, intro: str) -> List[str]: + if self.images_only: + return await self.history.to_string_image( + type="first", spoiler=self.spoiler + ) + else: + return self.history.to_string(type="first") diff --git a/src/scanners/history_scanner.py b/src/scanners/history_scanner.py index 5a3ae4c..aecf5a5 100644 --- a/src/scanners/history_scanner.py +++ b/src/scanners/history_scanner.py @@ -1,26 +1,46 @@ from abc import ABC, abstractmethod -from typing import List +from typing import List, Tuple, Optional import discord +import re # Custom libs from .scanner import Scanner from data_types import History from logs import ChannelLogs, MessageLog +from utils import FilterLevel class HistoryScanner(Scanner, ABC): def __init__(self, *, help: str): super().__init__( has_digit_args=True, - valid_args=["all", "everyone"], + valid_args=["all", "everyone", "spoiler", "spoiler:allow", "spoiler:only"], help=help, intro_context="", + all_args=True, ) async def init(self, message: discord.Message, *args: str) -> bool: self.history = History() self.all_messages = "all" in args or "everyone" in args + self.images_only = "image" in args + if "spoiler" in args or "spoiler:allow" in args: + self.spoiler = FilterLevel.ALLOW + elif "spoiler:only" in args: + self.spoiler = FilterLevel.ONLY + else: + self.spoiler = FilterLevel.NONE + if not self.images_only: + self.queries = [ + ( + query.lower(), + query.strip("`") if re.match(r"^`.*`$", query) else None, + ) + for query in self.other_args + ] + else: + self.queries = [] return True def compute_message(self, channel: ChannelLogs, message: MessageLog): @@ -30,6 +50,8 @@ class HistoryScanner(Scanner, 
ABC): self.history, self.raw_members, all_messages=self.all_messages, + queries=self.queries, + images_only=self.images_only, ) @abstractmethod @@ -44,14 +66,27 @@ class HistoryScanner(Scanner, ABC): raw_members: List[int], *, all_messages: bool, + queries: List[Tuple[str, Optional[str]]], + images_only: bool, ) -> bool: impacted = False # If author is included in the selection (empty list is all) if ( - (not message.bot or all_messages) - and len(raw_members) == 0 - or message.author in raw_members - ) and (message.content or message.attachment): + ( + (not message.bot or all_messages) + and len(raw_members) == 0 + or message.author in raw_members + ) + and (message.content or message.attachment) + and (not images_only or message.image) + ): + content = message.content.lower() + for query in queries: + if query[1] is not None: + if not re.search(query[1], message.content): # search anywhere, like %find (re.match only anchors at the start) + return False + elif query[0] not in content: + return False impacted = True history.messages += [message] return impacted diff --git a/src/scanners/last_scanner.py b/src/scanners/last_scanner.py index 3d8cbf0..055217e 100644 --- a/src/scanners/last_scanner.py +++ b/src/scanners/last_scanner.py @@ -9,10 +9,20 @@ from utils import generate_help class LastScanner(HistoryScanner): @staticmethod def help() -> str: - return generate_help("last", "Read last message") + return generate_help( + "last", + "Read last message (add text to filter like %find)", + args=[ + "image - pull an image instead of a message", + "spoiler:allow/only - allow spoiler images", + ], + ) def __init__(self): super().__init__(help=LastScanner.help()) - def get_results(self, intro: str) -> List[str]: - return self.history.to_string(type="last") + async def get_results(self, intro: str) -> List[str]: + if self.images_only: + return await self.history.to_string_image(type="last", spoiler=self.spoiler) + else: + return self.history.to_string(type="last") diff --git a/src/scanners/random_scanner.py b/src/scanners/random_scanner.py 
index f4fb7a9..42d4488 100644 --- a/src/scanners/random_scanner.py +++ b/src/scanners/random_scanner.py @@ -9,10 +9,22 @@ from utils import generate_help class RandomScanner(HistoryScanner): @staticmethod def help() -> str: - return generate_help("rand", "Read a random message") + return generate_help( + "rand", + "Read a random message (add text to filter like %find)", + args=[ + "image - pull an image instead of a message", + "spoiler:allow/only - allow spoiler images", + ], + ) def __init__(self): super().__init__(help=RandomScanner.help()) - def get_results(self, intro: str) -> List[str]: - return self.history.to_string(type="random") + async def get_results(self, intro: str) -> List[str]: + if self.images_only: + return await self.history.to_string_image( + type="random", spoiler=self.spoiler + ) + else: + return self.history.to_string(type="random") diff --git a/src/scanners/scanner.py b/src/scanners/scanner.py index e5b41b4..dbfb831 100644 --- a/src/scanners/scanner.py +++ b/src/scanners/scanner.py @@ -4,6 +4,7 @@ from datetime import datetime import logging import re import discord +import inspect from utils import ( @@ -15,6 +16,8 @@ from utils import ( RELATIVE_REGEX, parse_time, command_cache, + FilterLevel, + SPLIT_TOKEN, ) from logs import ( GuildLogs, @@ -27,7 +30,17 @@ from logs import ( class Scanner(ABC): - VALID_ARGS = ["me", "here", "fast", "fresh", "mobile", "mention"] + VALID_ARGS = [ + "me", + "here", + "fast", + "fresh", + "mobile", + "mention", + "nsfw", + "nsfw:allow", + "nsfw:only", + ] def __init__( self, @@ -139,6 +152,30 @@ class Scanner(ABC): self.mention_users = "mention" in args or "mobile" in args + # nsfw filter + if "nsfw" in args or "nsfw:allow" in args: + self.nsfw = FilterLevel.ALLOW + elif "nsfw:only" in args: + self.nsfw = FilterLevel.ONLY + else: + self.nsfw = FilterLevel.NONE + + # fix nsfw filter if channel specified + if not self.full and any(channel.nsfw for channel in self.channels): + self.nsfw = FilterLevel.ALLOW + 
elif all(channel.nsfw for channel in self.channels): + self.nsfw = FilterLevel.ONLY + + # filter nsfw channels + if self.nsfw == FilterLevel.NONE: + self.channels = list( + filter(lambda channel: not channel.nsfw, self.channels) + ) + elif self.nsfw == FilterLevel.ONLY: + self.channels = list( + filter(lambda channel: channel.nsfw, self.channels) + ) + if not await self.init(message, *args): return @@ -220,18 +257,20 @@ class Scanner(ABC): await progress.edit(content="```Computing results...```") # Display results t0 = datetime.now() - results = self.get_results( - get_intro( - self.intro_context, - self.full, - self.channels, - self.members, - self.msg_count, - self.chan_count, - self.start_date, - self.stop_date, - ) + intro = get_intro( + self.intro_context, + self.full, + self.channels, + self.members, + self.msg_count, + self.chan_count, + self.start_date, + self.stop_date, ) + if inspect.iscoroutinefunction(self.get_results): + results = await self.get_results(intro) + else: + results = self.get_results(intro) logging.info( f"scan {guild.id} > results in {delta(t0):,}ms" ) @@ -244,7 +283,7 @@ class Scanner(ABC): ) for r in results: if r: - if len(response + "\n" + r) > 2000: + if isinstance(r, int) and r == SPLIT_TOKEN: await message.channel.send( response, reference=message if first else None, @@ -252,7 +291,16 @@ class Scanner(ABC): ) first = False response = "" - response += "\n" + r + elif isinstance(r, str): + if len(response + "\n" + r) > 2000: + await message.channel.send( + response, + reference=message if first else None, + allowed_mentions=allowed_mentions, + ) + first = False + response = "" + response += "\n" + r if len(response) > 0: await message.channel.send( response, diff --git a/src/utils/utils.py b/src/utils/utils.py index 4457326..a2e7c40 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -1,3 +1,4 @@ +from enum import Enum from typing import Callable, List, Dict, Union, Optional, Any import os import logging @@ -17,6 +18,7 @@ 
COMMON_HELP_ARGS = [ " - filter before ", "fast - only read cache", "fresh - does not read cache (long)", + "nsfw:allow/only - allow messages from nsfw channels", "mobile/mention - mentions users (fix @invalid-user bug)", ] @@ -30,7 +32,7 @@ def generate_help( replace_args=[], ): arg_list = "* " + "\n* ".join( - replace_args + COMMON_HELP_ARGS[len(replace_args) :] + args + args + replace_args + COMMON_HELP_ARGS[len(replace_args) :] ) return f"""``` %{cmd}: {info} @@ -49,6 +51,15 @@ def deltas(t0: datetime): return (datetime.now() - t0).total_seconds() +class FilterLevel(Enum): + NONE = 0 + ALLOW = 1 + ONLY = 2 + + +SPLIT_TOKEN = 1152317803 + + # DISCORD API @@ -89,6 +100,25 @@ class FakeMessage: self.id = id +def is_image_spoiler(message: discord.Message) -> bool: + if len(message.attachments) > 0: + return message.attachments[0].is_spoiler() + elif len(message.embeds) > 0: + return re.search(r"\|\|[^|]*http[^|]*\|\|", message.content.lower()) is not None # pipes escaped: look for a ||...http...|| spoiler-wrapped link anywhere in the text + else: + return False + + +def should_allow_spoiler(message: discord.Message, spoiler: FilterLevel) -> bool: + is_spoiler = is_image_spoiler(message) + return ( + not is_spoiler + and spoiler.value <= FilterLevel.ALLOW.value # plain Enum members are not orderable; compare their values + or is_spoiler + and spoiler.value >= FilterLevel.ALLOW.value + ) + + # FILE @@ -137,6 +167,21 @@ def val_sum(d: Dict[Any, int]) -> int: return sum(d.values()) +def serialize( + obj: Any, *, not_serialized: List[str] = [], dates: List[str] = [] +) -> Dict: + output = dict(obj.__dict__) + for key in not_serialized: + output.pop(key, None) + for key in dates: + if output.get(key): # tolerate date keys missing from __dict__ + try: + output[key] = getattr(obj, key).isoformat() + except AttributeError: + pass + return output + + # MESSAGE FORMATTING