diff --git a/README.md b/README.md index 02b294d..0970c0a 100644 --- a/README.md +++ b/README.md @@ -53,9 +53,13 @@ * Common arguments: * @member/me: filter for one or more member * #channel/here: filter for one or more channel + * - filter after + * - filter before * all/everyone - include bots messages * fast: only read cache * fresh: does not read cache + +(Dates are formatted 'yyyy-mm-dd' or 'yyyy-mm-ddThh:mm' (ISO 8601) or 'week/month/year') ``` ## Running this bot @@ -109,6 +113,7 @@ python3 src/main.py * improved scan `%words` * remove old and unused logs at start and guild leaving * GDPR disclaimer before scanning + * start and stop dates * **v1.12** * more scans: `%words` * concurrent `fast` analysis diff --git a/requirements.txt b/requirements.txt index 95a454b..7bc9d08 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ discord.py python-dotenv +python-dateutil git+git://github.com/Klemek/miniscord.git diff --git a/src/data_types/frequency.py b/src/data_types/frequency.py index 14cf5dd..075084c 100644 --- a/src/data_types/frequency.py +++ b/src/data_types/frequency.py @@ -38,7 +38,10 @@ class Frequency: *, member_specific: bool, ) -> List[str]: + self.dates.sort() delta = self.dates[-1] - self.dates[0] + if delta.days == 0: + delta = timedelta(days=1) total_msg = len(self.dates) busiest_weekday = top_key(self.week) busiest_hour = top_key(self.day) @@ -56,7 +59,9 @@ class Frequency: f"- **latest message**: {str_datetime(self.dates[-1])} ({from_now(self.dates[-1])})", f"- **messages/day**: {precise(total_msg/delta.days, precision=3)}", f"- **busiest day of week**: {calendar.day_name[busiest_weekday]} (~{precise(self.week[busiest_weekday]/n_weekdays, precision=3)} msg, {percent(self.week[busiest_weekday]/total_msg)})", - f"- **busiest day ever**: {str_date(self.busiest_day)} ({from_now(self.busiest_day)}, 
{self.busiest_day_count} msg)" + if self.busiest_day is not None + else "", f"- **messages/hour**: {precise(total_msg*3600/delta.total_seconds(), precision=3)}", f"- **busiest hour of day**: {busiest_hour:0>2}:00 (~{precise(self.day[busiest_hour]/n_hours, precision=3)} msg, {percent(self.day[busiest_hour]/total_msg)})", f"- **busiest hour ever**: {str_datetime(self.busiest_hour)} ({from_now(self.busiest_hour)}, {self.busiest_hour_count} msg)", diff --git a/src/logs/channel_logs.py b/src/logs/channel_logs.py index 86c7a28..83681e9 100644 --- a/src/logs/channel_logs.py +++ b/src/logs/channel_logs.py @@ -1,5 +1,6 @@ from typing import Union, Tuple, Any import discord +from discord import message from . import MessageLog from utils import FakeMessage @@ -17,6 +18,7 @@ class ChannelLogs: self.last_message_id = None self.format = FORMAT self.messages = [] + self.start_date = None elif isinstance(channel, dict): self.format = channel["format"] if "format" in channel else None if not self.is_format(): @@ -31,6 +33,9 @@ class ChannelLogs: self.messages = [ MessageLog(message, self) for message in channel["messages"] ] + self.start_date = ( + self.messages[-1].created_at if len(self.messages) > 0 else None + ) def is_format(self): return self.format == FORMAT @@ -80,6 +85,9 @@ class ChannelLogs: except discord.errors.HTTPException: yield -1, True return # When an exception occurs (like Forbidden) + self.start_date = ( + self.messages[-1].created_at if len(self.messages) > 0 else None + ) yield len(self.messages), True def dict(self) -> dict: diff --git a/src/scanners/scanner.py b/src/scanners/scanner.py index 772f63f..8931318 100644 --- a/src/scanners/scanner.py +++ b/src/scanners/scanner.py @@ -5,7 +5,16 @@ import logging import re import discord -from utils import no_duplicate, get_intro, delta, gdpr + +from utils import ( + no_duplicate, + get_intro, + delta, + gdpr, + ISO8601_REGEX, + parse_time, + RELATIVE_TIME, +) from logs import ( GuildLogs, ChannelLogs, @@ -54,22 
+63,42 @@ class Scanner(ABC): str(channel.id) for channel in message.channel_mentions ] str_mentions = [str(member.id) for member in message.mentions] + dates = [] for i, arg in enumerate(args[1:]): + skip_check = False if re.match(r"^<@!?\d+>$", arg): arg = arg[3:-1] if "!" in arg else arg[2:-1] elif re.match(r"^<#!?\d+>$", arg): arg = arg[3:-1] if "!" in arg else arg[2:-1] + elif re.match(ISO8601_REGEX, arg) or arg in RELATIVE_TIME: + dates += [parse_time(arg)] + skip_check = True + if len(dates) > 2: + await message.channel.send( + f"Too many date arguments: `{arg}`", reference=message + ) + return if ( arg not in self.valid_args + ["me", "here", "fast", "fresh"] and (not arg.isdigit() or not self.has_digit_args) and arg not in str_channel_mentions and arg not in str_mentions + and not skip_check ): await message.channel.send( f"Unrecognized argument: `{arg}`", reference=message ) return + self.start_datetime = None if len(dates) < 1 else min(dates) + self.stop_datetime = datetime.now() if len(dates) < 2 else max(dates) + + if self.start_datetime is not None and self.start_datetime > datetime.now(): + await message.channel.send( + f"Start date is after today", reference=message + ) + return + # Get selected channels or all of them if no channel arguments self.channels = no_duplicate(message.channel_mentions) @@ -103,6 +132,18 @@ class Scanner(ABC): total_msg, total_chan = await logs.load( progress, self.channels, fast="fast" in args, fresh="fresh" in args ) + if self.start_datetime is not None: + self.start_datetime = max( + self.start_datetime, + min( + [ + logs.channels[channel.id].start_date + for channel in self.channels + if channel.id in logs.channels + and logs.channels[channel.id].start_date is not None + ] + ), + ) if total_msg == CANCELLED: await message.channel.send( "Operation cancelled by user", @@ -127,13 +168,21 @@ class Scanner(ABC): [ self.compute_message(channel_logs, message_log) for message_log in channel_logs.messages + if ( + 
self.start_datetime is None + or message_log.created_at >= self.start_datetime + ) + and ( + self.stop_datetime is None + or message_log.created_at <= self.stop_datetime + ) ] ) self.total_msg += len(channel_logs.messages) self.msg_count += count self.chan_count += 1 if count > 0 else 0 logging.info(f"scan {guild.id} > scanned in {delta(t0):,}ms") - if self.total_msg == 0: + if self.msg_count == 0: await message.channel.send( "There are no messages found matching the filters", reference=message, @@ -150,21 +199,24 @@ class Scanner(ABC): self.members, self.msg_count, self.chan_count, + self.start_datetime, + self.stop_datetime, ) ) logging.info(f"scan {guild.id} > results in {delta(t0):,}ms") response = "" first = True for r in results: - if len(response + "\n" + r) > 2000: - await message.channel.send( - response, - reference=message if first else None, - allowed_mentions=discord.AllowedMentions.none(), - ) - first = False - response = "" - response += "\n" + r + if r: + if len(response + "\n" + r) > 2000: + await message.channel.send( + response, + reference=message if first else None, + allowed_mentions=discord.AllowedMentions.none(), + ) + first = False + response = "" + response += "\n" + r if len(response) > 0: await message.channel.send( response, diff --git a/src/utils/utils.py b/src/utils/utils.py index 1447a40..a8f2ad5 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -1,15 +1,21 @@ +from calendar import month from typing import List, Dict, Union, Optional, Any import os import logging import discord import math -from datetime import datetime +from datetime import datetime, timedelta +import re +import dateutil.parser +from dateutil.relativedelta import relativedelta # OTHER COMMON_HELP_ARGS = [ "@member/me - filter for one or more member", "#channel/here - filter for one or more channel", + " - filter after ", + " - filter before ", "fast - only read cache", "fresh - does not read cache (long)", ] @@ -30,6 +36,7 @@ def generate_help( %{cmd}: 
# Pattern used to recognise an ISO 8601 date/time argument (calendar, week and
# ordinal dates, with optional time and UTC offset).
ISO8601_REGEX = r"^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)?$"

# Template whose tail is appended to a truncated date so that it always reaches
# the full `YYYY-MM-DDTHH:MM:SS` layout.
ISO8601_FULL = "0000-01-01T00:00:00"

# Truncated extended-format dates: `YYYY`, `YYYY-MM`, `YYYY-MM-DD`,
# `YYYY-MM-DDTHH`, `...THH:MM`, `...THH:MM:SS`.
# Raw string: `\d` inside a normal string literal is an invalid escape sequence.
_PARTIAL_ISO8601 = re.compile(
    r"\d{4}(-\d{2}(-\d{2}(T\d{2}(:\d{2}(:\d{2})?)?)?)?)?"
)


def parse_iso_datetime(str_date: str) -> datetime:
    """
    Parse an ISO 8601 date string into a naive datetime.

    A truncated extended-format date (anything from `YYYY` up to
    `YYYY-MM-DDTHH:MM:SS`) is completed with the matching tail of
    ISO8601_FULL, so missing month/day default to 01 and missing time
    fields default to 00.

    :param str_date: the date text to parse
    :return: the parsed datetime
    :raises ValueError: when a truncated date completes to something that is
        not a real calendar date/time (e.g. month 13)
    """
    if _PARTIAL_ISO8601.fullmatch(str_date):
        padded = str_date + ISO8601_FULL[len(str_date) :]
        # Once padded the layout is fixed, so a strict parse is enough.
        return datetime.strptime(padded, "%Y-%m-%dT%H:%M:%S")
    # Any other form (basic format, week/ordinal dates, offsets...) is left
    # to dateutil, which this module already imports.
    return dateutil.parser.parse(str_date)
def str_delta(delay: timedelta) -> str:
    """
    Describe a duration in words, using its largest unit only.

    Examples: "no time", "a minute", "5 minutes", "an hour", "7 hours",
    "one day", "1,500 days".

    :param delay: the duration to describe; a negative duration is treated
        as zero
    :return: the human readable duration
    """
    # A negative timedelta is normalised to days=-1 plus a large positive
    # `seconds`, which would otherwise be reported as e.g. "23 hours".
    if delay < timedelta(0):
        delay = timedelta(0)
    if delay.days > 1:
        return f"{delay.days:,} days"
    if delay.days == 1:
        return "one day"
    minutes = delay.seconds // 60
    hours = minutes // 60
    if hours > 1:
        return f"{hours} hours"
    if hours == 1:
        return "an hour"
    if minutes > 1:
        return f"{minutes} minutes"
    if minutes == 1:
        return "a minute"
    return "no time"


def from_now(src: Optional[datetime]) -> str:
    """
    Describe how long ago a naive UTC datetime was.

    :param src: the past moment, compared against datetime.utcnow(), or None
    :return: "never" for None, "now" for less than a minute (or a moment in
        the future), "yesterday" for one day, otherwise "<duration> ago"
    """
    if src is None:
        return "never"
    output = str_delta(datetime.utcnow() - src)
    if output == "no time":
        return "now"
    if output == "one day":
        return "yesterday"
    return output + " ago"
{nmm:,} messages:" - ) + return f"These {len(channels)} channels {subject.lower()} in {nmm:,} messages{time_text}:" elif len(members) < 5: if full: - return f"{aggregate([m.mention for m in members])} {subject.lower()} in {nmm:,} messages:" + return f"{aggregate([m.mention for m in members])} {subject.lower()} in {nmm:,} messages{time_text}:" elif len(channels) < 5: return ( f"{aggregate([m.mention for m in members])} on {aggregate([c.mention for c in channels])} " - f"{subject.lower()} in {nmm:,} messages:" + f"{subject.lower()} in {nmm:,} messages{time_text}:" ) else: return ( f"{aggregate([m.mention for m in members])} on these {len(channels)} channels " - f"{subject.lower()} in {nmm:,} messages:" + f"{subject.lower()} in {nmm:,} messages{time_text}:" ) else: if full: - return ( - f"These {len(members)} members {subject.lower()} in {nmm:,} messages:" - ) + return f"These {len(members)} members {subject.lower()} in {nmm:,} messages{time_text}:" elif len(channels) < 5: return ( f"These {len(members)} members on {aggregate([c.mention for c in channels])} " - f"{subject.lower()} in {nmm:,} messages:" + f"{subject.lower()} in {nmm:,} messages{time_text}:" ) else: return ( f"These {len(members)} members on these {len(channels)} channels " - f"{subject.lower()} in {nmm:,} messages:" + f"{subject.lower()} in {nmm:,} messages{time_text}:" )