start and stop dates

This commit is contained in:
Klemek
2021-04-09 17:39:42 +02:00
parent b7a6f3313b
commit 2062f08721
6 changed files with 156 additions and 37 deletions
+5
View File
@@ -53,9 +53,13 @@
* Common arguments: * Common arguments:
* @member/me: filter for one or more member * @member/me: filter for one or more member
* #channel/here: filter for one or more channel * #channel/here: filter for one or more channel
* <date1> - filter after <date1>
* <date2> - filter before <date2>
* all/everyone - include bots messages * all/everyone - include bots messages
* fast: only read cache * fast: only read cache
* fresh: does not read cache * fresh: does not read cache
(Dates are formatted 'yyyy-mm-dd' or 'yyyy-mm-ddThh:mm' (ISO 8601) or 'week/month/year')
``` ```
## Running this bot ## Running this bot
@@ -109,6 +113,7 @@ python3 src/main.py
* improved scan `%words` * improved scan `%words`
* remove old and unused logs at start and guild leaving * remove old and unused logs at start and guild leaving
* GDPR disclaimer before scanning * GDPR disclaimer before scanning
* start and stop dates
* **v1.12** * **v1.12**
* more scans: `%words` * more scans: `%words`
* concurrent `fast` analysis * concurrent `fast` analysis
+1
View File
@@ -1,3 +1,4 @@
discord.py discord.py
python-dotenv python-dotenv
python-dateutil
git+git://github.com/Klemek/miniscord.git git+git://github.com/Klemek/miniscord.git
+6 -1
View File
@@ -38,7 +38,10 @@ class Frequency:
*, *,
member_specific: bool, member_specific: bool,
) -> List[str]: ) -> List[str]:
self.dates.sort()
delta = self.dates[-1] - self.dates[0] delta = self.dates[-1] - self.dates[0]
if delta.days == 0:
delta = timedelta(days=1)
total_msg = len(self.dates) total_msg = len(self.dates)
busiest_weekday = top_key(self.week) busiest_weekday = top_key(self.week)
busiest_hour = top_key(self.day) busiest_hour = top_key(self.day)
@@ -56,7 +59,9 @@ class Frequency:
f"- **latest message**: {str_datetime(self.dates[-1])} ({from_now(self.dates[-1])})", f"- **latest message**: {str_datetime(self.dates[-1])} ({from_now(self.dates[-1])})",
f"- **messages/day**: {precise(total_msg/delta.days, precision=3)}", f"- **messages/day**: {precise(total_msg/delta.days, precision=3)}",
f"- **busiest day of week**: {calendar.day_name[busiest_weekday]} (~{precise(self.week[busiest_weekday]/n_weekdays, precision=3)} msg, {percent(self.week[busiest_weekday]/total_msg)})", f"- **busiest day of week**: {calendar.day_name[busiest_weekday]} (~{precise(self.week[busiest_weekday]/n_weekdays, precision=3)} msg, {percent(self.week[busiest_weekday]/total_msg)})",
f"- **busiest day ever**: {str_date(self.busiest_day)} ({from_now(self.busiest_day)}, {self.busiest_day_count} msg)", f"- **busiest day ever**: {str_date(self.busiest_day)} ({from_now(self.busiest_day)}, {self.busiest_day_count} msg)"
if self.busiest_day is not None
else "",
f"- **messages/hour**: {precise(total_msg*3600/delta.total_seconds(), precision=3)}", f"- **messages/hour**: {precise(total_msg*3600/delta.total_seconds(), precision=3)}",
f"- **busiest hour of day**: {busiest_hour:0>2}:00 (~{precise(self.day[busiest_hour]/n_hours, precision=3)} msg, {percent(self.day[busiest_hour]/total_msg)})", f"- **busiest hour of day**: {busiest_hour:0>2}:00 (~{precise(self.day[busiest_hour]/n_hours, precision=3)} msg, {percent(self.day[busiest_hour]/total_msg)})",
f"- **busiest hour ever**: {str_datetime(self.busiest_hour)} ({from_now(self.busiest_hour)}, {self.busiest_hour_count} msg)", f"- **busiest hour ever**: {str_datetime(self.busiest_hour)} ({from_now(self.busiest_hour)}, {self.busiest_hour_count} msg)",
+8
View File
@@ -1,5 +1,6 @@
from typing import Union, Tuple, Any from typing import Union, Tuple, Any
import discord import discord
from discord import message
from . import MessageLog from . import MessageLog
from utils import FakeMessage from utils import FakeMessage
@@ -17,6 +18,7 @@ class ChannelLogs:
self.last_message_id = None self.last_message_id = None
self.format = FORMAT self.format = FORMAT
self.messages = [] self.messages = []
self.start_date = None
elif isinstance(channel, dict): elif isinstance(channel, dict):
self.format = channel["format"] if "format" in channel else None self.format = channel["format"] if "format" in channel else None
if not self.is_format(): if not self.is_format():
@@ -31,6 +33,9 @@ class ChannelLogs:
self.messages = [ self.messages = [
MessageLog(message, self) for message in channel["messages"] MessageLog(message, self) for message in channel["messages"]
] ]
self.start_date = (
self.messages[-1].created_at if len(self.messages) > 0 else None
)
def is_format(self): def is_format(self):
return self.format == FORMAT return self.format == FORMAT
@@ -80,6 +85,9 @@ class ChannelLogs:
except discord.errors.HTTPException: except discord.errors.HTTPException:
yield -1, True yield -1, True
return # When an exception occurs (like Forbidden) return # When an exception occurs (like Forbidden)
self.start_date = (
self.messages[-1].created_at if len(self.messages) > 0 else None
)
yield len(self.messages), True yield len(self.messages), True
def dict(self) -> dict: def dict(self) -> dict:
+54 -2
View File
@@ -5,7 +5,16 @@ import logging
import re import re
import discord import discord
from utils import no_duplicate, get_intro, delta, gdpr
from utils import (
no_duplicate,
get_intro,
delta,
gdpr,
ISO8601_REGEX,
parse_time,
RELATIVE_TIME,
)
from logs import ( from logs import (
GuildLogs, GuildLogs,
ChannelLogs, ChannelLogs,
@@ -54,22 +63,42 @@ class Scanner(ABC):
str(channel.id) for channel in message.channel_mentions str(channel.id) for channel in message.channel_mentions
] ]
str_mentions = [str(member.id) for member in message.mentions] str_mentions = [str(member.id) for member in message.mentions]
dates = []
for i, arg in enumerate(args[1:]): for i, arg in enumerate(args[1:]):
skip_check = False
if re.match(r"^<@!?\d+>$", arg): if re.match(r"^<@!?\d+>$", arg):
arg = arg[3:-1] if "!" in arg else arg[2:-1] arg = arg[3:-1] if "!" in arg else arg[2:-1]
elif re.match(r"^<#!?\d+>$", arg): elif re.match(r"^<#!?\d+>$", arg):
arg = arg[3:-1] if "!" in arg else arg[2:-1] arg = arg[3:-1] if "!" in arg else arg[2:-1]
elif re.match(ISO8601_REGEX, arg) or arg in RELATIVE_TIME:
dates += [parse_time(arg)]
skip_check = True
if len(dates) > 2:
await message.channel.send(
f"Too many date arguments: `{arg}`", reference=message
)
return
if ( if (
arg not in self.valid_args + ["me", "here", "fast", "fresh"] arg not in self.valid_args + ["me", "here", "fast", "fresh"]
and (not arg.isdigit() or not self.has_digit_args) and (not arg.isdigit() or not self.has_digit_args)
and arg not in str_channel_mentions and arg not in str_channel_mentions
and arg not in str_mentions and arg not in str_mentions
and not skip_check
): ):
await message.channel.send( await message.channel.send(
f"Unrecognized argument: `{arg}`", reference=message f"Unrecognized argument: `{arg}`", reference=message
) )
return return
self.start_datetime = None if len(dates) < 1 else min(dates)
self.stop_datetime = datetime.now() if len(dates) < 2 else max(dates)
if self.start_datetime is not None and self.start_datetime > datetime.now():
await message.channel.send(
f"Start date is after today", reference=message
)
return
# Get selected channels or all of them if no channel arguments # Get selected channels or all of them if no channel arguments
self.channels = no_duplicate(message.channel_mentions) self.channels = no_duplicate(message.channel_mentions)
@@ -103,6 +132,18 @@ class Scanner(ABC):
total_msg, total_chan = await logs.load( total_msg, total_chan = await logs.load(
progress, self.channels, fast="fast" in args, fresh="fresh" in args progress, self.channels, fast="fast" in args, fresh="fresh" in args
) )
if self.start_datetime is not None:
self.start_datetime = max(
self.start_datetime,
min(
[
logs.channels[channel.id].start_date
for channel in self.channels
if channel.id in logs.channels
and logs.channels[channel.id].start_date is not None
]
),
)
if total_msg == CANCELLED: if total_msg == CANCELLED:
await message.channel.send( await message.channel.send(
"Operation cancelled by user", "Operation cancelled by user",
@@ -127,13 +168,21 @@ class Scanner(ABC):
[ [
self.compute_message(channel_logs, message_log) self.compute_message(channel_logs, message_log)
for message_log in channel_logs.messages for message_log in channel_logs.messages
if (
self.start_datetime is None
or message_log.created_at >= self.start_datetime
)
and (
self.stop_datetime is None
or message_log.created_at <= self.stop_datetime
)
] ]
) )
self.total_msg += len(channel_logs.messages) self.total_msg += len(channel_logs.messages)
self.msg_count += count self.msg_count += count
self.chan_count += 1 if count > 0 else 0 self.chan_count += 1 if count > 0 else 0
logging.info(f"scan {guild.id} > scanned in {delta(t0):,}ms") logging.info(f"scan {guild.id} > scanned in {delta(t0):,}ms")
if self.total_msg == 0: if self.msg_count == 0:
await message.channel.send( await message.channel.send(
"There are no messages found matching the filters", "There are no messages found matching the filters",
reference=message, reference=message,
@@ -150,12 +199,15 @@ class Scanner(ABC):
self.members, self.members,
self.msg_count, self.msg_count,
self.chan_count, self.chan_count,
self.start_datetime,
self.stop_datetime,
) )
) )
logging.info(f"scan {guild.id} > results in {delta(t0):,}ms") logging.info(f"scan {guild.id} > results in {delta(t0):,}ms")
response = "" response = ""
first = True first = True
for r in results: for r in results:
if r:
if len(response + "\n" + r) > 2000: if len(response + "\n" + r) > 2000:
await message.channel.send( await message.channel.send(
response, response,
+73 -25
View File
@@ -1,15 +1,21 @@
from calendar import month
from typing import List, Dict, Union, Optional, Any from typing import List, Dict, Union, Optional, Any
import os import os
import logging import logging
import discord import discord
import math import math
from datetime import datetime from datetime import datetime, timedelta
import re
import dateutil.parser
from dateutil.relativedelta import relativedelta
# OTHER # OTHER
COMMON_HELP_ARGS = [ COMMON_HELP_ARGS = [
"@member/me - filter for one or more member", "@member/me - filter for one or more member",
"#channel/here - filter for one or more channel", "#channel/here - filter for one or more channel",
"<date1> - filter after <date1>",
"<date2> - filter before <date2>",
"fast - only read cache", "fast - only read cache",
"fresh - does not read cache (long)", "fresh - does not read cache (long)",
] ]
@@ -30,6 +36,7 @@ def generate_help(
%{cmd}: {info} %{cmd}: {info}
arguments: arguments:
{arg_list} {arg_list}
(Dates are formated 'yyyy-mm-dd' or 'yyyy-mm-ddThh:mm' (ISO 8601) or 'week/month/year')
Example: %{cmd} {example} Example: %{cmd} {example}
```""" ```"""
@@ -153,6 +160,37 @@ def precise(p: float, *, precision: int = 2) -> str:
# DATE FORMATTING # DATE FORMATTING
# Anchored pattern for ISO 8601 date/time text: a 4-digit year, optionally
# followed by a calendar date (month[-day]), a week date (Www[-d]) or an
# ordinal day (ddd), then an optional time (after 'T' or whitespace) with
# optional fractional part and optional zone ('Z' or +/-hh[:mm]).
ISO8601_REGEX = r"^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)?$"
# Fully specified 'yyyy-mm-ddThh:mm:ss' template (January 1st, midnight).
# NOTE(review): parse_iso_datetime pads truncated dates with this same value.
ISO8601_FULL = "0000-01-01T00:00:00"
def parse_iso_datetime(str_date: str) -> datetime:
    """
    Parse an ISO 8601 style date string into a naive datetime.

    A truncated calendar date ('yyyy', 'yyyy-mm', 'yyyy-mm-dd',
    'yyyy-mm-ddThh', 'yyyy-mm-ddThh:mm') is first completed with the tail of
    ISO8601_FULL, so that the missing fields become January 1st / 00:00:00
    instead of being filled in by the parser's own defaults. Any other input
    is handed to dateutil unchanged.

    :param str_date: the date text to parse
    :return: the parsed datetime; an offset-aware result (input carrying 'Z'
        or a UTC offset) is converted to UTC and stripped of its tzinfo so it
        stays comparable with the naive UTC values produced by
        parse_relative_time
    :raises: whatever dateutil.parser.parse raises on unparsable input
    """
    # Local import: 'timezone' is not part of the module-level imports
    from datetime import timezone

    # The parameter name shadows the module-level str_date() helper,
    # so work on a distinct local name
    text = str_date
    # Raw string avoids invalid-escape warnings; fullmatch also rejects a
    # trailing newline that '$' would have let through
    if re.fullmatch(r"\d{4}(-\d{2}(-\d{2}(T\d{2}(:\d{2}(:\d{2})?)?)?)?)?", text):
        text += ISO8601_FULL[len(text) :]
    parsed = dateutil.parser.parse(text)
    if parsed.tzinfo is not None:
        # Mixing aware and naive datetimes raises TypeError on comparison
        parsed = parsed.astimezone(timezone.utc).replace(tzinfo=None)
    return parsed
# Keywords accepted as relative dates. Each value is the amount of time that
# parse_relative_time subtracts from the current UTC time, so "today" means
# "one day ago" and "yesterday" means "two days ago" (rolling windows, not
# calendar-day boundaries).
RELATIVE_TIME = {
"today": relativedelta(days=1),
"yesterday": relativedelta(days=2),
"week": relativedelta(weeks=1),
"month": relativedelta(months=1),
"year": relativedelta(years=1),
}
def parse_relative_time(src: str) -> datetime:
    """
    Turn a relative-time keyword into a naive UTC datetime.

    :param src: one of the keys of RELATIVE_TIME
    :return: the current UTC time moved back by the keyword's offset
    :raises KeyError: if src is not a key of RELATIVE_TIME
    """
    now = datetime.utcnow()
    offset = RELATIVE_TIME[src]
    return now - offset
def parse_time(src: str) -> datetime:
    """
    Parse a date argument, either a relative keyword or an ISO 8601 string.

    :param src: a key of RELATIVE_TIME or a date string
    :return: the datetime produced by the matching parser
    """
    # Relative keywords take precedence; everything else is treated as a date
    parser = parse_relative_time if src in RELATIVE_TIME else parse_iso_datetime
    return parser(src)
def str_date(date: datetime) -> str: def str_date(date: datetime) -> str:
return date.strftime("%d %b. %Y") # 12 Jun. 2018 return date.strftime("%d %b. %Y") # 12 Jun. 2018
@@ -162,29 +200,37 @@ def str_datetime(date: datetime) -> str:
return date.strftime("%H:%M, %d %b. %Y") # 12:05, 12 Jun. 2018 return date.strftime("%H:%M, %d %b. %Y") # 12:05, 12 Jun. 2018
def from_now(src: Optional[datetime]) -> str: def str_delta(delay: timedelta) -> str:
if src is None:
return "never"
delay = datetime.utcnow() - src
seconds = delay.seconds seconds = delay.seconds
minutes = seconds // 60 minutes = seconds // 60
hours = minutes // 60 hours = minutes // 60
if delay.days < 1: if delay.days < 1:
if hours < 1: if hours < 1:
if minutes == 0: if minutes == 0:
return "now" return "no time"
elif minutes == 1: elif minutes == 1:
return "a minute ago" return "a minute"
else: else:
return f"{minutes} minutes ago" return f"{minutes} minutes"
elif hours == 1: elif hours == 1:
return "an hour ago" return "an hour"
else: else:
return f"{hours} hours ago" return f"{hours} hours"
elif delay.days == 1: elif delay.days == 1:
return "yesterday" return "one day"
else: else:
return f"{delay.days:,} days ago" return f"{delay.days:,} days"
def from_now(src: Optional[datetime]) -> str:
    """
    Describe how long ago a moment was, relative to the current UTC time.

    :param src: the moment to describe, or None
    :return: "never" for None, "now" / "yesterday" for the two special
        durations, otherwise the str_delta text followed by " ago"
    """
    if src is None:
        return "never"
    elapsed = str_delta(datetime.utcnow() - src)
    # Durations that read better as a single word than as "<duration> ago"
    special_cases = {"no time": "now", "one day": "yesterday"}
    if elapsed in special_cases:
        return special_cases[elapsed]
    return elapsed + " ago"
# APP SPECIFIC # APP SPECIFIC
@@ -197,46 +243,48 @@ def get_intro(
members: List[discord.Member], members: List[discord.Member],
nmm: int, # number of messages impacted nmm: int, # number of messages impacted
nc: int, # number of impacted channels nc: int, # number of impacted channels
start_datetime: datetime,
stop_datetime: datetime,
) -> str: ) -> str:
""" """
Get the introduction sentence of the response Get the introduction sentence of the response
""" """
time_text = ""
if start_datetime is not None:
stop_datetime = datetime.now() if stop_datetime is None else stop_datetime
time_text = f" (in {str_delta(stop_datetime - start_datetime)})"
# Show all data (members, channels) when it's less than 5 units # Show all data (members, channels) when it's less than 5 units
if len(members) == 0: if len(members) == 0:
# Full scan of the server # Full scan of the server
if full: if full:
return f"{subject} in this server ({nc} channels, {nmm:,} messages):" return f"{subject} in this server ({nc} channels, {nmm:,} messages){time_text}:"
elif len(channels) < 5: elif len(channels) < 5:
return f"{aggregate([c.mention for c in channels])} {subject.lower()} in {nmm:,} messages:" return f"{aggregate([c.mention for c in channels])} {subject.lower()} in {nmm:,} messages{time_text}:"
else: else:
return ( return f"These {len(channels)} channels {subject.lower()} in {nmm:,} messages{time_text}:"
f"These {len(channels)} channels {subject.lower()} in {nmm:,} messages:"
)
elif len(members) < 5: elif len(members) < 5:
if full: if full:
return f"{aggregate([m.mention for m in members])} {subject.lower()} in {nmm:,} messages:" return f"{aggregate([m.mention for m in members])} {subject.lower()} in {nmm:,} messages{time_text}:"
elif len(channels) < 5: elif len(channels) < 5:
return ( return (
f"{aggregate([m.mention for m in members])} on {aggregate([c.mention for c in channels])} " f"{aggregate([m.mention for m in members])} on {aggregate([c.mention for c in channels])} "
f"{subject.lower()} in {nmm:,} messages:" f"{subject.lower()} in {nmm:,} messages{time_text}:"
) )
else: else:
return ( return (
f"{aggregate([m.mention for m in members])} on these {len(channels)} channels " f"{aggregate([m.mention for m in members])} on these {len(channels)} channels "
f"{subject.lower()} in {nmm:,} messages:" f"{subject.lower()} in {nmm:,} messages{time_text}:"
) )
else: else:
if full: if full:
return ( return f"These {len(members)} members {subject.lower()} in {nmm:,} messages{time_text}:"
f"These {len(members)} members {subject.lower()} in {nmm:,} messages:"
)
elif len(channels) < 5: elif len(channels) < 5:
return ( return (
f"These {len(members)} members on {aggregate([c.mention for c in channels])} " f"These {len(members)} members on {aggregate([c.mention for c in channels])} "
f"{subject.lower()} in {nmm:,} messages:" f"{subject.lower()} in {nmm:,} messages{time_text}:"
) )
else: else:
return ( return (
f"These {len(members)} members on these {len(channels)} channels " f"These {len(members)} members on these {len(channels)} channels "
f"{subject.lower()} in {nmm:,} messages:" f"{subject.lower()} in {nmm:,} messages{time_text}:"
) )