start and stop dates

This commit is contained in:
Klemek
2021-04-09 17:39:42 +02:00
parent b7a6f3313b
commit 2062f08721
6 changed files with 156 additions and 37 deletions
+5
View File
@@ -53,9 +53,13 @@
* Common arguments:
* @member/me: filter for one or more member
* #channel/here: filter for one or more channel
* <date1> - filter after <date1>
* <date2> - filter before <date2>
* all/everyone - include bots messages
* fast: only read cache
* fresh: does not read cache
(Dates are formatted 'yyyy-mm-dd' or 'yyyy-mm-ddThh:mm' (ISO 8601) or 'week/month/year')
```
## Running this bot
@@ -109,6 +113,7 @@ python3 src/main.py
* improved scan `%words`
* remove old and unused logs at start and guild leaving
* GDPR disclaimer before scanning
* start and stop dates
* **v1.12**
* more scans: `%words`
* concurrent `fast` analysis
+1
View File
@@ -1,3 +1,4 @@
discord.py
python-dotenv
python-dateutil
git+git://github.com/Klemek/miniscord.git
+6 -1
View File
@@ -38,7 +38,10 @@ class Frequency:
*,
member_specific: bool,
) -> List[str]:
self.dates.sort()
delta = self.dates[-1] - self.dates[0]
if delta.days == 0:
delta = timedelta(days=1)
total_msg = len(self.dates)
busiest_weekday = top_key(self.week)
busiest_hour = top_key(self.day)
@@ -56,7 +59,9 @@ class Frequency:
f"- **latest message**: {str_datetime(self.dates[-1])} ({from_now(self.dates[-1])})",
f"- **messages/day**: {precise(total_msg/delta.days, precision=3)}",
f"- **busiest day of week**: {calendar.day_name[busiest_weekday]} (~{precise(self.week[busiest_weekday]/n_weekdays, precision=3)} msg, {percent(self.week[busiest_weekday]/total_msg)})",
f"- **busiest day ever**: {str_date(self.busiest_day)} ({from_now(self.busiest_day)}, {self.busiest_day_count} msg)",
f"- **busiest day ever**: {str_date(self.busiest_day)} ({from_now(self.busiest_day)}, {self.busiest_day_count} msg)"
if self.busiest_day is not None
else "",
f"- **messages/hour**: {precise(total_msg*3600/delta.total_seconds(), precision=3)}",
f"- **busiest hour of day**: {busiest_hour:0>2}:00 (~{precise(self.day[busiest_hour]/n_hours, precision=3)} msg, {percent(self.day[busiest_hour]/total_msg)})",
f"- **busiest hour ever**: {str_datetime(self.busiest_hour)} ({from_now(self.busiest_hour)}, {self.busiest_hour_count} msg)",
+8
View File
@@ -1,5 +1,6 @@
from typing import Union, Tuple, Any
import discord
from discord import message
from . import MessageLog
from utils import FakeMessage
@@ -17,6 +18,7 @@ class ChannelLogs:
self.last_message_id = None
self.format = FORMAT
self.messages = []
self.start_date = None
elif isinstance(channel, dict):
self.format = channel["format"] if "format" in channel else None
if not self.is_format():
@@ -31,6 +33,9 @@ class ChannelLogs:
self.messages = [
MessageLog(message, self) for message in channel["messages"]
]
self.start_date = (
self.messages[-1].created_at if len(self.messages) > 0 else None
)
def is_format(self):
return self.format == FORMAT
@@ -80,6 +85,9 @@ class ChannelLogs:
except discord.errors.HTTPException:
yield -1, True
return # When an exception occurs (like Forbidden)
self.start_date = (
self.messages[-1].created_at if len(self.messages) > 0 else None
)
yield len(self.messages), True
def dict(self) -> dict:
+54 -2
View File
@@ -5,7 +5,16 @@ import logging
import re
import discord
from utils import no_duplicate, get_intro, delta, gdpr
from utils import (
no_duplicate,
get_intro,
delta,
gdpr,
ISO8601_REGEX,
parse_time,
RELATIVE_TIME,
)
from logs import (
GuildLogs,
ChannelLogs,
@@ -54,22 +63,42 @@ class Scanner(ABC):
str(channel.id) for channel in message.channel_mentions
]
str_mentions = [str(member.id) for member in message.mentions]
dates = []
for i, arg in enumerate(args[1:]):
skip_check = False
if re.match(r"^<@!?\d+>$", arg):
arg = arg[3:-1] if "!" in arg else arg[2:-1]
elif re.match(r"^<#!?\d+>$", arg):
arg = arg[3:-1] if "!" in arg else arg[2:-1]
elif re.match(ISO8601_REGEX, arg) or arg in RELATIVE_TIME:
dates += [parse_time(arg)]
skip_check = True
if len(dates) > 2:
await message.channel.send(
f"Too many date arguments: `{arg}`", reference=message
)
return
if (
arg not in self.valid_args + ["me", "here", "fast", "fresh"]
and (not arg.isdigit() or not self.has_digit_args)
and arg not in str_channel_mentions
and arg not in str_mentions
and not skip_check
):
await message.channel.send(
f"Unrecognized argument: `{arg}`", reference=message
)
return
self.start_datetime = None if len(dates) < 1 else min(dates)
self.stop_datetime = datetime.now() if len(dates) < 2 else max(dates)
if self.start_datetime is not None and self.start_datetime > datetime.now():
await message.channel.send(
f"Start date is after today", reference=message
)
return
# Get selected channels or all of them if no channel arguments
self.channels = no_duplicate(message.channel_mentions)
@@ -103,6 +132,18 @@ class Scanner(ABC):
total_msg, total_chan = await logs.load(
progress, self.channels, fast="fast" in args, fresh="fresh" in args
)
if self.start_datetime is not None:
self.start_datetime = max(
self.start_datetime,
min(
[
logs.channels[channel.id].start_date
for channel in self.channels
if channel.id in logs.channels
and logs.channels[channel.id].start_date is not None
]
),
)
if total_msg == CANCELLED:
await message.channel.send(
"Operation cancelled by user",
@@ -127,13 +168,21 @@ class Scanner(ABC):
[
self.compute_message(channel_logs, message_log)
for message_log in channel_logs.messages
if (
self.start_datetime is None
or message_log.created_at >= self.start_datetime
)
and (
self.stop_datetime is None
or message_log.created_at <= self.stop_datetime
)
]
)
self.total_msg += len(channel_logs.messages)
self.msg_count += count
self.chan_count += 1 if count > 0 else 0
logging.info(f"scan {guild.id} > scanned in {delta(t0):,}ms")
if self.total_msg == 0:
if self.msg_count == 0:
await message.channel.send(
"There are no messages found matching the filters",
reference=message,
@@ -150,12 +199,15 @@ class Scanner(ABC):
self.members,
self.msg_count,
self.chan_count,
self.start_datetime,
self.stop_datetime,
)
)
logging.info(f"scan {guild.id} > results in {delta(t0):,}ms")
response = ""
first = True
for r in results:
if r:
if len(response + "\n" + r) > 2000:
await message.channel.send(
response,
+73 -25
View File
@@ -1,15 +1,21 @@
from calendar import month
from typing import List, Dict, Union, Optional, Any
import os
import logging
import discord
import math
from datetime import datetime
from datetime import datetime, timedelta
import re
import dateutil.parser
from dateutil.relativedelta import relativedelta
# OTHER
COMMON_HELP_ARGS = [
"@member/me - filter for one or more member",
"#channel/here - filter for one or more channel",
"<date1> - filter after <date1>",
"<date2> - filter before <date2>",
"fast - only read cache",
"fresh - does not read cache (long)",
]
@@ -30,6 +36,7 @@ def generate_help(
%{cmd}: {info}
arguments:
{arg_list}
(Dates are formated 'yyyy-mm-dd' or 'yyyy-mm-ddThh:mm' (ISO 8601) or 'week/month/year')
Example: %{cmd} {example}
```"""
@@ -153,6 +160,37 @@ def precise(p: float, *, precision: int = 2) -> str:
# DATE FORMATTING
# Regular expression intended to match ISO 8601 date / date-time strings.
# Command arguments are tested against it to decide whether they should be
# parsed as dates.
ISO8601_REGEX = r"^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)?$"
# Fully specified "yyyy-mm-ddThh:mm:ss" timestamp with every field at its
# lowest value; the same literal is used by parse_iso_datetime to complete
# truncated dates.
ISO8601_FULL = "0000-01-01T00:00:00"
def parse_iso_datetime(str_date: str) -> datetime:
    """Parse an ISO 8601-style date string into a ``datetime``.

    A truncated ``yyyy[-mm[-dd[Thh[:mm[:ss]]]]]`` value (e.g. ``2021`` or
    ``2021-04-09T17``) is first completed with the tail of ``ISO8601_FULL``,
    so a missing month/day becomes ``01`` and missing time fields become
    ``00``. Any other string is handed to ``dateutil`` unchanged.

    :param str_date: the date text to parse
    :return: the parsed datetime
    :raises: whatever ``dateutil.parser.parse`` raises on unparsable input
    """
    # Raw string: "\d" inside a plain literal is an invalid escape sequence.
    # NOTE(review): padding is presumably needed because dateutil fills
    # missing fields from the current date rather than from the lowest
    # values -- confirm against the dateutil documentation.
    if re.match(r"^\d{4}(-\d{2}(-\d{2}(T\d{2}(:\d{2}(:\d{2})?)?)?)?)?$", str_date):
        # Append only the part of the template the input does not cover.
        str_date += ISO8601_FULL[len(str_date):]
    return dateutil.parser.parse(str_date)
# Keywords accepted as relative date arguments, mapped to the offset that
# parse_relative_time subtracts from the current UTC time
# (so "today" means "now minus one day", "yesterday" "now minus two days").
RELATIVE_TIME = {
"today": relativedelta(days=1),
"yesterday": relativedelta(days=2),
"week": relativedelta(weeks=1),
"month": relativedelta(months=1),
"year": relativedelta(years=1),
}
def parse_relative_time(src: str) -> datetime:
    """Return the current UTC time moved back by the offset named ``src``.

    :param src: a key of ``RELATIVE_TIME``
    :return: ``datetime.utcnow()`` minus the matching offset
    :raises KeyError: if ``src`` is not a key of ``RELATIVE_TIME``
    """
    current = datetime.utcnow()
    offset = RELATIVE_TIME[src]
    return current - offset
def parse_time(src: str) -> datetime:
    """Turn a date argument into a ``datetime``.

    Keys of ``RELATIVE_TIME`` are resolved by ``parse_relative_time``;
    everything else is passed to ``parse_iso_datetime``.
    """
    parser = parse_relative_time if src in RELATIVE_TIME else parse_iso_datetime
    return parser(src)
def str_date(date: datetime) -> str:
    """Format ``date`` as day, abbreviated month and year, e.g. ``12 Jun. 2018``."""
    return f"{date:%d %b. %Y}"
@@ -162,29 +200,37 @@ def str_datetime(date: datetime) -> str:
return date.strftime("%H:%M, %d %b. %Y") # 12:05, 12 Jun. 2018
def from_now(src: Optional[datetime]) -> str:
if src is None:
return "never"
delay = datetime.utcnow() - src
def str_delta(delay: timedelta) -> str:
seconds = delay.seconds
minutes = seconds // 60
hours = minutes // 60
if delay.days < 1:
if hours < 1:
if minutes == 0:
return "now"
return "no time"
elif minutes == 1:
return "a minute ago"
return "a minute"
else:
return f"{minutes} minutes ago"
return f"{minutes} minutes"
elif hours == 1:
return "an hour ago"
return "an hour"
else:
return f"{hours} hours ago"
return f"{hours} hours"
elif delay.days == 1:
return "yesterday"
return "one day"
else:
return f"{delay.days:,} days ago"
return f"{delay.days:,} days"
def from_now(src: Optional[datetime]) -> str:
if src is None:
return "never"
output = str_delta(datetime.utcnow() - src)
if output == "no time":
return "now"
elif output == "one day":
return "yesterday"
return output + " ago"
# APP SPECIFIC
@@ -197,46 +243,48 @@ def get_intro(
members: List[discord.Member],
nmm: int, # number of messages impacted
nc: int, # number of impacted channels
start_datetime: datetime,
stop_datetime: datetime,
) -> str:
"""
Get the introduction sentence of the response
"""
time_text = ""
if start_datetime is not None:
stop_datetime = datetime.now() if stop_datetime is None else stop_datetime
time_text = f" (in {str_delta(stop_datetime - start_datetime)})"
# Show all data (members, channels) when it's less than 5 units
if len(members) == 0:
# Full scan of the server
if full:
return f"{subject} in this server ({nc} channels, {nmm:,} messages):"
return f"{subject} in this server ({nc} channels, {nmm:,} messages){time_text}:"
elif len(channels) < 5:
return f"{aggregate([c.mention for c in channels])} {subject.lower()} in {nmm:,} messages:"
return f"{aggregate([c.mention for c in channels])} {subject.lower()} in {nmm:,} messages{time_text}:"
else:
return (
f"These {len(channels)} channels {subject.lower()} in {nmm:,} messages:"
)
return f"These {len(channels)} channels {subject.lower()} in {nmm:,} messages{time_text}:"
elif len(members) < 5:
if full:
return f"{aggregate([m.mention for m in members])} {subject.lower()} in {nmm:,} messages:"
return f"{aggregate([m.mention for m in members])} {subject.lower()} in {nmm:,} messages{time_text}:"
elif len(channels) < 5:
return (
f"{aggregate([m.mention for m in members])} on {aggregate([c.mention for c in channels])} "
f"{subject.lower()} in {nmm:,} messages:"
f"{subject.lower()} in {nmm:,} messages{time_text}:"
)
else:
return (
f"{aggregate([m.mention for m in members])} on these {len(channels)} channels "
f"{subject.lower()} in {nmm:,} messages:"
f"{subject.lower()} in {nmm:,} messages{time_text}:"
)
else:
if full:
return (
f"These {len(members)} members {subject.lower()} in {nmm:,} messages:"
)
return f"These {len(members)} members {subject.lower()} in {nmm:,} messages{time_text}:"
elif len(channels) < 5:
return (
f"These {len(members)} members on {aggregate([c.mention for c in channels])} "
f"{subject.lower()} in {nmm:,} messages:"
f"{subject.lower()} in {nmm:,} messages{time_text}:"
)
else:
return (
f"These {len(members)} members on these {len(channels)} channels "
f"{subject.lower()} in {nmm:,} messages:"
f"{subject.lower()} in {nmm:,} messages{time_text}:"
)