Merge pull request #32 from Klemek/dev

v1.13
This commit is contained in:
Klemek
2021-04-09 19:51:54 +02:00
committed by GitHub
30 changed files with 728 additions and 414 deletions
+26
View File
@@ -0,0 +1,26 @@
# CI workflow: lints the repository with flake8 on every push and pull request,
# once per Python version listed in the matrix below.
name: Python
on: ["push", "pull_request"]
jobs:
syntax:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
# Only flake8 is installed; the project's own requirements are not needed for linting.
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install flake8
# Two passes: the first one fails the job, the second one only reports (--exit-zero).
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+12 -1
View File
@@ -21,6 +21,7 @@
* %first - read first message
* %rand - read a random message
* %last - read last message
* %gdpr - displays GDPR information
* %emojis - rank emotes by their usage
* arguments:
* <n> - top <n> emojis, default is 20
@@ -43,7 +44,7 @@
* %react - rank users by their reactions
* arguments:
* <n> - top <n> messages, default is 10
* %words - rank words by their usage
* %words - (BETA) rank words by their usage
* arguments:
* <n> - words containing <n> or more letters, default is 3
* <n2> - top <n2> words, default is 10
@@ -52,9 +53,13 @@
* Common arguments:
* @member/me: filter for one or more member
* #channel/here: filter for one or more channel
* <date1> - filter after <date1>
* <date2> - filter before <date2>
* all/everyone - include bot messages
* fast: only read cache
* fresh: does not read cache
(Sample dates: 2020 / 2021-11 / 2021-06-28 / 2020-06-28T23:00 / today / week / 8days / 1y)
```
## Running this bot
@@ -104,6 +109,12 @@ python3 src/main.py
## Changelog
* **v1.13**
* improved scan `%words`
* remove old and unused logs at startup and when the bot leaves a guild
* GDPR disclaimer before scanning
* start and stop dates
* bug fixes and improvements
* **v1.12**
* more scans: `%words`
* concurrent `fast` analysis
+1
View File
@@ -1,3 +1,4 @@
discord.py
python-dotenv
python-dateutil
# GitHub no longer serves the unauthenticated git:// protocol, so fetch over HTTPS.
git+https://github.com/Klemek/miniscord.git
+40 -44
View File
@@ -23,49 +23,45 @@ class Composition:
self.spoilers = 0
def to_string(self, msg_count: int) -> List[str]:
ret = []
ret += [
f"- **avg. characters / message**: {self.total_characters/msg_count:.2f}"
]
if self.plain_text > 0:
ret += [
f"- **plain text messages**: {self.plain_text:,} ({percent(self.plain_text/msg_count)})"
]
if self.edited > 0:
ret += [
f"- **edited messages**: {self.edited:,} ({percent(self.edited/msg_count)})"
]
if self.everyone > 0:
ret += [
f"- **@\u200beveryone**: {self.everyone:,} ({percent(self.everyone/msg_count)})"
]
if self.mentions > 0:
ret += [
f"- **mentions**: {self.mentions:,} (in {percent(self.mention_msg/msg_count)} of msg, avg. {precise(self.mentions/msg_count)}/msg)",
]
if self.answers > 0:
ret += [
f"- **answers**: {self.answers:,} ({percent(self.answers/msg_count)})"
]
total_emotes = val_sum(self.emotes)
if total_emotes > 0:
top_emote = top_key(self.emotes)
ret += [
f"- **emojis**: {total_emotes:,} (in {percent(self.emote_msg/msg_count)} of msg, avg. {precise(total_emotes/msg_count)}/msg)",
f"- **most used emoji**: {top_emote} ({plural(self.emotes[top_emote], 'time')}, {percent(self.emotes[top_emote]/total_emotes)})",
]
if self.emote_only > 0:
ret += [
f"- **emoji-only messages**: {self.emote_only:,} ({percent(self.emote_only/msg_count)})"
]
if self.images > 0:
ret += [f"- **images**: {self.images:,} ({percent(self.images/msg_count)})"]
if self.links > 0:
ret += [f"- **links**: {self.links:,} ({percent(self.link_msg/msg_count)})"]
if self.spoilers > 0:
ret += [
f"- **spoilers**: {self.spoilers:,} ({percent(self.spoilers/msg_count)})"
]
if self.tts > 0:
ret += [f"- **tts messages**: {self.tts:,} ({percent(self.tts/msg_count)})"]
top_emote = top_key(self.emotes)
ret = [
f"- **avg. characters / message**: {self.total_characters/msg_count:.2f}",
f"- **plain text messages**: {self.plain_text:,} ({percent(self.plain_text/msg_count)})"
if self.plain_text > 0
else "",
f"- **edited messages**: {self.edited:,} ({percent(self.edited/msg_count)})"
if self.edited > 0
else "",
f"- **@\u200beveryone**: {self.everyone:,} ({percent(self.everyone/msg_count)})"
if self.everyone > 0
else "",
f"- **mentions**: {self.mentions:,} (in {percent(self.mention_msg/msg_count)} of msg, avg. {precise(self.mentions/msg_count)}/msg)"
if self.mentions > 0
else "",
f"- **answers**: {self.answers:,} ({percent(self.answers/msg_count)})"
if self.answers > 0
else "",
f"- **emojis**: {total_emotes:,} (in {percent(self.emote_msg/msg_count)} of msg, avg. {precise(total_emotes/msg_count)}/msg)"
if total_emotes > 0
else "",
f"- **most used emoji**: {top_emote} ({plural(self.emotes[top_emote], 'time')}, {percent(self.emotes[top_emote]/total_emotes)})"
if total_emotes > 0
else "",
f"- **emoji-only messages**: {self.emote_only:,} ({percent(self.emote_only/msg_count)})"
if self.emote_only > 0
else "",
f"- **images**: {self.images:,} ({percent(self.images/msg_count)})"
if self.images > 0
else "",
f"- **links**: {self.links:,} ({percent(self.link_msg/msg_count)})"
if self.links > 0
else "",
f"- **spoilers**: {self.spoilers:,} ({percent(self.spoilers/msg_count)})"
if self.spoilers > 0
else "",
f"- **tts messages**: {self.tts:,} ({percent(self.tts/msg_count)})"
if self.tts > 0
else "",
]
return ret
+10 -10
View File
@@ -38,7 +38,10 @@ class Frequency:
*,
member_specific: bool,
) -> List[str]:
self.dates.sort()
delta = self.dates[-1] - self.dates[0]
if delta.days == 0:
delta = timedelta(days=1)
total_msg = len(self.dates)
busiest_weekday = top_key(self.week)
busiest_hour = top_key(self.day)
@@ -46,7 +49,7 @@ class Frequency:
if (
self.dates[0].weekday() <= busiest_weekday
and self.dates[-1].weekday() >= busiest_weekday
):
) or n_weekdays == 0:
n_weekdays += 1
n_hours = delta.days
if self.dates[0].hour <= busiest_hour and self.dates[-1].hour >= busiest_hour:
@@ -56,19 +59,16 @@ class Frequency:
f"- **latest message**: {str_datetime(self.dates[-1])} ({from_now(self.dates[-1])})",
f"- **messages/day**: {precise(total_msg/delta.days, precision=3)}",
f"- **busiest day of week**: {calendar.day_name[busiest_weekday]} (~{precise(self.week[busiest_weekday]/n_weekdays, precision=3)} msg, {percent(self.week[busiest_weekday]/total_msg)})",
f"- **busiest day ever**: {str_date(self.busiest_day)} ({from_now(self.busiest_day)}, {self.busiest_day_count} msg)",
f"- **busiest day ever**: {str_date(self.busiest_day)} ({from_now(self.busiest_day)}, {self.busiest_day_count} msg)"
if self.busiest_day is not None
else "",
f"- **messages/hour**: {precise(total_msg*3600/delta.total_seconds(), precision=3)}",
f"- **busiest hour of day**: {busiest_hour:0>2}:00 (~{precise(self.day[busiest_hour]/n_hours, precision=3)} msg, {percent(self.day[busiest_hour]/total_msg)})",
f"- **busiest hour ever**: {str_datetime(self.busiest_hour)} ({from_now(self.busiest_hour)}, {self.busiest_hour_count} msg)",
f"- **longest break**: {plural(round(self.longest_break.total_seconds()/3600), 'hour')} ({plural(self.longest_break.days,'day')}) from {str_datetime(self.longest_break_start)} ({from_now(self.longest_break_start)})",
f"- **avg. streak**: {precise(sum(self.streaks)/len(self.streaks), precision=3)} msg",
f"- **longest streak**: {self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)} ({from_now(self.longest_streak_start)})"
if member_specific
else f"- **longest streak**: {mention(self.longest_streak_author)} ({self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)}, {from_now(self.longest_streak_start)})",
]
if member_specific:
ret += [
f"- **longest streak**: {self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)} ({from_now(self.longest_streak_start)})"
]
else:
ret += [
f"- **longest streak**: {mention(self.longest_streak_author)} ({self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)}, {from_now(self.longest_streak_start)})"
]
return ret
+60 -64
View File
@@ -25,74 +25,70 @@ class Presence:
show_top_channel: bool,
member_specific: bool,
) -> List[str]:
ret = []
if chan_count is None:
type = "server's"
elif chan_count == 1:
type = "channel's"
else:
type = "channels'"
if member_specific:
ret += [
f"- **messages**: {msg_count:,} ({percent(msg_count/total_msg)} of {type})"
]
else:
top_member = top_key(self.messages)
ret += [
f"- **top messages**: {mention(top_member)} ({self.messages[top_member]:,} msg, {percent(self.messages[top_member]/val_sum(self.messages))})"
]
if show_top_channel:
top_channel = top_key(self.channel_usage)
channel_sum = val_sum(self.channel_usage)
found_in = sorted(
self.channel_usage,
key=lambda k: self.channel_usage[k] / self.channel_total[k],
)[-1]
ret += [
f"- **most visited channel**: {channel_mention(top_channel)} ({self.channel_usage[top_channel]:,} msg, {percent(self.channel_usage[top_channel]/channel_sum)})",
]
if member_specific:
ret += [
f"- **most contributed channel**: {channel_mention(found_in)} ({self.channel_usage[found_in]:,} msg, {percent(self.channel_usage[found_in]/self.channel_total[found_in])} of {type})"
]
if member_specific:
if len(self.mentions) > 0:
top_mention = top_key(self.mentions)
mention_sum = val_sum(self.mentions)
ret += [
f"- **was mentioned**: {plural(mention_sum, 'time')} ({percent(mention_sum/val_sum(self.mention_count))} of {type})",
f"- **mostly mentioned by**: {mention(top_mention)} ({plural(self.mentions[top_mention], 'time')}, {percent(self.mentions[top_mention]/mention_sum)})",
]
if len(self.mention_others) > 0:
top_mention = top_key(self.mention_others)
mention_sum = val_sum(self.mention_others)
if member_specific:
ret += [
f"- **mentioned others**: {plural(mention_sum, 'time')} ({percent(mention_sum/val_sum(self.mention_count))} of {type})",
f"- **mostly mentioned**: {mention(top_mention)} ({plural(self.mention_others[top_mention], 'time')}, {percent(self.mention_others[top_mention]/mention_sum)})",
]
else:
top_member = top_key(self.mention_count)
ret += [
f"- **mentioned**: {plural(mention_sum, 'time')} ({mention(top_member)}, {percent(self.mention_count[top_member]/val_sum(self.mention_count))})",
f"- **top mentions**: {mention(top_member)} ({plural(self.mention_count[top_member], 'time')}, {percent(self.mention_count[top_member]/val_sum(self.mention_count))})",
f"- **most mentioned**: {mention(top_mention)} ({plural(self.mention_others[top_mention], 'time')}, {percent(self.mention_others[top_mention]/mention_sum)})",
]
if len(self.reactions) > 0:
total_used = val_sum(self.reactions)
top_reaction = top_key(self.reactions)
ret += [
f"- **reactions**: {plural(total_used, 'time')}",
f"- **most used reaction**: {top_reaction} ({plural(self.reactions[top_reaction], 'time')}, {percent(self.reactions[top_reaction]/total_used)})",
]
if member_specific:
ret[
-2
] += f" ({percent(total_used/val_sum(self.used_reaction))} of {type})"
else:
top_member = top_key(self.used_reaction)
ret.insert(
-1,
f"- **top reactions**: {mention(top_member)} ({plural(self.used_reaction[top_member], 'time')}, {percent(self.used_reaction[top_member]/val_sum(self.used_reaction))})",
)
top_member = top_key(self.messages)
top_channel = top_key(self.channel_usage)
channel_sum = val_sum(self.channel_usage)
found_in = top_key(
self.channel_usage,
key=lambda k: self.channel_usage[k] / self.channel_total[k],
)
top_mention = top_key(self.mentions)
mention_sum = val_sum(self.mentions)
top_mention_others = top_key(self.mention_others)
mention_others_sum = val_sum(self.mention_others)
top_member_mentioned = top_key(self.mention_count)
total_reaction_used = val_sum(self.reactions)
top_reaction = top_key(self.reactions)
top_reaction_member = top_key(self.used_reaction)
ret = [
f"- **messages**: {msg_count:,} ({percent(msg_count/total_msg)} of {type})"
if member_specific
else f"- **top messages**: {mention(top_member)} ({self.messages[top_member]:,} msg, {percent(self.messages[top_member]/val_sum(self.messages))})",
f"- **most visited channel**: {channel_mention(top_channel)} ({self.channel_usage[top_channel]:,} msg, {percent(self.channel_usage[top_channel]/channel_sum)})"
if show_top_channel
else "",
f"- **most contributed channel**: {channel_mention(found_in)} ({self.channel_usage[found_in]:,} msg, {percent(self.channel_usage[found_in]/self.channel_total[found_in])} of {type})"
if show_top_channel and member_specific
else "",
f"- **was mentioned**: {plural(mention_sum, 'time')} ({percent(mention_sum/val_sum(self.mention_count))} of {type})"
if member_specific and len(self.mentions) > 0
else "",
f"- **mostly mentioned by**: {mention(top_mention)} ({plural(self.mentions[top_mention], 'time')}, {percent(self.mentions[top_mention]/mention_sum)})"
if member_specific and len(self.mentions) > 0
else "",
f"- **mentioned others**: {plural(mention_others_sum, 'time')} ({percent(mention_others_sum/val_sum(self.mention_count))} of {type})"
if len(self.mention_others) > 0 and member_specific
else "",
f"- **mostly mentioned**: {mention(top_mention_others)} ({plural(self.mention_others[top_mention_others], 'time')}, {percent(self.mention_others[top_mention_others]/mention_others_sum)})"
if len(self.mention_others) > 0 and member_specific
else "",
f"- **mentioned**: {plural(mention_others_sum, 'time')} ({mention(top_member_mentioned)}, {percent(self.mention_count[top_member_mentioned]/val_sum(self.mention_count))})"
if len(self.mention_others) > 0 and not member_specific
else "",
f"- **top mentions**: {mention(top_member_mentioned)} ({plural(self.mention_count[top_member_mentioned], 'time')}, {percent(self.mention_count[top_member_mentioned]/val_sum(self.mention_count))})"
if len(self.mention_others) > 0 and not member_specific
else "",
f"- **most mentioned**: {mention(top_mention_others)} ({plural(self.mention_others[top_mention_others], 'time')}, {percent(self.mention_others[top_mention_others]/mention_others_sum)})"
if len(self.mention_others) > 0 and not member_specific
else "",
f"- **reactions**: {plural(total_reaction_used, 'time')}"
if len(self.reactions) > 0 and not member_specific
else "",
f"- **reactions**: {plural(total_reaction_used, 'time')} ({percent(total_reaction_used/val_sum(self.used_reaction))} of {type})"
if len(self.reactions) > 0 and member_specific
else "",
f"- **top reactions**: {mention(top_reaction_member)} ({plural(self.used_reaction[top_reaction_member], 'time')}, {percent(self.used_reaction[top_reaction_member]/val_sum(self.used_reaction))})"
if len(self.reactions) > 0 and not member_specific
else "",
f"- **most used reaction**: {top_reaction} ({plural(self.reactions[top_reaction], 'time')}, {percent(self.reactions[top_reaction]/total_reaction_used)})"
if len(self.reactions) > 0
else "",
]
return ret
+1 -1
View File
@@ -1,3 +1,3 @@
from .message_log import MessageLog
from .channel_logs import ChannelLogs
from .guild_logs import GuildLogs, ALREADY_RUNNING, CANCELLED
from .guild_logs import GuildLogs, ALREADY_RUNNING, CANCELLED, NO_FILE
+72 -27
View File
@@ -1,5 +1,7 @@
from typing import Union, Tuple, Any
import discord
from discord import message
from datetime import datetime
from . import MessageLog
from utils import FakeMessage
@@ -7,6 +9,8 @@ from utils import FakeMessage
CHUNK_SIZE = 2000
FORMAT = 3
NOT_SERIALIZED = ["channel", "guild", "start_date"]
class ChannelLogs:
def __init__(self, channel: Union[discord.TextChannel, dict], guild: Any):
@@ -15,8 +19,10 @@ class ChannelLogs:
self.id = channel.id
self.name = channel.name
self.last_message_id = None
self.first_message_id = None
self.format = FORMAT
self.messages = []
self.start_date = None
elif isinstance(channel, dict):
self.format = channel["format"] if "format" in channel else None
if not self.is_format():
@@ -28,63 +34,102 @@ class ChannelLogs:
if channel["last_message_id"] is not None
else None
)
self.first_message_id = (
int(channel["first_message_id"])
if "first_message_id" in channel
and channel["first_message_id"] is not None
else None
)
self.messages = [
MessageLog(message, self) for message in channel["messages"]
]
self.start_date = (
self.messages[-1].created_at if len(self.messages) > 0 else None
)
def is_format(self):
return self.format == FORMAT
async def load(self, channel: discord.TextChannel) -> Tuple[int, int]:
async def load(
self, channel: discord.TextChannel, start_date: datetime, stop_date: datetime
) -> Tuple[int, int]:
self.name = channel.name
self.channel = channel
is_empty = self.last_message_id is None
try:
if self.last_message_id is not None: # append
if is_empty:
sanity_check = len(await channel.history(limit=1).flatten())
if sanity_check != 1:
yield len(self.messages), True
return
# load backward
if is_empty or (
self.first_message_id is not None
and (
start_date is None
or (self.start_date is not None and self.start_date > start_date)
)
):
first_message_date = None
tmp_message_id = 0
done = 0
while (
first_message_date is None
or (
done >= CHUNK_SIZE
and (start_date is None or first_message_date > start_date)
)
) and tmp_message_id != self.first_message_id:
tmp_message_id = self.first_message_id
done = 0
async for message in channel.history(
limit=CHUNK_SIZE,
before=FakeMessage(self.first_message_id)
if self.first_message_id is not None
else None,
oldest_first=False,
):
done += 1
self.first_message_id = message.id
first_message_date = message.created_at
m = MessageLog(message, self)
await m.load(message)
self.messages += [m]
yield len(self.messages), False
if done < CHUNK_SIZE: # reached bottom
self.first_message_id = None
self.last_message_id = channel.last_message_id
# load forward
last_message_date = self.messages[0].created_at
if not is_empty and (stop_date is None or last_message_date < stop_date):
tmp_message_id = None
while (
self.last_message_id != channel.last_message_id
and self.last_message_id != tmp_message_id
):
and (stop_date is None or last_message_date < stop_date)
) and self.last_message_id != tmp_message_id:
tmp_message_id = self.last_message_id
async for message in channel.history(
limit=CHUNK_SIZE,
after=FakeMessage(self.last_message_id),
oldest_first=True,
):
last_message_date = message.created_at
self.last_message_id = message.id
m = MessageLog(message, self)
await m.load(message)
self.messages.insert(0, m)
yield len(self.messages), False
else: # first load
last_message_id = None
done = 0
sanity_check = len(await channel.history(limit=1).flatten())
if sanity_check == 1:
while done >= CHUNK_SIZE or last_message_id is None:
done = 0
async for message in channel.history(
limit=CHUNK_SIZE,
before=FakeMessage(last_message_id)
if last_message_id is not None
else None,
oldest_first=False,
):
done += 1
last_message_id = message.id
m = MessageLog(message, self)
await m.load(message)
self.messages += [m]
yield len(self.messages), False
self.last_message_id = channel.last_message_id
except discord.errors.HTTPException:
yield -1, True
return # When an exception occurs (like Forbidden)
self.start_date = (
self.messages[-1].created_at if len(self.messages) > 0 else None
)
yield len(self.messages), True
def dict(self) -> dict:
channel = dict(self.__dict__)
channel.pop("channel", None)
channel.pop("guild", None)
for key in NOT_SERIALIZED:
channel.pop(key, None)
channel["messages"] = [message.dict() for message in self.messages]
return channel
+122 -58
View File
@@ -15,6 +15,7 @@ from utils import code_message, delta, deltas
LOG_DIR = "logs"
LOG_EXT = ".logz"
current_analysis = []
current_analysis_lock = threading.Lock()
@@ -22,12 +23,22 @@ current_analysis_lock = threading.Lock()
ALREADY_RUNNING = -100
CANCELLED = -200
NO_FILE = -300
# 5 minutes, assume 'fast' arg
MIN_MODIFICATION_TIME = 5 * 60
# ~1 year, remove log file
MAX_MODIFICATION_TIME = 365 * 24 * 60 * 60
class Worker:
def __init__(self, channel_log: ChannelLogs, channel: discord.TextChannel):
def __init__(
self,
channel_log: ChannelLogs,
channel: discord.TextChannel,
start_date: datetime,
stop_date: datetime,
):
self.channel_log = channel_log
self.channel = channel
self.start_msg = len(channel_log.messages)
@@ -36,12 +47,16 @@ class Worker:
self.done = False
self.cancelled = False
self.loop = asyncio.get_event_loop()
self.start_date = start_date
self.stop_date = stop_date
def start(self):
asyncio.run_coroutine_threadsafe(self.process(), self.loop)
async def process(self):
async for count, done in self.channel_log.load(self.channel):
async for count, done in self.channel_log.load(
self.channel, self.start_date, self.stop_date
):
if count > 0:
self.queried_msg = count - self.start_msg
self.total_msg = count
@@ -54,7 +69,7 @@ class GuildLogs:
def __init__(self, guild: discord.Guild):
self.id = guild.id
self.guild = guild
self.log_file = os.path.join(LOG_DIR, f"{guild.id}.logz")
self.log_file = os.path.join(LOG_DIR, f"{guild.id}{LOG_EXT}")
self.channels = {}
self.locked = False
@@ -74,26 +89,29 @@ class GuildLogs:
return self.locked and self.log_file not in current_analysis
def lock(self) -> bool:
self.locked = True
current_analysis_lock.acquire()
if self.log_file in current_analysis:
current_analysis_lock.release()
return False
self.locked = True
current_analysis.append(self.log_file)
current_analysis_lock.release()
return True
def unlock(self):
self.locked = False
current_analysis_lock.acquire()
if self.log_file in current_analysis:
current_analysis.remove(self.log_file)
current_analysis_lock.release()
if self.locked:
self.locked = False
current_analysis_lock.acquire()
if self.log_file in current_analysis:
current_analysis.remove(self.log_file)
current_analysis_lock.release()
async def load(
self,
progress: discord.Message,
target_channels: List[discord.TextChannel] = [],
target_channels: List[discord.TextChannel],
start_date: datetime,
stop_date: datetime,
*,
fast: bool,
fresh: bool,
@@ -106,52 +124,49 @@ class GuildLogs:
if not os.path.exists(LOG_DIR):
os.mkdir(LOG_DIR)
last_time = None
if os.path.exists(self.log_file):
channels = {}
try:
last_time = os.path.getmtime(self.log_file)
gziped_data = None
await code_message(progress, "Reading saved history (1/4)...")
t0 = datetime.now()
with open(self.log_file, mode="rb") as f:
gziped_data = f.read()
logging.info(f"log {self.guild.id} > read in {delta(t0):,}ms")
if self.check_cancelled():
return CANCELLED, 0
await code_message(progress, "Reading saved history (2/4)...")
t0 = datetime.now()
json_data = gzip.decompress(gziped_data)
del gziped_data
logging.info(
f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms"
)
if self.check_cancelled():
return CANCELLED, 0
await code_message(progress, "Reading saved history (3/4)...")
t0 = datetime.now()
channels = json.loads(json_data)
del json_data
logging.info(f"log {self.guild.id} > json parse in {delta(t0):,}ms")
if self.check_cancelled():
return CANCELLED, 0
await code_message(progress, "Reading saved history (4/4)...")
t0 = datetime.now()
self.channels = {
int(id): ChannelLogs(channels[id], self) for id in channels
}
# remove invalid format
self.channels = {
id: self.channels[id]
for id in self.channels
if self.channels[id].is_format()
}
logging.info(f"log {self.guild.id} > loaded in {delta(t0):,}ms")
except json.decoder.JSONDecodeError:
logging.error(f"log {self.guild.id} > invalid JSON")
except IOError:
logging.error(f"log {self.guild.id} > cannot read")
else:
fast = False
if not os.path.exists(self.log_file):
return NO_FILE, 0
channels = {}
try:
last_time = os.path.getmtime(self.log_file)
gziped_data = None
await code_message(progress, "Reading saved history (1/4)...")
t0 = datetime.now()
with open(self.log_file, mode="rb") as f:
gziped_data = f.read()
logging.info(f"log {self.guild.id} > read in {delta(t0):,}ms")
if self.check_cancelled():
return CANCELLED, 0
await code_message(progress, "Reading saved history (2/4)...")
t0 = datetime.now()
json_data = gzip.decompress(gziped_data)
del gziped_data
logging.info(f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms")
if self.check_cancelled():
return CANCELLED, 0
await code_message(progress, "Reading saved history (3/4)...")
t0 = datetime.now()
channels = json.loads(json_data)
del json_data
logging.info(f"log {self.guild.id} > json parse in {delta(t0):,}ms")
if self.check_cancelled():
return CANCELLED, 0
await code_message(progress, "Reading saved history (4/4)...")
t0 = datetime.now()
self.channels = {
int(id): ChannelLogs(channels[id], self) for id in channels
}
# remove invalid format
self.channels = {
id: self.channels[id]
for id in self.channels
if self.channels[id].is_format()
}
logging.info(f"log {self.guild.id} > loaded in {delta(t0):,}ms")
except json.decoder.JSONDecodeError:
logging.error(f"log {self.guild.id} > invalid JSON")
except IOError:
logging.error(f"log {self.guild.id} > cannot read")
if len(target_channels) == 0:
target_channels = (
@@ -171,6 +186,8 @@ class GuildLogs:
if (
not fast
and not fresh
and start_date is None
and stop_date is None
and last_time is not None
and (time.time() - last_time) < MIN_MODIFICATION_TIME
):
@@ -178,8 +195,10 @@ class GuildLogs:
channel
for channel in target_channels
if channel.id not in self.channels
or self.channels[channel.id].first_message_id is not None
]
if len(invalid_target_channels) == 0:
logging.info(f"log {self.guild.id} > assumed fast")
fast = True
if self.locked:
self.unlock()
@@ -212,7 +231,9 @@ class GuildLogs:
if channel.id not in self.channels or fresh:
loading_new += 1
self.channels[channel.id] = ChannelLogs(channel, self)
workers += [Worker(self.channels[channel.id], channel)]
workers += [
Worker(self.channels[channel.id], channel, start_date, stop_date)
]
warning_msg = "(this might take a while)"
if len(target_channels) > 5 and loading_new > 5:
warning_msg = "(most channels are new, this will take a long while)"
@@ -253,7 +274,7 @@ class GuildLogs:
f"Reading new history...\n{total_msg:,} messages in {total_chan:,}/{max_chan:,} channels ({round(queried_msg/deltas(t0)):,}m/s)\n{warning_msg}{remaining_msg}",
)
logging.info(
f"log {self.guild.id} > queried in {delta(t0):,}ms -> {queried_msg / deltas(t0):,.3f} m/s"
f"log {self.guild.id} > queried {queried_msg} in {delta(t0):,}ms -> {queried_msg / deltas(t0):,.3f} m/s"
)
# write logs
real_total_msg = sum(
@@ -322,3 +343,46 @@ class GuildLogs:
f"No cancellable analysis are currently running on this server",
reference=message,
)
@staticmethod
def init_log(guild: discord.Guild):
    """Create an empty log file for ``guild`` unless one already exists.

    The file is ``<guild.id><LOG_EXT>`` inside ``LOG_DIR`` and contains a
    gzip-compressed empty JSON object (``{}``).

    :param guild: the guild whose ``id`` names the log file
    """
    # exist_ok: no failure if the directory shows up between a check and the creation
    os.makedirs(LOG_DIR, exist_ok=True)
    filename = os.path.join(LOG_DIR, f"{guild.id}{LOG_EXT}")
    try:
        # "x" creates the file exclusively, so an existing log is never truncated
        with open(filename, mode="xb") as f:
            f.write(gzip.compress(b"{}"))
    except FileExistsError:
        logging.info(f"log {guild.id} > already exists")
    else:
        logging.info(f"log {guild.id} > created")
@staticmethod
def remove_log(guild: discord.Guild):
    """Delete the log file of ``guild`` if it exists.

    ``LOG_DIR`` is created when missing, as the original implementation did.

    :param guild: the guild whose ``id`` names the log file
    """
    os.makedirs(LOG_DIR, exist_ok=True)
    filename = os.path.join(LOG_DIR, f"{guild.id}{LOG_EXT}")
    try:
        # Try the removal directly instead of checking first: no window between
        # the existence test and the unlink
        os.unlink(filename)
    except FileNotFoundError:
        logging.info(f"log {guild.id} > does not exists")
    else:
        logging.info(f"log {guild.id} > removed")
@staticmethod
def check_logs(guilds: List[discord.Guild]):
    """Purge log files that are no longer useful.

    Every regular file in ``LOG_DIR`` whose extension is ``LOG_EXT`` is removed
    when either:

    * its base name is not the id of one of ``guilds`` (unused log), or
    * it belongs to one of ``guilds`` but was last modified more than
      ``MAX_MODIFICATION_TIME`` seconds ago (old log).

    :param guilds: guilds whose logs may be kept
    """
    logging.info("checking logs...")
    os.makedirs(LOG_DIR, exist_ok=True)
    # A set gives constant-time membership tests while walking the directory
    guild_ids = {str(guild.id) for guild in guilds}
    for item in os.listdir(LOG_DIR):
        path = os.path.join(LOG_DIR, item)
        name, ext = os.path.splitext(item)
        if not os.path.isfile(path) or ext != LOG_EXT:
            continue  # not a log file, leave it alone
        if name not in guild_ids:
            logging.info(f"> removing unused log '{path}'")
            os.unlink(path)
        elif (time.time() - os.path.getmtime(path)) > MAX_MODIFICATION_TIME:
            logging.info(f"> removing old log '{path}'")
            os.unlink(path)
+5 -1
View File
@@ -8,6 +8,9 @@ IMAGE_FORMAT = [".gif", ".gifv", ".png", ".jpg", ".jpeg", ".bmp"]
EMBED_IMAGES = ["image", "gifv"]
NOT_SERIALIZED = ["channel"]
class MessageLog:
def __init__(self, message: Union[discord.Message, dict], channel: Any):
self.channel = channel
@@ -79,7 +82,8 @@ class MessageLog:
def dict(self) -> dict:
message = dict(self.__dict__)
message.pop("channel", None)
for key in NOT_SERIALIZED:
message.pop(key, None)
message["created_at"] = self.created_at.isoformat()
message["edited_at"] = (
self.edited_at.isoformat() if self.edited_at is not None else None
+29 -9
View File
@@ -6,7 +6,7 @@ if sys.version_info < (3, 7):
print("Please upgrade your Python version to 3.7.0 or higher")
sys.exit(1)
from utils import emojis
from utils import emojis, gdpr
from scanners import (
EmotesScanner,
FullScanner,
@@ -33,17 +33,43 @@ emojis.load_emojis()
bot = Bot(
"Discord Analyst",
"1.12",
"1.13",
alias="%",
)
bot.log_calls = True
async def on_ready():
    """Clean up the stored logs against the guilds the client currently reports.

    Always returns True.
    NOTE(review): the meaning of the return value for ``bot.register_event``
    is not visible here - confirm against miniscord.
    """
    current_guilds = bot.client.guilds
    GuildLogs.check_logs(current_guilds)
    return True
async def on_guild_remove():
    """Re-check the stored logs against the guilds the client still reports.

    Always returns True.
    NOTE(review): the meaning of the return value for ``bot.register_event``
    is not visible here - confirm against miniscord.
    """
    remaining_guilds = bot.client.guilds
    GuildLogs.check_logs(remaining_guilds)
    return True
bot.register_event(on_ready)
bot.register_event(on_guild_remove)
bot.register_command(
"(cancel|stop)",
GuildLogs.cancel,
"cancel: stop current analysis (not launched with fast)",
"```\n" + "%cancel: Stop current analysis (not launched with fast)\n" + "```",
"```\n%cancel: Stop current analysis (not launched with fast)\n```",
)
bot.register_command(
"gdpr",
gdpr.process,
"gdpr: displays GDPR information",
gdpr.HELP,
)
bot.register_command(
"words",
lambda *args: WordsScanner().compute(*args),
"words: (BETA) rank words by their usage",
WordsScanner.help(),
)
bot.register_command(
"last",
@@ -63,12 +89,6 @@ bot.register_command(
"first: read first message",
FirstScanner.help(),
)
bot.register_command(
"words",
lambda *args: WordsScanner().compute(*args),
"words: rank words by their usage",
WordsScanner.help(),
)
bot.register_command(
"mentioned",
lambda *args: MentionedScanner().compute(*args),
+1 -1
View File
@@ -11,4 +11,4 @@ from .reactions_scanner import ReactionsScanner
from .first_scanner import FirstScanner
from .last_scanner import LastScanner
from .random_scanner import RandomScanner
from .words_scanner import WordsScanner
from .words_scanner import WordsScanner
+6 -10
View File
@@ -8,21 +8,17 @@ import discord
from logs import ChannelLogs, MessageLog
from .scanner import Scanner
from data_types import Counter
from utils import COMMON_HELP_ARGS, mention, channel_mention
from utils import generate_help, mention, channel_mention
class ChannelsScanner(Scanner):
@staticmethod
def help() -> str:
return (
"```\n"
+ "%chan: Rank channels by their messages\n"
+ "arguments:\n"
+ COMMON_HELP_ARGS
+ "* <n> - top <n>, default is 10\n"
+ "* all/everyone - include bots\n"
+ "Example: %chan 10 @user\n"
+ "```"
return generate_help(
"chan",
"Rank channels by their messages",
args=["<n> - top <n>, default is 10", "all/everyone - include bots"],
example="5 @user",
)
def __init__(self):
+2 -10
View File
@@ -8,21 +8,13 @@ import discord
from .scanner import Scanner
from data_types import Composition
from logs import ChannelLogs, MessageLog
from utils import emojis, COMMON_HELP_ARGS
from utils import emojis, generate_help
class CompositionScanner(Scanner):
@staticmethod
def help() -> str:
return (
"```\n"
+ "%compo: Show composition statistics\n"
+ "arguments:\n"
+ COMMON_HELP_ARGS
+ "* all/everyone - include bots\n"
+ "Example: %compo #mychannel1 @user\n"
+ "```"
)
return generate_help("compo", "Show composition statistics")
def __init__(self):
super().__init__(
+12 -13
View File
@@ -8,24 +8,23 @@ import discord
from logs import ChannelLogs, MessageLog
from data_types import Emote, get_emote_dict
from .scanner import Scanner
from utils import emojis, COMMON_HELP_ARGS, plural, precise
from utils import emojis, generate_help, plural, precise
class EmotesScanner(Scanner):
@staticmethod
def help() -> str:
return (
"```\n"
+ "%emojis: Rank emojis by their usage\n"
+ "arguments:\n"
+ COMMON_HELP_ARGS
+ "* <n> - top <n> emojis, default is 20\n"
+ "* all - list all common emojis in addition to this guild's\n"
+ "* members - show top member for each emojis\n"
+ "* sort:usage/reaction - other sorting methods\n"
+ "* everyone - include bots\n"
+ "Example: %emojis 10 all #mychannel1 #mychannel2 @user\n"
+ "```"
return generate_help(
"emojis",
"Rank emojis by their usage",
args=[
"<n> - top <n> emojis, default is 20",
"all - list all common emojis in addition to this guild's",
"members - show top member for each emojis",
"sort:usage/reaction - other sorting methods",
"everyone - include bots",
],
example="10 all #mychannel1 #mychannel2 @user",
)
def __init__(self):
+2 -3
View File
@@ -3,14 +3,13 @@ from typing import List
# Custom libs
from .history_scanner import HistoryScanner
from utils import generate_help
class FirstScanner(HistoryScanner):
@staticmethod
def help() -> str:
return super(FirstScanner, FirstScanner).help(
cmd="first", text="Read first message"
)
return generate_help("first", "Read first message")
def __init__(self):
super().__init__(help=FirstScanner.help())
+3 -11
View File
@@ -8,21 +8,13 @@ import discord
from .scanner import Scanner
from data_types import Frequency
from logs import ChannelLogs, MessageLog
from utils import COMMON_HELP_ARGS
from utils import generate_help
class FrequencyScanner(Scanner):
@staticmethod
def help() -> str:
return (
"```\n"
+ "%freq: Show frequency-related statistics\n"
+ "arguments:\n"
+ COMMON_HELP_ARGS
+ "* all/everyone - include bots\n"
+ "Example: %freq #mychannel1 @user\n"
+ "```"
)
return generate_help("freq", "Show frequency-related statistics")
def __init__(self):
super().__init__(
@@ -55,7 +47,7 @@ class FrequencyScanner(Scanner):
freq: Frequency,
raw_members: List[int],
*,
all_messages: bool
all_messages: bool,
) -> bool:
impacted = False
# If author is included in the selection (empty list is all)
+2 -10
View File
@@ -8,21 +8,13 @@ from .scanner import Scanner
from . import FrequencyScanner, CompositionScanner, PresenceScanner
from data_types import Frequency, Composition, Presence
from logs import ChannelLogs, MessageLog
from utils import COMMON_HELP_ARGS
from utils import generate_help
class FullScanner(Scanner):
@staticmethod
def help() -> str:
return (
"```\n"
+ "%scan: Show full statistics\n"
+ "arguments:\n"
+ COMMON_HELP_ARGS
+ "* all/everyone - include bots\n"
+ "Example: %scan #mychannel1 @user\n"
+ "```"
)
return generate_help("scan", "Show full statistics")
def __init__(self):
super().__init__(
-13
View File
@@ -7,22 +7,9 @@ import discord
from .scanner import Scanner
from data_types import History
from logs import ChannelLogs, MessageLog
from utils import COMMON_HELP_ARGS
class HistoryScanner(Scanner, ABC):
@staticmethod
def help(*, cmd: str, text: str) -> str:
return (
"```\n"
+ f"%{cmd}: {text}\n"
+ "arguments:\n"
+ COMMON_HELP_ARGS
+ "* all/everyone - include bots\n"
+ "Example: %{cmd} #mychannel1 @user\n"
+ "```"
)
def __init__(self, *, help: str):
super().__init__(
has_digit_args=True,
+2 -3
View File
@@ -3,14 +3,13 @@ from typing import List
# Custom libs
from .history_scanner import HistoryScanner
from utils import generate_help
class LastScanner(HistoryScanner):
@staticmethod
def help() -> str:
return super(LastScanner, LastScanner).help(
cmd="last", text="Read last message"
)
return generate_help("last", "Read last message")
def __init__(self):
super().__init__(help=LastScanner.help())
+8 -12
View File
@@ -8,22 +8,18 @@ import discord
from logs import ChannelLogs, MessageLog
from .scanner import Scanner
from data_types import Counter
from utils import COMMON_HELP_ARGS, plural, precise, mention, alt_mention
from utils import generate_help, plural, precise, mention, alt_mention
class MentionedScanner(Scanner):
@staticmethod
def help() -> str:
return (
"```\n"
+ "%mentioned: Rank specific user's mentions by their usage\n"
+ "arguments:\n"
+ "* @member/me - (required) one or more member\n"
+ "\n".join(COMMON_HELP_ARGS.split("\n")[1:])
+ "* <n> - top <n> mentions, default is 10\n"
+ "* all - include bots mentions\n"
+ "Example: %mentioned 10 @user\n"
+ "```"
return generate_help(
"mentioned",
"Rank specific user's mentions by their usage",
args=["<n> - top <n>, default is 10", "all/everyone - include bots"],
example="5 @user",
replace_args=[" @member/me - (required) one or more member"],
)
def __init__(self):
@@ -45,7 +41,7 @@ class MentionedScanner(Scanner):
"You need to mention at least one member or use `me`", reference=message
)
return False
self.all_mentions = "all" in args
self.all_mentions = "all" in args or "everyone" in args
# Create mentions dict
self.mentions = defaultdict(Counter)
return True
+10 -11
View File
@@ -9,7 +9,7 @@ from logs import ChannelLogs, MessageLog
from .scanner import Scanner
from data_types import Counter
from utils import (
COMMON_HELP_ARGS,
generate_help,
plural,
precise,
mention,
@@ -22,16 +22,15 @@ from utils import (
class MentionsScanner(Scanner):
@staticmethod
def help() -> str:
return (
"```\n"
+ "%mentions: Rank mentions by their usage\n"
+ "arguments:\n"
+ COMMON_HELP_ARGS
+ "* <n> - top <n> mentions, default is 10\n"
+ "* all - show role/channel/everyone/here mentions\n"
+ "* everyone - include bots mentions\n"
+ "Example: %mentions 10 #mychannel1 #mychannel2 @user\n"
+ "```"
return generate_help(
"mentions",
"Rank mentions by their usage",
args=[
"<n> - top <n>, default is 10",
"all - show role/channel/everyone/here mentions",
"everyone - include bots mentions",
],
example="10 #mychannel1 #mychannel2 @user",
)
def __init__(self):
+6 -10
View File
@@ -8,21 +8,17 @@ import discord
from logs import ChannelLogs, MessageLog
from .scanner import Scanner
from data_types import Counter
from utils import COMMON_HELP_ARGS, mention, channel_mention
from utils import generate_help, mention, channel_mention
class MessagesScanner(Scanner):
@staticmethod
def help() -> str:
return (
"```\n"
+ "%msg: Rank users by their messages\n"
+ "arguments:\n"
+ COMMON_HELP_ARGS
+ "* <n> - top <n>, default is 10\n"
+ "* all/everyone - include bots\n"
+ "Example: %msg 10 #channel\n"
+ "```"
return generate_help(
"msg",
"Rank users by their messages",
args=["<n> - top <n>, default is 10", "all/everyone - include bots"],
example="10 #channel",
)
def __init__(self):
+2 -10
View File
@@ -7,21 +7,13 @@ import discord
from .scanner import Scanner
from data_types import Presence
from logs import ChannelLogs, MessageLog
from utils import COMMON_HELP_ARGS
from utils import generate_help
class PresenceScanner(Scanner):
@staticmethod
def help() -> str:
return (
"```\n"
+ "%pres: Show presence statistics\n"
+ "arguments:\n"
+ COMMON_HELP_ARGS
+ "* all/everyone - include bots\n"
+ "Example: %pres #mychannel1 @user\n"
+ "```"
)
return generate_help("pres", "Show presence statistics")
def __init__(self):
super().__init__(
+2 -3
View File
@@ -3,14 +3,13 @@ from typing import List
# Custom libs
from .history_scanner import HistoryScanner
from utils import generate_help
class RandomScanner(HistoryScanner):
@staticmethod
def help() -> str:
return super(RandomScanner, RandomScanner).help(
cmd="rand", text="Read a random message"
)
return generate_help("rand", "Read a random message")
def __init__(self):
super().__init__(help=RandomScanner.help())
+6 -9
View File
@@ -8,20 +8,17 @@ import discord
from logs import ChannelLogs, MessageLog
from .scanner import Scanner
from data_types import Counter
from utils import COMMON_HELP_ARGS, mention, channel_mention
from utils import generate_help, mention, channel_mention
class ReactionsScanner(Scanner):
@staticmethod
def help() -> str:
return (
"```\n"
+ "%react: Rank users by their reactions\n"
+ "arguments:\n"
+ COMMON_HELP_ARGS
+ "* <n> - top <n>, default is 10\n"
+ "Example: %react 10 #channel\n"
+ "```"
return generate_help(
"react",
"Rank users by their reactions",
args=["<n> - top <n>, default is 10"],
example="10 #channel",
)
def __init__(self):
+82 -13
View File
@@ -5,8 +5,24 @@ import logging
import re
import discord
from utils import no_duplicate, get_intro, delta
from logs import GuildLogs, ChannelLogs, MessageLog, ALREADY_RUNNING, CANCELLED
from utils import (
no_duplicate,
get_intro,
delta,
gdpr,
ISO8601_REGEX,
RELATIVE_REGEX,
parse_time,
)
from logs import (
GuildLogs,
ChannelLogs,
MessageLog,
ALREADY_RUNNING,
CANCELLED,
NO_FILE,
)
class Scanner(ABC):
@@ -47,22 +63,42 @@ class Scanner(ABC):
str(channel.id) for channel in message.channel_mentions
]
str_mentions = [str(member.id) for member in message.mentions]
dates = []
for i, arg in enumerate(args[1:]):
skip_check = False
if re.match(r"^<@!?\d+>$", arg):
arg = arg[3:-1] if "!" in arg else arg[2:-1]
elif re.match(r"^<#!?\d+>$", arg):
arg = arg[3:-1] if "!" in arg else arg[2:-1]
elif re.match(ISO8601_REGEX, arg) or re.match(RELATIVE_REGEX, arg):
dates += [parse_time(arg)]
skip_check = True
if len(dates) > 2:
await message.channel.send(
f"Too many date arguments: `{arg}`", reference=message
)
return
if (
arg not in self.valid_args + ["me", "here", "fast", "fresh"]
and (not arg.isdigit() or not self.has_digit_args)
and arg not in str_channel_mentions
and arg not in str_mentions
and not skip_check
):
await message.channel.send(
f"Unrecognized argument: `{arg}`", reference=message
)
return
self.start_date = None if len(dates) < 1 else min(dates)
self.stop_date = None if len(dates) < 2 else max(dates)
if self.start_date is not None and self.start_date > datetime.now():
await message.channel.send(
f"Start date is after today", reference=message
)
return
# Get selected channels or all of them if no channel arguments
self.channels = no_duplicate(message.channel_mentions)
@@ -94,7 +130,12 @@ class Scanner(ABC):
allowed_mentions=discord.AllowedMentions.none(),
)
total_msg, total_chan = await logs.load(
progress, self.channels, fast="fast" in args, fresh="fresh" in args
progress,
self.channels,
self.start_date,
self.stop_date,
fast="fast" in args,
fresh="fresh" in args,
)
if total_msg == CANCELLED:
await message.channel.send(
@@ -106,7 +147,24 @@ class Scanner(ABC):
"An analysis is already running on this server, please be patient.",
reference=message,
)
elif total_msg == NO_FILE:
await message.channel.send(gdpr.TEXT)
else:
if self.start_date is not None and len(logs.channels) > 0:
self.start_date = max(
self.start_date,
min(
[
logs.channels[channel.id].start_date
for channel in self.channels
if channel.id in logs.channels
and logs.channels[channel.id].start_date is not None
]
),
)
if self.stop_date is None:
self.stop_date = datetime.utcnow()
self.msg_count = 0
self.total_msg = 0
self.chan_count = 0
@@ -118,13 +176,21 @@ class Scanner(ABC):
[
self.compute_message(channel_logs, message_log)
for message_log in channel_logs.messages
if (
self.start_date is None
or message_log.created_at >= self.start_date
)
and (
self.stop_date is None
or message_log.created_at <= self.stop_date
)
]
)
self.total_msg += len(channel_logs.messages)
self.msg_count += count
self.chan_count += 1 if count > 0 else 0
logging.info(f"scan {guild.id} > scanned in {delta(t0):,}ms")
if self.total_msg == 0:
if self.msg_count == 0:
await message.channel.send(
"There are no messages found matching the filters",
reference=message,
@@ -141,21 +207,24 @@ class Scanner(ABC):
self.members,
self.msg_count,
self.chan_count,
self.start_date,
self.stop_date,
)
)
logging.info(f"scan {guild.id} > results in {delta(t0):,}ms")
response = ""
first = True
for r in results:
if len(response + "\n" + r) > 2000:
await message.channel.send(
response,
reference=message if first else None,
allowed_mentions=discord.AllowedMentions.none(),
)
first = False
response = ""
response += "\n" + r
if r:
if len(response + "\n" + r) > 2000:
await message.channel.send(
response,
reference=message if first else None,
allowed_mentions=discord.AllowedMentions.none(),
)
first = False
response = ""
response += "\n" + r
if len(response) > 0:
await message.channel.send(
response,
+16 -22
View File
@@ -9,7 +9,7 @@ from logs import ChannelLogs, MessageLog
from .scanner import Scanner
from data_types import Counter
from utils import (
COMMON_HELP_ARGS,
generate_help,
plural,
precise,
)
@@ -18,16 +18,15 @@ from utils import (
class WordsScanner(Scanner):
@staticmethod
def help() -> str:
return (
"```\n"
+ "%words: Rank words by their usage\n"
+ "arguments:\n"
+ COMMON_HELP_ARGS
+ "* <n> - words containings <n> or more letters, default is 3\n"
+ "* <n2> - top <n2> words, default is 10\n"
+ "* everyone - include bots\n"
+ "Example: %words 5 10 #mychannel1 #mychannel2 @user\n"
+ "```"
return generate_help(
"words",
"(BETA) Rank words by their usage",
args=[
"<n> - words containings <n> or more letters, default is 3",
"<n2> - top <n2> words, default is 10",
"all/everyone - include bots",
],
example="5 10 #mychannel1 #mychannel2 @user",
)
def __init__(self):
@@ -104,16 +103,13 @@ class WordsScanner(Scanner):
or message.author in raw_members
):
impacted = True
content = " ".join(
[
block
for block in message.content.split()
if not re.match(r"^\w+:\/\/", block)
]
)
content = message.content
content = re.sub(r"```.+```", "", content, flags=re.DOTALL)
content = re.sub(r"`.+`", "", content, flags=re.DOTALL)
content = re.sub(r"\w+:\/\/[^ ]+", "", content)
for word in re.split("[^\w\-':]", content):
m = re.match(
r"(?!^:\w+:$)^[^\w]*((?![\d_])\w.*(?![\d_])\w)[^\w]*$", word
r"(?!^:\w+:$)^[^\w]*((?![\d_])\w[\w\-']*(?![\d_])\w)[^\w]*$", word
)
if m:
word = m[1].lower()
@@ -126,7 +122,5 @@ class WordsScanner(Scanner):
words[word] = words[word + case]
del words[word + case]
break
words[word].update_use(
message.content.count(word), message.created_at
)
words[word].update_use(1, message.created_at)
return impacted
+65
View File
@@ -0,0 +1,65 @@
import discord
from logs import GuildLogs
# Usage text of the `%gdpr` command, sent by process() when its argument is `help`
HELP = """```
%gdpr: Displays GDPR information
arguments:
* agree - agree to GDPR
* revoke - remove this server's data
```"""
# Full GDPR notice (Discord markdown), sent by process() when `%gdpr` is called
# without argument; scanners also send it when log loading reports NO_FILE
TEXT = """
__**About Analyst-bot's data usage**__
**TL;DR**
Analyst-bot collects text message information. It does not share collected data with any third-party and data is retained 18 months or until the bot is leaving the guild/server.
**Data collection**
Analyst-bot collects a Discord guild/server's history when asked to.
This includes:
- Visible text channel names
- Visible text messages: date and time of creation and edition, author, content, reactions and other available metadata (pinned, tts, etc.)
This does __not__ includes:
- Voice channels and not visible channels
- Not visible text messages
- Visible text messages' embedded content, images and other attachments
**Data processing**
Any data collected is only processed in order to produce a one-time report sent to the user immediately. No temporary data are retained.
**Data storage and retain policy**
Analyst-bot stores the collected data in files that are accessible by the software and its administrator only.
Any collected data are retained maximum 18 months until deletion or when the bot is leaving a guild/server.
**Data sharing**
Analyst-bot does not share the data collected with any third-party.
**Right to retract**
If you want to have your data removed, you can use the `%gdpr revoke` command or remove this bot from your guild/server.
**Terms agreement**
By agreeing to these terms, you ensure having the legal age if you are in a country that does have one and you also ensure having the consent of every member involved.
*If you want more information, please contact the creator of this bot: <https://github.com/Klemek/discord-analyst>.*
Type `%gdpr agree` to agree to these terms, `%gdpr revoke` to remove this guild/server's collected data or `%gdpr` to see this message again.
"""
# Confirmation sent by process() after `%gdpr agree` / `%gdpr accept`
AGREE_TEXT = "Thanks for agreeing for these terms, you can now run analysis on this guild/server."
# Confirmation sent by process() after `%gdpr revoke` (or cancel / remove / delete)
REVOKE_TEXT = "This guild/server's data has been deleted. To run new analysis you must agree to the terms again."
async def process(client: discord.Client, message: discord.Message, *args: str):
    """
    Handle the `%gdpr` command

    `args` holds the command word followed by at most one argument:
    - no argument: send the full GDPR notice
    - help: send the command usage
    - agree / accept: call GuildLogs.init_log for the guild and confirm
    - revoke / cancel / remove / delete: call GuildLogs.remove_log for the guild and confirm
    Anything else is answered with an error message
    """
    args = list(args)
    # "<= 1" also covers an empty argument list, which used to raise
    # an IndexError on args[1] further down
    if len(args) <= 1:
        await message.channel.send(TEXT)
    elif len(args) > 2:
        await message.channel.send("Too many arguments", reference=message)
    elif args[1] == "help":
        await message.channel.send(HELP, reference=message)
    elif args[1] in ["agree", "accept"]:
        GuildLogs.init_log(message.channel.guild)
        await message.channel.send(AGREE_TEXT, reference=message)
    elif args[1] in ["revoke", "cancel", "remove", "delete"]:
        GuildLogs.remove_log(message.channel.guild)
        await message.channel.send(REVOKE_TEXT, reference=message)
    else:
        await message.channel.send(
            f"Unrecognized argument: `{args[1]}`", reference=message
        )
+123 -35
View File
@@ -1,19 +1,44 @@
from typing import List, Dict, Union, Optional, Any
from calendar import month
from typing import Callable, List, Dict, Union, Optional, Any
import os
import logging
import discord
import math
from datetime import datetime
from datetime import datetime, timedelta
import re
import dateutil.parser
from dateutil.relativedelta import relativedelta
# OTHER
COMMON_HELP_ARGS = (
""
+ "* @member/me - filter for one or more member\n"
+ "* #channel/here - filter for one or more channel\n"
+ "* fast - only read cache\n"
+ "* fresh - does not read cache (long)\n"
)
COMMON_HELP_ARGS = [
"@member/me - filter for one or more member",
"#channel/here - filter for one or more channel",
"<date1> - filter after <date1>",
"<date2> - filter before <date2>",
"fast - only read cache",
"fresh - does not read cache (long)",
]
def generate_help(
cmd: str,
info: str,
*,
args=["all/everyone - include bots"],
example="#mychannel1 @user",
replace_args=[],
):
arg_list = "* " + "\n* ".join(
replace_args + COMMON_HELP_ARGS[len(replace_args) :] + args
)
return f"""```
%{cmd}: {info}
arguments:
{arg_list}
(Sample dates: 2020 / 2021-11 / 2021-06-28 / 2020-06-28T23:00 / today / week / 8days / 1y)
Example: %{cmd} {example}
```"""
def delta(t0: datetime):
@@ -92,11 +117,19 @@ def no_duplicate(seq: list) -> list:
# DICTS
def top_key(d: Dict[Union[str, int], int]) -> Union[str, int]:
return sorted(d, key=lambda k: d[k])[-1]
def top_key(
    d: Dict[Union[str, int], int], key: Optional[Callable] = None
) -> Optional[Union[str, int]]:
    """
    Get the highest ranked key of a dict

    d: dict to search
    key: optional ranking function applied to each key of the dict,
        defaults to ranking keys by their value
    Returns None when the dict is empty; on ties the last key in
    iteration order wins
    """
    if not d:
        return None
    if key is None:
        key = d.__getitem__
    # max() returns the first best candidate, so walking the keys backwards
    # keeps the "last one wins on ties" result of a stable sort without sorting
    return max(reversed(list(d)), key=key)
def val_sum(d: Dict[Any, int]) -> int:
    """
    Sum all the values of a dict, an empty dict adds up to 0
    """
    return sum(d.values()) if d else 0
@@ -135,6 +168,51 @@ def precise(p: float, *, precision: int = 2) -> str:
# DATE FORMATTING
# Pattern matched against command arguments to detect ISO 8601 dates
ISO8601_REGEX = r"^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)?$"
# Fully specified date-time template, same text parse_iso_datetime() pads partial dates with
ISO8601_FULL = "0000-01-01T00:00:00"
def parse_iso_datetime(str_date: str) -> datetime:
    """
    Parse an ISO 8601 string into a datetime

    Partial dates made of plain numeric fields ("2020", "2021-11",
    "2021-06-28", "2020-06-28T23", ...) are first completed on the right
    with the matching part of ISO8601_FULL (January, day 1, 00:00:00),
    any other input is handed to dateutil unchanged
    """
    # raw string: "\d" is not a valid escape sequence in a regular literal
    if re.match(
        r"^\d{4}(-\d{2}(-\d{2}(T\d{2}(:\d{2}(:\d{2}(:\d{2})?)?)?)?)?)?$", str_date
    ):
        str_date = str_date + ISO8601_FULL[len(str_date) :]
    return dateutil.parser.parse(str_date)
# Pattern of relative date arguments: "today", "yesterday" or an optional count
# followed by a unit (h/hour(s), d/day(s), w/week(s), m/month(s), y/year(s)).
# Anchored on both ends: re.match() only anchors the start, so without "$"
# any word starting with h/d/w/m/y ("me", "here", "members"...) was accepted as a date.
RELATIVE_REGEX = r"^(yesterday|today|\d*h(ours?)?|\d*d(ays?)?|\d*w(eeks?)?|\d*m(onths?)?|\d*y(ears?)?)$"
def parse_relative_time(src: str) -> datetime:
    """
    Convert a relative date ("today", "yesterday", "week", "8days", "1y"...) to a naive UTC datetime

    Apart from "today" and "yesterday", src is an optional count (defaults to 1)
    followed by a word whose first letter gives the unit:
    h(ours), d(ays), w(eeks), m(onths) or y(ears); the result is that far before now.
    Raises ValueError when no unit can be recognised
    """
    # gap between naive UTC and naive local time, used to shift local datetimes to UTC
    timezone_delta = datetime.utcnow() - datetime.now()
    # NOTE(review): datetime.today() carries the current time of day, so "today"
    # resolves to the current instant rather than midnight - confirm this is intended
    if src == "today":
        return datetime.today() + timezone_delta
    if src == "yesterday":
        return datetime.today() - relativedelta(days=1) + timezone_delta
    m = re.match(r"(\d*)(\w+)", src)
    if m is None:
        raise ValueError(f"Invalid relative time: {src!r}")
    value = int(m[1]) if m[1] else 1
    # first letter of the unit word -> relativedelta keyword
    units = {"h": "hours", "d": "days", "w": "weeks", "m": "months", "y": "years"}
    unit = units.get(m[2][0])
    if unit is None:
        # previously ended in an obscure TypeError (datetime - None)
        raise ValueError(f"Unknown time unit in relative time: {src!r}")
    delta = relativedelta(**{unit: value})
    return datetime.utcnow() - delta
def parse_time(src: str) -> datetime:
    """
    Parse a date argument, relative ("8days", "today"...) or ISO 8601

    The relative parser is chosen when src matches RELATIVE_REGEX,
    the ISO 8601 parser otherwise
    """
    is_relative = re.match(RELATIVE_REGEX, src)
    parser = parse_relative_time if is_relative else parse_iso_datetime
    return parser(src)
def str_date(date: datetime) -> str:
    """
    Format a date as day, abbreviated month and year
    """
    day_month_year = "%d %b. %Y"  # 12 Jun. 2018
    return date.strftime(day_month_year)
@@ -144,29 +222,37 @@ def str_datetime(date: datetime) -> str:
return date.strftime("%H:%M, %d %b. %Y") # 12:05, 12 Jun. 2018
def from_now(src: Optional[datetime]) -> str:
if src is None:
return "never"
delay = datetime.utcnow() - src
def str_delta(delay: timedelta) -> str:
seconds = delay.seconds
minutes = seconds // 60
hours = minutes // 60
if delay.days < 1:
if hours < 1:
if minutes == 0:
return "now"
return "no time"
elif minutes == 1:
return "a minute ago"
return "a minute"
else:
return f"{minutes} minutes ago"
return f"{minutes} minutes"
elif hours == 1:
return "an hour ago"
return "an hour"
else:
return f"{hours} hours ago"
return f"{hours} hours"
elif delay.days == 1:
return "yesterday"
return "one day"
else:
return f"{delay.days:,} days ago"
return f"{delay.days:,} days"
def from_now(src: Optional[datetime]) -> str:
    """
    Describe how long ago a naive UTC datetime was ("never" when there is none)
    """
    if src is None:
        return "never"
    elapsed = str_delta(datetime.utcnow() - src)
    # durations that read better as a single word than as "<duration> ago"
    special_cases = {"no time": "now", "one day": "yesterday"}
    if elapsed in special_cases:
        return special_cases[elapsed]
    return elapsed + " ago"
# APP SPECIFIC
@@ -179,46 +265,48 @@ def get_intro(
members: List[discord.Member],
nmm: int, # number of messages impacted
nc: int, # number of impacted channels
start_datetime: datetime,
stop_datetime: datetime,
) -> str:
"""
Get the introduction sentence of the response
"""
time_text = ""
if start_datetime is not None:
stop_datetime = datetime.now() if stop_datetime is None else stop_datetime
time_text = f" (in {str_delta(stop_datetime - start_datetime)})"
# Show all data (members, channels) when it's less than 5 units
if len(members) == 0:
# Full scan of the server
if full:
return f"{subject} in this server ({nc} channels, {nmm:,} messages):"
return f"{subject} in this server ({nc} channels, {nmm:,} messages){time_text}:"
elif len(channels) < 5:
return f"{aggregate([c.mention for c in channels])} {subject.lower()} in {nmm:,} messages:"
return f"{aggregate([c.mention for c in channels])} {subject.lower()} in {nmm:,} messages{time_text}:"
else:
return (
f"These {len(channels)} channels {subject.lower()} in {nmm:,} messages:"
)
return f"These {len(channels)} channels {subject.lower()} in {nmm:,} messages{time_text}:"
elif len(members) < 5:
if full:
return f"{aggregate([m.mention for m in members])} {subject.lower()} in {nmm:,} messages:"
return f"{aggregate([m.mention for m in members])} {subject.lower()} in {nmm:,} messages{time_text}:"
elif len(channels) < 5:
return (
f"{aggregate([m.mention for m in members])} on {aggregate([c.mention for c in channels])} "
f"{subject.lower()} in {nmm:,} messages:"
f"{subject.lower()} in {nmm:,} messages{time_text}:"
)
else:
return (
f"{aggregate([m.mention for m in members])} on these {len(channels)} channels "
f"{subject.lower()} in {nmm:,} messages:"
f"{subject.lower()} in {nmm:,} messages{time_text}:"
)
else:
if full:
return (
f"These {len(members)} members {subject.lower()} in {nmm:,} messages:"
)
return f"These {len(members)} members {subject.lower()} in {nmm:,} messages{time_text}:"
elif len(channels) < 5:
return (
f"These {len(members)} members on {aggregate([c.mention for c in channels])} "
f"{subject.lower()} in {nmm:,} messages:"
f"{subject.lower()} in {nmm:,} messages{time_text}:"
)
else:
return (
f"These {len(members)} members on these {len(channels)} channels "
f"{subject.lower()} in {nmm:,} messages:"
f"{subject.lower()} in {nmm:,} messages{time_text}:"
)