Merge pull request #32 from Klemek/dev

v1.13
This commit is contained in:
Klemek
2021-04-09 19:51:54 +02:00
committed by GitHub
30 changed files with 728 additions and 414 deletions
+26
View File
@@ -0,0 +1,26 @@
name: Python
on: ["push", "pull_request"]
jobs:
syntax:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install flake8
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+12 -1
View File
@@ -21,6 +21,7 @@
* %first - read first message * %first - read first message
* %rand - read a random message * %rand - read a random message
* %last - read last message * %last - read last message
* %gdpr - displays GDPR information
* %emojis - rank emotes by their usage * %emojis - rank emotes by their usage
* arguments: * arguments:
* <n> - top <n> emojis, default is 20 * <n> - top <n> emojis, default is 20
@@ -43,7 +44,7 @@
* %react - rank users by their reactions * %react - rank users by their reactions
* arguments: * arguments:
* <n> - top <n> messages, default is 10 * <n> - top <n> messages, default is 10
* %words - rank words by their usage * %words - (BETA) rank words by their usage
* arguments: * arguments:
* <n> - words containing <n> or more letters, default is 3 * <n> - words containing <n> or more letters, default is 3
* <n2> - top <n2> words, default is 10 * <n2> - top <n2> words, default is 10
@@ -52,9 +53,13 @@
* Common arguments: * Common arguments:
* @member/me: filter for one or more members * @member/me: filter for one or more members
* #channel/here: filter for one or more channels * #channel/here: filter for one or more channels
* <date1> - filter after <date1>
* <date2> - filter before <date2>
* all/everyone - include bot messages * all/everyone - include bot messages
* fast: only read cache * fast: only read cache
* fresh: does not read cache * fresh: does not read cache
(Sample dates: 2020 / 2021-11 / 2021-06-28 / 2020-06-28T23:00 / today / week / 8days / 1y)
``` ```
## Running this bot ## Running this bot
@@ -104,6 +109,12 @@ python3 src/main.py
## Changelog ## Changelog
* **v1.13**
* improved scan `%words`
* remove old and unused logs at startup and when leaving a guild
* GDPR disclaimer before scanning
* start and stop dates
* bug fixes and improvements
* **v1.12** * **v1.12**
* more scans: `%words` * more scans: `%words`
* concurrent `fast` analysis * concurrent `fast` analysis
+1
View File
@@ -1,3 +1,4 @@
discord.py discord.py
python-dotenv python-dotenv
python-dateutil
git+git://github.com/Klemek/miniscord.git git+git://github.com/Klemek/miniscord.git
+36 -40
View File
@@ -23,49 +23,45 @@ class Composition:
self.spoilers = 0 self.spoilers = 0
def to_string(self, msg_count: int) -> List[str]: def to_string(self, msg_count: int) -> List[str]:
ret = []
ret += [
f"- **avg. characters / message**: {self.total_characters/msg_count:.2f}"
]
if self.plain_text > 0:
ret += [
f"- **plain text messages**: {self.plain_text:,} ({percent(self.plain_text/msg_count)})"
]
if self.edited > 0:
ret += [
f"- **edited messages**: {self.edited:,} ({percent(self.edited/msg_count)})"
]
if self.everyone > 0:
ret += [
f"- **@\u200beveryone**: {self.everyone:,} ({percent(self.everyone/msg_count)})"
]
if self.mentions > 0:
ret += [
f"- **mentions**: {self.mentions:,} (in {percent(self.mention_msg/msg_count)} of msg, avg. {precise(self.mentions/msg_count)}/msg)",
]
if self.answers > 0:
ret += [
f"- **answers**: {self.answers:,} ({percent(self.answers/msg_count)})"
]
total_emotes = val_sum(self.emotes) total_emotes = val_sum(self.emotes)
if total_emotes > 0:
top_emote = top_key(self.emotes) top_emote = top_key(self.emotes)
ret += [ ret = [
f"- **emojis**: {total_emotes:,} (in {percent(self.emote_msg/msg_count)} of msg, avg. {precise(total_emotes/msg_count)}/msg)", f"- **avg. characters / message**: {self.total_characters/msg_count:.2f}",
f"- **most used emoji**: {top_emote} ({plural(self.emotes[top_emote], 'time')}, {percent(self.emotes[top_emote]/total_emotes)})", f"- **plain text messages**: {self.plain_text:,} ({percent(self.plain_text/msg_count)})"
] if self.plain_text > 0
if self.emote_only > 0: else "",
ret += [ f"- **edited messages**: {self.edited:,} ({percent(self.edited/msg_count)})"
if self.edited > 0
else "",
f"- **@\u200beveryone**: {self.everyone:,} ({percent(self.everyone/msg_count)})"
if self.everyone > 0
else "",
f"- **mentions**: {self.mentions:,} (in {percent(self.mention_msg/msg_count)} of msg, avg. {precise(self.mentions/msg_count)}/msg)"
if self.mentions > 0
else "",
f"- **answers**: {self.answers:,} ({percent(self.answers/msg_count)})"
if self.answers > 0
else "",
f"- **emojis**: {total_emotes:,} (in {percent(self.emote_msg/msg_count)} of msg, avg. {precise(total_emotes/msg_count)}/msg)"
if total_emotes > 0
else "",
f"- **most used emoji**: {top_emote} ({plural(self.emotes[top_emote], 'time')}, {percent(self.emotes[top_emote]/total_emotes)})"
if total_emotes > 0
else "",
f"- **emoji-only messages**: {self.emote_only:,} ({percent(self.emote_only/msg_count)})" f"- **emoji-only messages**: {self.emote_only:,} ({percent(self.emote_only/msg_count)})"
] if self.emote_only > 0
if self.images > 0: else "",
ret += [f"- **images**: {self.images:,} ({percent(self.images/msg_count)})"] f"- **images**: {self.images:,} ({percent(self.images/msg_count)})"
if self.links > 0: if self.images > 0
ret += [f"- **links**: {self.links:,} ({percent(self.link_msg/msg_count)})"] else "",
if self.spoilers > 0: f"- **links**: {self.links:,} ({percent(self.link_msg/msg_count)})"
ret += [ if self.links > 0
else "",
f"- **spoilers**: {self.spoilers:,} ({percent(self.spoilers/msg_count)})" f"- **spoilers**: {self.spoilers:,} ({percent(self.spoilers/msg_count)})"
if self.spoilers > 0
else "",
f"- **tts messages**: {self.tts:,} ({percent(self.tts/msg_count)})"
if self.tts > 0
else "",
] ]
if self.tts > 0:
ret += [f"- **tts messages**: {self.tts:,} ({percent(self.tts/msg_count)})"]
return ret return ret
+9 -9
View File
@@ -38,7 +38,10 @@ class Frequency:
*, *,
member_specific: bool, member_specific: bool,
) -> List[str]: ) -> List[str]:
self.dates.sort()
delta = self.dates[-1] - self.dates[0] delta = self.dates[-1] - self.dates[0]
if delta.days == 0:
delta = timedelta(days=1)
total_msg = len(self.dates) total_msg = len(self.dates)
busiest_weekday = top_key(self.week) busiest_weekday = top_key(self.week)
busiest_hour = top_key(self.day) busiest_hour = top_key(self.day)
@@ -46,7 +49,7 @@ class Frequency:
if ( if (
self.dates[0].weekday() <= busiest_weekday self.dates[0].weekday() <= busiest_weekday
and self.dates[-1].weekday() >= busiest_weekday and self.dates[-1].weekday() >= busiest_weekday
): ) or n_weekdays == 0:
n_weekdays += 1 n_weekdays += 1
n_hours = delta.days n_hours = delta.days
if self.dates[0].hour <= busiest_hour and self.dates[-1].hour >= busiest_hour: if self.dates[0].hour <= busiest_hour and self.dates[-1].hour >= busiest_hour:
@@ -56,19 +59,16 @@ class Frequency:
f"- **latest message**: {str_datetime(self.dates[-1])} ({from_now(self.dates[-1])})", f"- **latest message**: {str_datetime(self.dates[-1])} ({from_now(self.dates[-1])})",
f"- **messages/day**: {precise(total_msg/delta.days, precision=3)}", f"- **messages/day**: {precise(total_msg/delta.days, precision=3)}",
f"- **busiest day of week**: {calendar.day_name[busiest_weekday]} (~{precise(self.week[busiest_weekday]/n_weekdays, precision=3)} msg, {percent(self.week[busiest_weekday]/total_msg)})", f"- **busiest day of week**: {calendar.day_name[busiest_weekday]} (~{precise(self.week[busiest_weekday]/n_weekdays, precision=3)} msg, {percent(self.week[busiest_weekday]/total_msg)})",
f"- **busiest day ever**: {str_date(self.busiest_day)} ({from_now(self.busiest_day)}, {self.busiest_day_count} msg)", f"- **busiest day ever**: {str_date(self.busiest_day)} ({from_now(self.busiest_day)}, {self.busiest_day_count} msg)"
if self.busiest_day is not None
else "",
f"- **messages/hour**: {precise(total_msg*3600/delta.total_seconds(), precision=3)}", f"- **messages/hour**: {precise(total_msg*3600/delta.total_seconds(), precision=3)}",
f"- **busiest hour of day**: {busiest_hour:0>2}:00 (~{precise(self.day[busiest_hour]/n_hours, precision=3)} msg, {percent(self.day[busiest_hour]/total_msg)})", f"- **busiest hour of day**: {busiest_hour:0>2}:00 (~{precise(self.day[busiest_hour]/n_hours, precision=3)} msg, {percent(self.day[busiest_hour]/total_msg)})",
f"- **busiest hour ever**: {str_datetime(self.busiest_hour)} ({from_now(self.busiest_hour)}, {self.busiest_hour_count} msg)", f"- **busiest hour ever**: {str_datetime(self.busiest_hour)} ({from_now(self.busiest_hour)}, {self.busiest_hour_count} msg)",
f"- **longest break**: {plural(round(self.longest_break.total_seconds()/3600), 'hour')} ({plural(self.longest_break.days,'day')}) from {str_datetime(self.longest_break_start)} ({from_now(self.longest_break_start)})", f"- **longest break**: {plural(round(self.longest_break.total_seconds()/3600), 'hour')} ({plural(self.longest_break.days,'day')}) from {str_datetime(self.longest_break_start)} ({from_now(self.longest_break_start)})",
f"- **avg. streak**: {precise(sum(self.streaks)/len(self.streaks), precision=3)} msg", f"- **avg. streak**: {precise(sum(self.streaks)/len(self.streaks), precision=3)} msg",
]
if member_specific:
ret += [
f"- **longest streak**: {self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)} ({from_now(self.longest_streak_start)})" f"- **longest streak**: {self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)} ({from_now(self.longest_streak_start)})"
] if member_specific
else: else f"- **longest streak**: {mention(self.longest_streak_author)} ({self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)}, {from_now(self.longest_streak_start)})",
ret += [
f"- **longest streak**: {mention(self.longest_streak_author)} ({self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)}, {from_now(self.longest_streak_start)})"
] ]
return ret return ret
+51 -55
View File
@@ -25,74 +25,70 @@ class Presence:
show_top_channel: bool, show_top_channel: bool,
member_specific: bool, member_specific: bool,
) -> List[str]: ) -> List[str]:
ret = []
if chan_count is None: if chan_count is None:
type = "server's" type = "server's"
elif chan_count == 1: elif chan_count == 1:
type = "channel's" type = "channel's"
else: else:
type = "channels'" type = "channels'"
if member_specific:
ret += [
f"- **messages**: {msg_count:,} ({percent(msg_count/total_msg)} of {type})"
]
else:
top_member = top_key(self.messages) top_member = top_key(self.messages)
ret += [
f"- **top messages**: {mention(top_member)} ({self.messages[top_member]:,} msg, {percent(self.messages[top_member]/val_sum(self.messages))})"
]
if show_top_channel:
top_channel = top_key(self.channel_usage) top_channel = top_key(self.channel_usage)
channel_sum = val_sum(self.channel_usage) channel_sum = val_sum(self.channel_usage)
found_in = sorted( found_in = top_key(
self.channel_usage, self.channel_usage,
key=lambda k: self.channel_usage[k] / self.channel_total[k], key=lambda k: self.channel_usage[k] / self.channel_total[k],
)[-1] )
ret += [
f"- **most visited channel**: {channel_mention(top_channel)} ({self.channel_usage[top_channel]:,} msg, {percent(self.channel_usage[top_channel]/channel_sum)})",
]
if member_specific:
ret += [
f"- **most contributed channel**: {channel_mention(found_in)} ({self.channel_usage[found_in]:,} msg, {percent(self.channel_usage[found_in]/self.channel_total[found_in])} of {type})"
]
if member_specific:
if len(self.mentions) > 0:
top_mention = top_key(self.mentions) top_mention = top_key(self.mentions)
mention_sum = val_sum(self.mentions) mention_sum = val_sum(self.mentions)
ret += [ top_mention_others = top_key(self.mention_others)
f"- **was mentioned**: {plural(mention_sum, 'time')} ({percent(mention_sum/val_sum(self.mention_count))} of {type})", mention_others_sum = val_sum(self.mention_others)
f"- **mostly mentioned by**: {mention(top_mention)} ({plural(self.mentions[top_mention], 'time')}, {percent(self.mentions[top_mention]/mention_sum)})", top_member_mentioned = top_key(self.mention_count)
] total_reaction_used = val_sum(self.reactions)
if len(self.mention_others) > 0:
top_mention = top_key(self.mention_others)
mention_sum = val_sum(self.mention_others)
if member_specific:
ret += [
f"- **mentioned others**: {plural(mention_sum, 'time')} ({percent(mention_sum/val_sum(self.mention_count))} of {type})",
f"- **mostly mentioned**: {mention(top_mention)} ({plural(self.mention_others[top_mention], 'time')}, {percent(self.mention_others[top_mention]/mention_sum)})",
]
else:
top_member = top_key(self.mention_count)
ret += [
f"- **mentioned**: {plural(mention_sum, 'time')} ({mention(top_member)}, {percent(self.mention_count[top_member]/val_sum(self.mention_count))})",
f"- **top mentions**: {mention(top_member)} ({plural(self.mention_count[top_member], 'time')}, {percent(self.mention_count[top_member]/val_sum(self.mention_count))})",
f"- **most mentioned**: {mention(top_mention)} ({plural(self.mention_others[top_mention], 'time')}, {percent(self.mention_others[top_mention]/mention_sum)})",
]
if len(self.reactions) > 0:
total_used = val_sum(self.reactions)
top_reaction = top_key(self.reactions) top_reaction = top_key(self.reactions)
ret += [ top_reaction_member = top_key(self.used_reaction)
f"- **reactions**: {plural(total_used, 'time')}",
f"- **most used reaction**: {top_reaction} ({plural(self.reactions[top_reaction], 'time')}, {percent(self.reactions[top_reaction]/total_used)})", ret = [
f"- **messages**: {msg_count:,} ({percent(msg_count/total_msg)} of {type})"
if member_specific
else f"- **top messages**: {mention(top_member)} ({self.messages[top_member]:,} msg, {percent(self.messages[top_member]/val_sum(self.messages))})",
f"- **most visited channel**: {channel_mention(top_channel)} ({self.channel_usage[top_channel]:,} msg, {percent(self.channel_usage[top_channel]/channel_sum)})"
if show_top_channel
else "",
f"- **most contributed channel**: {channel_mention(found_in)} ({self.channel_usage[found_in]:,} msg, {percent(self.channel_usage[found_in]/self.channel_total[found_in])} of {type})"
if show_top_channel and member_specific
else "",
f"- **was mentioned**: {plural(mention_sum, 'time')} ({percent(mention_sum/val_sum(self.mention_count))} of {type})"
if member_specific and len(self.mentions) > 0
else "",
f"- **mostly mentioned by**: {mention(top_mention)} ({plural(self.mentions[top_mention], 'time')}, {percent(self.mentions[top_mention]/mention_sum)})"
if member_specific and len(self.mentions) > 0
else "",
f"- **mentioned others**: {plural(mention_others_sum, 'time')} ({percent(mention_others_sum/val_sum(self.mention_count))} of {type})"
if len(self.mention_others) > 0 and member_specific
else "",
f"- **mostly mentioned**: {mention(top_mention_others)} ({plural(self.mention_others[top_mention_others], 'time')}, {percent(self.mention_others[top_mention_others]/mention_others_sum)})"
if len(self.mention_others) > 0 and member_specific
else "",
f"- **mentioned**: {plural(mention_others_sum, 'time')} ({mention(top_member_mentioned)}, {percent(self.mention_count[top_member_mentioned]/val_sum(self.mention_count))})"
if len(self.mention_others) > 0 and not member_specific
else "",
f"- **top mentions**: {mention(top_member_mentioned)} ({plural(self.mention_count[top_member_mentioned], 'time')}, {percent(self.mention_count[top_member_mentioned]/val_sum(self.mention_count))})"
if len(self.mention_others) > 0 and not member_specific
else "",
f"- **most mentioned**: {mention(top_mention_others)} ({plural(self.mention_others[top_mention_others], 'time')}, {percent(self.mention_others[top_mention_others]/mention_others_sum)})"
if len(self.mention_others) > 0 and not member_specific
else "",
f"- **reactions**: {plural(total_reaction_used, 'time')}"
if len(self.reactions) > 0 and not member_specific
else "",
f"- **reactions**: {plural(total_reaction_used, 'time')} ({percent(total_reaction_used/val_sum(self.used_reaction))} of {type})"
if len(self.reactions) > 0 and member_specific
else "",
f"- **top reactions**: {mention(top_reaction_member)} ({plural(self.used_reaction[top_reaction_member], 'time')}, {percent(self.used_reaction[top_reaction_member]/val_sum(self.used_reaction))})"
if len(self.reactions) > 0 and not member_specific
else "",
f"- **most used reaction**: {top_reaction} ({plural(self.reactions[top_reaction], 'time')}, {percent(self.reactions[top_reaction]/total_reaction_used)})"
if len(self.reactions) > 0
else "",
] ]
if member_specific:
ret[
-2
] += f" ({percent(total_used/val_sum(self.used_reaction))} of {type})"
else:
top_member = top_key(self.used_reaction)
ret.insert(
-1,
f"- **top reactions**: {mention(top_member)} ({plural(self.used_reaction[top_member], 'time')}, {percent(self.used_reaction[top_member]/val_sum(self.used_reaction))})",
)
return ret return ret
+1 -1
View File
@@ -1,3 +1,3 @@
from .message_log import MessageLog from .message_log import MessageLog
from .channel_logs import ChannelLogs from .channel_logs import ChannelLogs
from .guild_logs import GuildLogs, ALREADY_RUNNING, CANCELLED from .guild_logs import GuildLogs, ALREADY_RUNNING, CANCELLED, NO_FILE
+72 -27
View File
@@ -1,5 +1,7 @@
from typing import Union, Tuple, Any from typing import Union, Tuple, Any
import discord import discord
from discord import message
from datetime import datetime
from . import MessageLog from . import MessageLog
from utils import FakeMessage from utils import FakeMessage
@@ -7,6 +9,8 @@ from utils import FakeMessage
CHUNK_SIZE = 2000 CHUNK_SIZE = 2000
FORMAT = 3 FORMAT = 3
NOT_SERIALIZED = ["channel", "guild", "start_date"]
class ChannelLogs: class ChannelLogs:
def __init__(self, channel: Union[discord.TextChannel, dict], guild: Any): def __init__(self, channel: Union[discord.TextChannel, dict], guild: Any):
@@ -15,8 +19,10 @@ class ChannelLogs:
self.id = channel.id self.id = channel.id
self.name = channel.name self.name = channel.name
self.last_message_id = None self.last_message_id = None
self.first_message_id = None
self.format = FORMAT self.format = FORMAT
self.messages = [] self.messages = []
self.start_date = None
elif isinstance(channel, dict): elif isinstance(channel, dict):
self.format = channel["format"] if "format" in channel else None self.format = channel["format"] if "format" in channel else None
if not self.is_format(): if not self.is_format():
@@ -28,63 +34,102 @@ class ChannelLogs:
if channel["last_message_id"] is not None if channel["last_message_id"] is not None
else None else None
) )
self.first_message_id = (
int(channel["first_message_id"])
if "first_message_id" in channel
and channel["first_message_id"] is not None
else None
)
self.messages = [ self.messages = [
MessageLog(message, self) for message in channel["messages"] MessageLog(message, self) for message in channel["messages"]
] ]
self.start_date = (
self.messages[-1].created_at if len(self.messages) > 0 else None
)
def is_format(self): def is_format(self):
return self.format == FORMAT return self.format == FORMAT
async def load(self, channel: discord.TextChannel) -> Tuple[int, int]: async def load(
self, channel: discord.TextChannel, start_date: datetime, stop_date: datetime
) -> Tuple[int, int]:
self.name = channel.name self.name = channel.name
self.channel = channel self.channel = channel
is_empty = self.last_message_id is None
try: try:
if self.last_message_id is not None: # append if is_empty:
sanity_check = len(await channel.history(limit=1).flatten())
if sanity_check != 1:
yield len(self.messages), True
return
# load backward
if is_empty or (
self.first_message_id is not None
and (
start_date is None
or (self.start_date is not None and self.start_date > start_date)
)
):
first_message_date = None
tmp_message_id = 0
done = 0
while (
first_message_date is None
or (
done >= CHUNK_SIZE
and (start_date is None or first_message_date > start_date)
)
) and tmp_message_id != self.first_message_id:
tmp_message_id = self.first_message_id
done = 0
async for message in channel.history(
limit=CHUNK_SIZE,
before=FakeMessage(self.first_message_id)
if self.first_message_id is not None
else None,
oldest_first=False,
):
done += 1
self.first_message_id = message.id
first_message_date = message.created_at
m = MessageLog(message, self)
await m.load(message)
self.messages += [m]
yield len(self.messages), False
if done < CHUNK_SIZE: # reached bottom
self.first_message_id = None
self.last_message_id = channel.last_message_id
# load forward
last_message_date = self.messages[0].created_at
if not is_empty and (stop_date is None or last_message_date < stop_date):
tmp_message_id = None tmp_message_id = None
while ( while (
self.last_message_id != channel.last_message_id self.last_message_id != channel.last_message_id
and self.last_message_id != tmp_message_id and (stop_date is None or last_message_date < stop_date)
): ) and self.last_message_id != tmp_message_id:
tmp_message_id = self.last_message_id tmp_message_id = self.last_message_id
async for message in channel.history( async for message in channel.history(
limit=CHUNK_SIZE, limit=CHUNK_SIZE,
after=FakeMessage(self.last_message_id), after=FakeMessage(self.last_message_id),
oldest_first=True, oldest_first=True,
): ):
last_message_date = message.created_at
self.last_message_id = message.id self.last_message_id = message.id
m = MessageLog(message, self) m = MessageLog(message, self)
await m.load(message) await m.load(message)
self.messages.insert(0, m) self.messages.insert(0, m)
yield len(self.messages), False yield len(self.messages), False
else: # first load
last_message_id = None
done = 0
sanity_check = len(await channel.history(limit=1).flatten())
if sanity_check == 1:
while done >= CHUNK_SIZE or last_message_id is None:
done = 0
async for message in channel.history(
limit=CHUNK_SIZE,
before=FakeMessage(last_message_id)
if last_message_id is not None
else None,
oldest_first=False,
):
done += 1
last_message_id = message.id
m = MessageLog(message, self)
await m.load(message)
self.messages += [m]
yield len(self.messages), False
self.last_message_id = channel.last_message_id
except discord.errors.HTTPException: except discord.errors.HTTPException:
yield -1, True yield -1, True
return # When an exception occurs (like Forbidden) return # When an exception occurs (like Forbidden)
self.start_date = (
self.messages[-1].created_at if len(self.messages) > 0 else None
)
yield len(self.messages), True yield len(self.messages), True
def dict(self) -> dict: def dict(self) -> dict:
channel = dict(self.__dict__) channel = dict(self.__dict__)
channel.pop("channel", None) for key in NOT_SERIALIZED:
channel.pop("guild", None) channel.pop(key, None)
channel["messages"] = [message.dict() for message in self.messages] channel["messages"] = [message.dict() for message in self.messages]
return channel return channel
+77 -13
View File
@@ -15,6 +15,7 @@ from utils import code_message, delta, deltas
LOG_DIR = "logs" LOG_DIR = "logs"
LOG_EXT = ".logz"
current_analysis = [] current_analysis = []
current_analysis_lock = threading.Lock() current_analysis_lock = threading.Lock()
@@ -22,12 +23,22 @@ current_analysis_lock = threading.Lock()
ALREADY_RUNNING = -100 ALREADY_RUNNING = -100
CANCELLED = -200 CANCELLED = -200
NO_FILE = -300
# 5 minutes, assume 'fast' arg
MIN_MODIFICATION_TIME = 5 * 60 MIN_MODIFICATION_TIME = 5 * 60
# ~1 year, remove log file
MAX_MODIFICATION_TIME = 365 * 24 * 60 * 60
class Worker: class Worker:
def __init__(self, channel_log: ChannelLogs, channel: discord.TextChannel): def __init__(
self,
channel_log: ChannelLogs,
channel: discord.TextChannel,
start_date: datetime,
stop_date: datetime,
):
self.channel_log = channel_log self.channel_log = channel_log
self.channel = channel self.channel = channel
self.start_msg = len(channel_log.messages) self.start_msg = len(channel_log.messages)
@@ -36,12 +47,16 @@ class Worker:
self.done = False self.done = False
self.cancelled = False self.cancelled = False
self.loop = asyncio.get_event_loop() self.loop = asyncio.get_event_loop()
self.start_date = start_date
self.stop_date = stop_date
def start(self): def start(self):
asyncio.run_coroutine_threadsafe(self.process(), self.loop) asyncio.run_coroutine_threadsafe(self.process(), self.loop)
async def process(self): async def process(self):
async for count, done in self.channel_log.load(self.channel): async for count, done in self.channel_log.load(
self.channel, self.start_date, self.stop_date
):
if count > 0: if count > 0:
self.queried_msg = count - self.start_msg self.queried_msg = count - self.start_msg
self.total_msg = count self.total_msg = count
@@ -54,7 +69,7 @@ class GuildLogs:
def __init__(self, guild: discord.Guild): def __init__(self, guild: discord.Guild):
self.id = guild.id self.id = guild.id
self.guild = guild self.guild = guild
self.log_file = os.path.join(LOG_DIR, f"{guild.id}.logz") self.log_file = os.path.join(LOG_DIR, f"{guild.id}{LOG_EXT}")
self.channels = {} self.channels = {}
self.locked = False self.locked = False
@@ -74,16 +89,17 @@ class GuildLogs:
return self.locked and self.log_file not in current_analysis return self.locked and self.log_file not in current_analysis
def lock(self) -> bool: def lock(self) -> bool:
self.locked = True
current_analysis_lock.acquire() current_analysis_lock.acquire()
if self.log_file in current_analysis: if self.log_file in current_analysis:
current_analysis_lock.release() current_analysis_lock.release()
return False return False
self.locked = True
current_analysis.append(self.log_file) current_analysis.append(self.log_file)
current_analysis_lock.release() current_analysis_lock.release()
return True return True
def unlock(self): def unlock(self):
if self.locked:
self.locked = False self.locked = False
current_analysis_lock.acquire() current_analysis_lock.acquire()
if self.log_file in current_analysis: if self.log_file in current_analysis:
@@ -93,7 +109,9 @@ class GuildLogs:
async def load( async def load(
self, self,
progress: discord.Message, progress: discord.Message,
target_channels: List[discord.TextChannel] = [], target_channels: List[discord.TextChannel],
start_date: datetime,
stop_date: datetime,
*, *,
fast: bool, fast: bool,
fresh: bool, fresh: bool,
@@ -106,7 +124,8 @@ class GuildLogs:
if not os.path.exists(LOG_DIR): if not os.path.exists(LOG_DIR):
os.mkdir(LOG_DIR) os.mkdir(LOG_DIR)
last_time = None last_time = None
if os.path.exists(self.log_file): if not os.path.exists(self.log_file):
return NO_FILE, 0
channels = {} channels = {}
try: try:
last_time = os.path.getmtime(self.log_file) last_time = os.path.getmtime(self.log_file)
@@ -122,9 +141,7 @@ class GuildLogs:
t0 = datetime.now() t0 = datetime.now()
json_data = gzip.decompress(gziped_data) json_data = gzip.decompress(gziped_data)
del gziped_data del gziped_data
logging.info( logging.info(f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms")
f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms"
)
if self.check_cancelled(): if self.check_cancelled():
return CANCELLED, 0 return CANCELLED, 0
await code_message(progress, "Reading saved history (3/4)...") await code_message(progress, "Reading saved history (3/4)...")
@@ -150,8 +167,6 @@ class GuildLogs:
logging.error(f"log {self.guild.id} > invalid JSON") logging.error(f"log {self.guild.id} > invalid JSON")
except IOError: except IOError:
logging.error(f"log {self.guild.id} > cannot read") logging.error(f"log {self.guild.id} > cannot read")
else:
fast = False
if len(target_channels) == 0: if len(target_channels) == 0:
target_channels = ( target_channels = (
@@ -171,6 +186,8 @@ class GuildLogs:
if ( if (
not fast not fast
and not fresh and not fresh
and start_date is None
and stop_date is None
and last_time is not None and last_time is not None
and (time.time() - last_time) < MIN_MODIFICATION_TIME and (time.time() - last_time) < MIN_MODIFICATION_TIME
): ):
@@ -178,8 +195,10 @@ class GuildLogs:
channel channel
for channel in target_channels for channel in target_channels
if channel.id not in self.channels if channel.id not in self.channels
or self.channels[channel.id].first_message_id is not None
] ]
if len(invalid_target_channels) == 0: if len(invalid_target_channels) == 0:
logging.info(f"log {self.guild.id} > assumed fast")
fast = True fast = True
if self.locked: if self.locked:
self.unlock() self.unlock()
@@ -212,7 +231,9 @@ class GuildLogs:
if channel.id not in self.channels or fresh: if channel.id not in self.channels or fresh:
loading_new += 1 loading_new += 1
self.channels[channel.id] = ChannelLogs(channel, self) self.channels[channel.id] = ChannelLogs(channel, self)
workers += [Worker(self.channels[channel.id], channel)] workers += [
Worker(self.channels[channel.id], channel, start_date, stop_date)
]
warning_msg = "(this might take a while)" warning_msg = "(this might take a while)"
if len(target_channels) > 5 and loading_new > 5: if len(target_channels) > 5 and loading_new > 5:
warning_msg = "(most channels are new, this will take a long while)" warning_msg = "(most channels are new, this will take a long while)"
@@ -253,7 +274,7 @@ class GuildLogs:
f"Reading new history...\n{total_msg:,} messages in {total_chan:,}/{max_chan:,} channels ({round(queried_msg/deltas(t0)):,}m/s)\n{warning_msg}{remaining_msg}", f"Reading new history...\n{total_msg:,} messages in {total_chan:,}/{max_chan:,} channels ({round(queried_msg/deltas(t0)):,}m/s)\n{warning_msg}{remaining_msg}",
) )
logging.info( logging.info(
f"log {self.guild.id} > queried in {delta(t0):,}ms -> {queried_msg / deltas(t0):,.3f} m/s" f"log {self.guild.id} > queried {queried_msg} in {delta(t0):,}ms -> {queried_msg / deltas(t0):,.3f} m/s"
) )
# write logs # write logs
real_total_msg = sum( real_total_msg = sum(
@@ -322,3 +343,46 @@ class GuildLogs:
f"No cancellable analysis are currently running on this server", f"No cancellable analysis are currently running on this server",
reference=message, reference=message,
) )
@staticmethod
def init_log(guild: List[discord.Guild]):
if not os.path.exists(LOG_DIR):
os.mkdir(LOG_DIR)
filename = os.path.join(LOG_DIR, f"{guild.id}{LOG_EXT}")
if not os.path.exists(filename):
with open(filename, mode="wb") as f:
f.write(gzip.compress(bytes("{}", "utf-8")))
logging.info(f"log {guild.id} > created")
else:
logging.info(f"log {guild.id} > already exists")
@staticmethod
def remove_log(guild: List[discord.Guild]):
if not os.path.exists(LOG_DIR):
os.mkdir(LOG_DIR)
filename = os.path.join(LOG_DIR, f"{guild.id}{LOG_EXT}")
if os.path.exists(filename):
os.unlink(filename)
logging.info(f"log {guild.id} > removed")
else:
logging.info(f"log {guild.id} > does not exists")
@staticmethod
def check_logs(guilds: List[discord.Guild]):
logging.info(f"checking logs...")
if not os.path.exists(LOG_DIR):
os.mkdir(LOG_DIR)
guild_ids = [str(guild.id) for guild in guilds]
for item in os.listdir(LOG_DIR):
path = os.path.join(LOG_DIR, item)
name, ext = os.path.splitext(item)
if os.path.isfile(path) and ext == LOG_EXT:
if (
name in guild_ids
and (time.time() - os.path.getmtime(path)) > MAX_MODIFICATION_TIME
):
logging.info(f"> removing old log '{path}'")
os.unlink(path)
elif name not in guild_ids:
logging.info(f"> removing unused log '{path}'")
os.unlink(path)
+5 -1
View File
@@ -8,6 +8,9 @@ IMAGE_FORMAT = [".gif", ".gifv", ".png", ".jpg", ".jpeg", ".bmp"]
EMBED_IMAGES = ["image", "gifv"] EMBED_IMAGES = ["image", "gifv"]
NOT_SERIALIZED = ["channel"]
class MessageLog: class MessageLog:
def __init__(self, message: Union[discord.Message, dict], channel: Any): def __init__(self, message: Union[discord.Message, dict], channel: Any):
self.channel = channel self.channel = channel
@@ -79,7 +82,8 @@ class MessageLog:
def dict(self) -> dict: def dict(self) -> dict:
message = dict(self.__dict__) message = dict(self.__dict__)
message.pop("channel", None) for key in NOT_SERIALIZED:
message.pop(key, None)
message["created_at"] = self.created_at.isoformat() message["created_at"] = self.created_at.isoformat()
message["edited_at"] = ( message["edited_at"] = (
self.edited_at.isoformat() if self.edited_at is not None else None self.edited_at.isoformat() if self.edited_at is not None else None
+29 -9
View File
@@ -6,7 +6,7 @@ if sys.version_info < (3, 7):
print("Please upgrade your Python version to 3.7.0 or higher") print("Please upgrade your Python version to 3.7.0 or higher")
sys.exit(1) sys.exit(1)
from utils import emojis from utils import emojis, gdpr
from scanners import ( from scanners import (
EmotesScanner, EmotesScanner,
FullScanner, FullScanner,
@@ -33,17 +33,43 @@ emojis.load_emojis()
bot = Bot( bot = Bot(
"Discord Analyst", "Discord Analyst",
"1.12", "1.13",
alias="%", alias="%",
) )
bot.log_calls = True bot.log_calls = True
async def on_ready():
    """Run the log directory check against the guilds currently known by the client."""
    GuildLogs.check_logs(bot.client.guilds)
    # NOTE(review): the meaning of this return value is defined by Bot.register_event - confirm
    return True
async def on_guild_remove():
    """Run the log directory check again, using the client's current guild list."""
    GuildLogs.check_logs(bot.client.guilds)
    # NOTE(review): the meaning of this return value is defined by Bot.register_event - confirm
    return True
bot.register_event(on_ready)
bot.register_event(on_guild_remove)
bot.register_command( bot.register_command(
"(cancel|stop)", "(cancel|stop)",
GuildLogs.cancel, GuildLogs.cancel,
"cancel: stop current analysis (not launched with fast)", "cancel: stop current analysis (not launched with fast)",
"```\n" + "%cancel: Stop current analysis (not launched with fast)\n" + "```", "```\n%cancel: Stop current analysis (not launched with fast)\n```",
)
bot.register_command(
"gdpr",
gdpr.process,
"gdpr: displays GDPR information",
gdpr.HELP,
)
bot.register_command(
"words",
lambda *args: WordsScanner().compute(*args),
"words: (BETA) rank words by their usage",
WordsScanner.help(),
) )
bot.register_command( bot.register_command(
"last", "last",
@@ -63,12 +89,6 @@ bot.register_command(
"first: read first message", "first: read first message",
FirstScanner.help(), FirstScanner.help(),
) )
bot.register_command(
"words",
lambda *args: WordsScanner().compute(*args),
"words: rank words by their usage",
WordsScanner.help(),
)
bot.register_command( bot.register_command(
"mentioned", "mentioned",
lambda *args: MentionedScanner().compute(*args), lambda *args: MentionedScanner().compute(*args),
+6 -10
View File
@@ -8,21 +8,17 @@ import discord
from logs import ChannelLogs, MessageLog from logs import ChannelLogs, MessageLog
from .scanner import Scanner from .scanner import Scanner
from data_types import Counter from data_types import Counter
from utils import COMMON_HELP_ARGS, mention, channel_mention from utils import generate_help, mention, channel_mention
class ChannelsScanner(Scanner): class ChannelsScanner(Scanner):
@staticmethod @staticmethod
def help() -> str: def help() -> str:
return ( return generate_help(
"```\n" "chan",
+ "%chan: Rank channels by their messages\n" "Rank channels by their messages",
+ "arguments:\n" args=["<n> - top <n>, default is 10", "all/everyone - include bots"],
+ COMMON_HELP_ARGS example="5 @user",
+ "* <n> - top <n>, default is 10\n"
+ "* all/everyone - include bots\n"
+ "Example: %chan 10 @user\n"
+ "```"
) )
def __init__(self): def __init__(self):
+2 -10
View File
@@ -8,21 +8,13 @@ import discord
from .scanner import Scanner from .scanner import Scanner
from data_types import Composition from data_types import Composition
from logs import ChannelLogs, MessageLog from logs import ChannelLogs, MessageLog
from utils import emojis, COMMON_HELP_ARGS from utils import emojis, generate_help
class CompositionScanner(Scanner): class CompositionScanner(Scanner):
@staticmethod @staticmethod
def help() -> str: def help() -> str:
return ( return generate_help("compo", "Show composition statistics")
"```\n"
+ "%compo: Show composition statistics\n"
+ "arguments:\n"
+ COMMON_HELP_ARGS
+ "* all/everyone - include bots\n"
+ "Example: %compo #mychannel1 @user\n"
+ "```"
)
def __init__(self): def __init__(self):
super().__init__( super().__init__(
+12 -13
View File
@@ -8,24 +8,23 @@ import discord
from logs import ChannelLogs, MessageLog from logs import ChannelLogs, MessageLog
from data_types import Emote, get_emote_dict from data_types import Emote, get_emote_dict
from .scanner import Scanner from .scanner import Scanner
from utils import emojis, COMMON_HELP_ARGS, plural, precise from utils import emojis, generate_help, plural, precise
class EmotesScanner(Scanner): class EmotesScanner(Scanner):
@staticmethod @staticmethod
def help() -> str: def help() -> str:
return ( return generate_help(
"```\n" "emojis",
+ "%emojis: Rank emojis by their usage\n" "Rank emojis by their usage",
+ "arguments:\n" args=[
+ COMMON_HELP_ARGS "<n> - top <n> emojis, default is 20",
+ "* <n> - top <n> emojis, default is 20\n" "all - list all common emojis in addition to this guild's",
+ "* all - list all common emojis in addition to this guild's\n" "members - show top member for each emojis",
+ "* members - show top member for each emojis\n" "sort:usage/reaction - other sorting methods",
+ "* sort:usage/reaction - other sorting methods\n" "everyone - include bots",
+ "* everyone - include bots\n" ],
+ "Example: %emojis 10 all #mychannel1 #mychannel2 @user\n" example="10 all #mychannel1 #mychannel2 @user",
+ "```"
) )
def __init__(self): def __init__(self):
+2 -3
View File
@@ -3,14 +3,13 @@ from typing import List
# Custom libs # Custom libs
from .history_scanner import HistoryScanner from .history_scanner import HistoryScanner
from utils import generate_help
class FirstScanner(HistoryScanner): class FirstScanner(HistoryScanner):
@staticmethod @staticmethod
def help() -> str: def help() -> str:
return super(FirstScanner, FirstScanner).help( return generate_help("first", "Read first message")
cmd="first", text="Read first message"
)
def __init__(self): def __init__(self):
super().__init__(help=FirstScanner.help()) super().__init__(help=FirstScanner.help())
+3 -11
View File
@@ -8,21 +8,13 @@ import discord
from .scanner import Scanner from .scanner import Scanner
from data_types import Frequency from data_types import Frequency
from logs import ChannelLogs, MessageLog from logs import ChannelLogs, MessageLog
from utils import COMMON_HELP_ARGS from utils import generate_help
class FrequencyScanner(Scanner): class FrequencyScanner(Scanner):
@staticmethod @staticmethod
def help() -> str: def help() -> str:
return ( return generate_help("freq", "Show frequency-related statistics")
"```\n"
+ "%freq: Show frequency-related statistics\n"
+ "arguments:\n"
+ COMMON_HELP_ARGS
+ "* all/everyone - include bots\n"
+ "Example: %freq #mychannel1 @user\n"
+ "```"
)
def __init__(self): def __init__(self):
super().__init__( super().__init__(
@@ -55,7 +47,7 @@ class FrequencyScanner(Scanner):
freq: Frequency, freq: Frequency,
raw_members: List[int], raw_members: List[int],
*, *,
all_messages: bool all_messages: bool,
) -> bool: ) -> bool:
impacted = False impacted = False
# If author is included in the selection (empty list is all) # If author is included in the selection (empty list is all)
+2 -10
View File
@@ -8,21 +8,13 @@ from .scanner import Scanner
from . import FrequencyScanner, CompositionScanner, PresenceScanner from . import FrequencyScanner, CompositionScanner, PresenceScanner
from data_types import Frequency, Composition, Presence from data_types import Frequency, Composition, Presence
from logs import ChannelLogs, MessageLog from logs import ChannelLogs, MessageLog
from utils import COMMON_HELP_ARGS from utils import generate_help
class FullScanner(Scanner): class FullScanner(Scanner):
@staticmethod @staticmethod
def help() -> str: def help() -> str:
return ( return generate_help("scan", "Show full statistics")
"```\n"
+ "%scan: Show full statistics\n"
+ "arguments:\n"
+ COMMON_HELP_ARGS
+ "* all/everyone - include bots\n"
+ "Example: %scan #mychannel1 @user\n"
+ "```"
)
def __init__(self): def __init__(self):
super().__init__( super().__init__(
-13
View File
@@ -7,22 +7,9 @@ import discord
from .scanner import Scanner from .scanner import Scanner
from data_types import History from data_types import History
from logs import ChannelLogs, MessageLog from logs import ChannelLogs, MessageLog
from utils import COMMON_HELP_ARGS
class HistoryScanner(Scanner, ABC): class HistoryScanner(Scanner, ABC):
@staticmethod
def help(*, cmd: str, text: str) -> str:
return (
"```\n"
+ f"%{cmd}: {text}\n"
+ "arguments:\n"
+ COMMON_HELP_ARGS
+ "* all/everyone - include bots\n"
+ "Example: %{cmd} #mychannel1 @user\n"
+ "```"
)
def __init__(self, *, help: str): def __init__(self, *, help: str):
super().__init__( super().__init__(
has_digit_args=True, has_digit_args=True,
+2 -3
View File
@@ -3,14 +3,13 @@ from typing import List
# Custom libs # Custom libs
from .history_scanner import HistoryScanner from .history_scanner import HistoryScanner
from utils import generate_help
class LastScanner(HistoryScanner): class LastScanner(HistoryScanner):
@staticmethod @staticmethod
def help() -> str: def help() -> str:
return super(LastScanner, LastScanner).help( return generate_help("last", "Read last message")
cmd="last", text="Read last message"
)
def __init__(self): def __init__(self):
super().__init__(help=LastScanner.help()) super().__init__(help=LastScanner.help())
+8 -12
View File
@@ -8,22 +8,18 @@ import discord
from logs import ChannelLogs, MessageLog from logs import ChannelLogs, MessageLog
from .scanner import Scanner from .scanner import Scanner
from data_types import Counter from data_types import Counter
from utils import COMMON_HELP_ARGS, plural, precise, mention, alt_mention from utils import generate_help, plural, precise, mention, alt_mention
class MentionedScanner(Scanner): class MentionedScanner(Scanner):
@staticmethod @staticmethod
def help() -> str: def help() -> str:
return ( return generate_help(
"```\n" "mentioned",
+ "%mentioned: Rank specific user's mentions by their usage\n" "Rank specific user's mentions by their usage",
+ "arguments:\n" args=["<n> - top <n>, default is 10", "all/everyone - include bots"],
+ "* @member/me - (required) one or more member\n" example="5 @user",
+ "\n".join(COMMON_HELP_ARGS.split("\n")[1:]) replace_args=[" @member/me - (required) one or more member"],
+ "* <n> - top <n> mentions, default is 10\n"
+ "* all - include bots mentions\n"
+ "Example: %mentioned 10 @user\n"
+ "```"
) )
def __init__(self): def __init__(self):
@@ -45,7 +41,7 @@ class MentionedScanner(Scanner):
"You need to mention at least one member or use `me`", reference=message "You need to mention at least one member or use `me`", reference=message
) )
return False return False
self.all_mentions = "all" in args self.all_mentions = "all" in args or "everyone" in args
# Create mentions dict # Create mentions dict
self.mentions = defaultdict(Counter) self.mentions = defaultdict(Counter)
return True return True
+10 -11
View File
@@ -9,7 +9,7 @@ from logs import ChannelLogs, MessageLog
from .scanner import Scanner from .scanner import Scanner
from data_types import Counter from data_types import Counter
from utils import ( from utils import (
COMMON_HELP_ARGS, generate_help,
plural, plural,
precise, precise,
mention, mention,
@@ -22,16 +22,15 @@ from utils import (
class MentionsScanner(Scanner): class MentionsScanner(Scanner):
@staticmethod @staticmethod
def help() -> str: def help() -> str:
return ( return generate_help(
"```\n" "mentions",
+ "%mentions: Rank mentions by their usage\n" "Rank mentions by their usage",
+ "arguments:\n" args=[
+ COMMON_HELP_ARGS "<n> - top <n>, default is 10",
+ "* <n> - top <n> mentions, default is 10\n" "all - show role/channel/everyone/here mentions",
+ "* all - show role/channel/everyone/here mentions\n" "everyone - include bots mentions",
+ "* everyone - include bots mentions\n" ],
+ "Example: %mentions 10 #mychannel1 #mychannel2 @user\n" example="10 #mychannel1 #mychannel2 @user",
+ "```"
) )
def __init__(self): def __init__(self):
+6 -10
View File
@@ -8,21 +8,17 @@ import discord
from logs import ChannelLogs, MessageLog from logs import ChannelLogs, MessageLog
from .scanner import Scanner from .scanner import Scanner
from data_types import Counter from data_types import Counter
from utils import COMMON_HELP_ARGS, mention, channel_mention from utils import generate_help, mention, channel_mention
class MessagesScanner(Scanner): class MessagesScanner(Scanner):
@staticmethod @staticmethod
def help() -> str: def help() -> str:
return ( return generate_help(
"```\n" "msg",
+ "%msg: Rank users by their messages\n" "Rank users by their messages",
+ "arguments:\n" args=["<n> - top <n>, default is 10", "all/everyone - include bots"],
+ COMMON_HELP_ARGS example="10 #channel",
+ "* <n> - top <n>, default is 10\n"
+ "* all/everyone - include bots\n"
+ "Example: %msg 10 #channel\n"
+ "```"
) )
def __init__(self): def __init__(self):
+2 -10
View File
@@ -7,21 +7,13 @@ import discord
from .scanner import Scanner from .scanner import Scanner
from data_types import Presence from data_types import Presence
from logs import ChannelLogs, MessageLog from logs import ChannelLogs, MessageLog
from utils import COMMON_HELP_ARGS from utils import generate_help
class PresenceScanner(Scanner): class PresenceScanner(Scanner):
@staticmethod @staticmethod
def help() -> str: def help() -> str:
return ( return generate_help("pres", "Show presence statistics")
"```\n"
+ "%pres: Show presence statistics\n"
+ "arguments:\n"
+ COMMON_HELP_ARGS
+ "* all/everyone - include bots\n"
+ "Example: %pres #mychannel1 @user\n"
+ "```"
)
def __init__(self): def __init__(self):
super().__init__( super().__init__(
+2 -3
View File
@@ -3,14 +3,13 @@ from typing import List
# Custom libs # Custom libs
from .history_scanner import HistoryScanner from .history_scanner import HistoryScanner
from utils import generate_help
class RandomScanner(HistoryScanner): class RandomScanner(HistoryScanner):
@staticmethod @staticmethod
def help() -> str: def help() -> str:
return super(RandomScanner, RandomScanner).help( return generate_help("rand", "Read a random message")
cmd="rand", text="Read a random message"
)
def __init__(self): def __init__(self):
super().__init__(help=RandomScanner.help()) super().__init__(help=RandomScanner.help())
+6 -9
View File
@@ -8,20 +8,17 @@ import discord
from logs import ChannelLogs, MessageLog from logs import ChannelLogs, MessageLog
from .scanner import Scanner from .scanner import Scanner
from data_types import Counter from data_types import Counter
from utils import COMMON_HELP_ARGS, mention, channel_mention from utils import generate_help, mention, channel_mention
class ReactionsScanner(Scanner): class ReactionsScanner(Scanner):
@staticmethod @staticmethod
def help() -> str: def help() -> str:
return ( return generate_help(
"```\n" "react",
+ "%react: Rank users by their reactions\n" "Rank users by their reactions",
+ "arguments:\n" args=["<n> - top <n>, default is 10"],
+ COMMON_HELP_ARGS example="10 #channel",
+ "* <n> - top <n>, default is 10\n"
+ "Example: %react 10 #channel\n"
+ "```"
) )
def __init__(self): def __init__(self):
+73 -4
View File
@@ -5,8 +5,24 @@ import logging
import re import re
import discord import discord
from utils import no_duplicate, get_intro, delta
from logs import GuildLogs, ChannelLogs, MessageLog, ALREADY_RUNNING, CANCELLED from utils import (
no_duplicate,
get_intro,
delta,
gdpr,
ISO8601_REGEX,
RELATIVE_REGEX,
parse_time,
)
from logs import (
GuildLogs,
ChannelLogs,
MessageLog,
ALREADY_RUNNING,
CANCELLED,
NO_FILE,
)
class Scanner(ABC): class Scanner(ABC):
@@ -47,22 +63,42 @@ class Scanner(ABC):
str(channel.id) for channel in message.channel_mentions str(channel.id) for channel in message.channel_mentions
] ]
str_mentions = [str(member.id) for member in message.mentions] str_mentions = [str(member.id) for member in message.mentions]
dates = []
for i, arg in enumerate(args[1:]): for i, arg in enumerate(args[1:]):
skip_check = False
if re.match(r"^<@!?\d+>$", arg): if re.match(r"^<@!?\d+>$", arg):
arg = arg[3:-1] if "!" in arg else arg[2:-1] arg = arg[3:-1] if "!" in arg else arg[2:-1]
elif re.match(r"^<#!?\d+>$", arg): elif re.match(r"^<#!?\d+>$", arg):
arg = arg[3:-1] if "!" in arg else arg[2:-1] arg = arg[3:-1] if "!" in arg else arg[2:-1]
elif re.match(ISO8601_REGEX, arg) or re.match(RELATIVE_REGEX, arg):
dates += [parse_time(arg)]
skip_check = True
if len(dates) > 2:
await message.channel.send(
f"Too many date arguments: `{arg}`", reference=message
)
return
if ( if (
arg not in self.valid_args + ["me", "here", "fast", "fresh"] arg not in self.valid_args + ["me", "here", "fast", "fresh"]
and (not arg.isdigit() or not self.has_digit_args) and (not arg.isdigit() or not self.has_digit_args)
and arg not in str_channel_mentions and arg not in str_channel_mentions
and arg not in str_mentions and arg not in str_mentions
and not skip_check
): ):
await message.channel.send( await message.channel.send(
f"Unrecognized argument: `{arg}`", reference=message f"Unrecognized argument: `{arg}`", reference=message
) )
return return
self.start_date = None if len(dates) < 1 else min(dates)
self.stop_date = None if len(dates) < 2 else max(dates)
if self.start_date is not None and self.start_date > datetime.now():
await message.channel.send(
f"Start date is after today", reference=message
)
return
# Get selected channels or all of them if no channel arguments # Get selected channels or all of them if no channel arguments
self.channels = no_duplicate(message.channel_mentions) self.channels = no_duplicate(message.channel_mentions)
@@ -94,7 +130,12 @@ class Scanner(ABC):
allowed_mentions=discord.AllowedMentions.none(), allowed_mentions=discord.AllowedMentions.none(),
) )
total_msg, total_chan = await logs.load( total_msg, total_chan = await logs.load(
progress, self.channels, fast="fast" in args, fresh="fresh" in args progress,
self.channels,
self.start_date,
self.stop_date,
fast="fast" in args,
fresh="fresh" in args,
) )
if total_msg == CANCELLED: if total_msg == CANCELLED:
await message.channel.send( await message.channel.send(
@@ -106,7 +147,24 @@ class Scanner(ABC):
"An analysis is already running on this server, please be patient.", "An analysis is already running on this server, please be patient.",
reference=message, reference=message,
) )
elif total_msg == NO_FILE:
await message.channel.send(gdpr.TEXT)
else: else:
if self.start_date is not None and len(logs.channels) > 0:
self.start_date = max(
self.start_date,
min(
[
logs.channels[channel.id].start_date
for channel in self.channels
if channel.id in logs.channels
and logs.channels[channel.id].start_date is not None
]
),
)
if self.stop_date is None:
self.stop_date = datetime.utcnow()
self.msg_count = 0 self.msg_count = 0
self.total_msg = 0 self.total_msg = 0
self.chan_count = 0 self.chan_count = 0
@@ -118,13 +176,21 @@ class Scanner(ABC):
[ [
self.compute_message(channel_logs, message_log) self.compute_message(channel_logs, message_log)
for message_log in channel_logs.messages for message_log in channel_logs.messages
if (
self.start_date is None
or message_log.created_at >= self.start_date
)
and (
self.stop_date is None
or message_log.created_at <= self.stop_date
)
] ]
) )
self.total_msg += len(channel_logs.messages) self.total_msg += len(channel_logs.messages)
self.msg_count += count self.msg_count += count
self.chan_count += 1 if count > 0 else 0 self.chan_count += 1 if count > 0 else 0
logging.info(f"scan {guild.id} > scanned in {delta(t0):,}ms") logging.info(f"scan {guild.id} > scanned in {delta(t0):,}ms")
if self.total_msg == 0: if self.msg_count == 0:
await message.channel.send( await message.channel.send(
"There are no messages found matching the filters", "There are no messages found matching the filters",
reference=message, reference=message,
@@ -141,12 +207,15 @@ class Scanner(ABC):
self.members, self.members,
self.msg_count, self.msg_count,
self.chan_count, self.chan_count,
self.start_date,
self.stop_date,
) )
) )
logging.info(f"scan {guild.id} > results in {delta(t0):,}ms") logging.info(f"scan {guild.id} > results in {delta(t0):,}ms")
response = "" response = ""
first = True first = True
for r in results: for r in results:
if r:
if len(response + "\n" + r) > 2000: if len(response + "\n" + r) > 2000:
await message.channel.send( await message.channel.send(
response, response,
+16 -22
View File
@@ -9,7 +9,7 @@ from logs import ChannelLogs, MessageLog
from .scanner import Scanner from .scanner import Scanner
from data_types import Counter from data_types import Counter
from utils import ( from utils import (
COMMON_HELP_ARGS, generate_help,
plural, plural,
precise, precise,
) )
@@ -18,16 +18,15 @@ from utils import (
class WordsScanner(Scanner): class WordsScanner(Scanner):
@staticmethod @staticmethod
def help() -> str: def help() -> str:
return ( return generate_help(
"```\n" "words",
+ "%words: Rank words by their usage\n" "(BETA) Rank words by their usage",
+ "arguments:\n" args=[
+ COMMON_HELP_ARGS "<n> - words containings <n> or more letters, default is 3",
+ "* <n> - words containings <n> or more letters, default is 3\n" "<n2> - top <n2> words, default is 10",
+ "* <n2> - top <n2> words, default is 10\n" "all/everyone - include bots",
+ "* everyone - include bots\n" ],
+ "Example: %words 5 10 #mychannel1 #mychannel2 @user\n" example="5 10 #mychannel1 #mychannel2 @user",
+ "```"
) )
def __init__(self): def __init__(self):
@@ -104,16 +103,13 @@ class WordsScanner(Scanner):
or message.author in raw_members or message.author in raw_members
): ):
impacted = True impacted = True
content = " ".join( content = message.content
[ content = re.sub(r"```.+```", "", content, flags=re.DOTALL)
block content = re.sub(r"`.+`", "", content, flags=re.DOTALL)
for block in message.content.split() content = re.sub(r"\w+:\/\/[^ ]+", "", content)
if not re.match(r"^\w+:\/\/", block)
]
)
for word in re.split("[^\w\-':]", content): for word in re.split("[^\w\-':]", content):
m = re.match( m = re.match(
r"(?!^:\w+:$)^[^\w]*((?![\d_])\w.*(?![\d_])\w)[^\w]*$", word r"(?!^:\w+:$)^[^\w]*((?![\d_])\w[\w\-']*(?![\d_])\w)[^\w]*$", word
) )
if m: if m:
word = m[1].lower() word = m[1].lower()
@@ -126,7 +122,5 @@ class WordsScanner(Scanner):
words[word] = words[word + case] words[word] = words[word + case]
del words[word + case] del words[word + case]
break break
words[word].update_use( words[word].update_use(1, message.created_at)
message.content.count(word), message.created_at
)
return impacted return impacted
+65
View File
@@ -0,0 +1,65 @@
import discord
from logs import GuildLogs
HELP = """```
%gdpr: Displays GDPR information
arguments:
* agree - agree to GDPR
* revoke - remove this server's data
```"""
TEXT = """
__**About Analyst-bot's data usage**__
**TL;DR**
Analyst-bot collects text message information. It does not share collected data with any third-party and data is retained 18 months or until the bot is leaving the guild/server.
**Data collection**
Analyst-bot collects a Discord guild/server's history when asked to.
This includes:
- Visible text channel names
- Visible text messages: date and time of creation and edition, author, content, reactions and other available metadata (pinned, tts, etc.)
This does __not__ includes:
- Voice channels and not visible channels
- Not visible text messages
- Visible text messages' embedded content, images and other attachments
**Data processing**
Any data collected is only processed in order to produce a one-time report sent to the user immediately. No temporary data are retained.
**Data storage and retain policy**
Analyst-bot stores the collected data in files that are accessible by the software and its administrator only.
Any collected data are retained maximum 18 months until deletion or when the bot is leaving a guild/server.
**Data sharing**
Analyst-bot does not share the data collected with any third-party.
**Right to retract**
If you want to have your data removed, you can use the `%gdpr revoke` command or remove this bot from your guild/server.
**Terms agreement**
By agreeing to these terms, you ensure having the legal age if you are in a country that does have one and you also ensure having the consent of every member involved.
*If you want more information, please contact the creator of this bot: <https://github.com/Klemek/discord-analyst>.*
Type `%gdpr agree` to agree to these terms, `%gdpr revoke` to remove this guild/server's collected data or `%gdpr` to see this message again.
"""
AGREE_TEXT = "Thanks for agreeing for these terms, you can now run analysis on this guild/server."
REVOKE_TEXT = "This guild/server's data has been deleted. To run new analysis you must agree to the terms again."
async def process(client: discord.Client, message: discord.Message, *args: str):
    """
    Handle the %gdpr command.

    Only ``args[1]`` is interpreted (``args[0]`` is presumably the command
    name itself - verify against the caller):
    - no argument: send the GDPR text
    - help: send the command help
    - agree/accept: create this guild's log file
    - revoke/cancel/remove/delete: delete this guild's log file

    :param client: unused here
    :param message: the message that triggered the command
    :param args: the command arguments
    """
    # "<=" so that an empty argument list cannot reach args[1] below
    if len(args) <= 1:
        await message.channel.send(TEXT)
    elif len(args) > 2:
        await message.channel.send("Too many arguments", reference=message)
    elif args[1] == "help":
        await message.channel.send(HELP, reference=message)
    elif args[1] in ["agree", "accept"]:
        GuildLogs.init_log(message.channel.guild)
        await message.channel.send(AGREE_TEXT, reference=message)
    elif args[1] in ["revoke", "cancel", "remove", "delete"]:
        GuildLogs.remove_log(message.channel.guild)
        await message.channel.send(REVOKE_TEXT, reference=message)
    else:
        await message.channel.send(
            f"Unrecognized argument: `{args[1]}`", reference=message
        )
+122 -34
View File
@@ -1,19 +1,44 @@
from typing import List, Dict, Union, Optional, Any from calendar import month
from typing import Callable, List, Dict, Union, Optional, Any
import os import os
import logging import logging
import discord import discord
import math import math
from datetime import datetime from datetime import datetime, timedelta
import re
import dateutil.parser
from dateutil.relativedelta import relativedelta
# OTHER # OTHER
COMMON_HELP_ARGS = ( COMMON_HELP_ARGS = [
"" "@member/me - filter for one or more member",
+ "* @member/me - filter for one or more member\n" "#channel/here - filter for one or more channel",
+ "* #channel/here - filter for one or more channel\n" "<date1> - filter after <date1>",
+ "* fast - only read cache\n" "<date2> - filter before <date2>",
+ "* fresh - does not read cache (long)\n" "fast - only read cache",
"fresh - does not read cache (long)",
]
def generate_help(
    cmd: str,
    info: str,
    *,
    args: Optional[List[str]] = None,
    example: str = "#mychannel1 @user",
    replace_args: Optional[List[str]] = None,
) -> str:
    """
    Build the help text of a command, wrapped in a code block.

    :param cmd: command name, without the "%" prefix
    :param info: one-line description of the command
    :param args: command-specific argument lines, appended after the common
        ones (default: the "all/everyone - include bots" line)
    :param example: arguments shown after the command in the example line
    :param replace_args: lines replacing the first entries of
        COMMON_HELP_ARGS, one for one (default: none)
    :return: the formatted help text
    """
    # None sentinels instead of list literals: a default list would be one
    # object shared by every call
    if args is None:
        args = ["all/everyone - include bots"]
    if replace_args is None:
        replace_args = []
    arg_list = "* " + "\n* ".join(
        list(replace_args) + COMMON_HELP_ARGS[len(replace_args) :] + list(args)
    )
    return f"""```
%{cmd}: {info}
arguments:
{arg_list}
(Sample dates: 2020 / 2021-11 / 2021-06-28 / 2020-06-28T23:00 / today / week / 8days / 1y)
Example: %{cmd} {example}
```"""
def delta(t0: datetime): def delta(t0: datetime):
@@ -92,11 +117,19 @@ def no_duplicate(seq: list) -> list:
# DICTS # DICTS
def top_key(
    d: Dict[Union[str, int], int], key: Optional[Callable] = None
) -> Optional[Union[str, int]]:
    """
    Get the key of a dict that ranks highest.

    :param d: the dict to inspect
    :param key: ranking function applied to each key of ``d``
        (default: the value stored under that key)
    :return: the highest ranked key (the last one in iteration order when
        several are tied), or None when ``d`` is empty
    """
    if not d:
        return None
    if key is None:
        key = d.__getitem__
    # sorted() is stable, so among equal ranks the last inserted key wins
    return sorted(d, key=key)[-1]
def val_sum(d: Dict[Any, int]) -> int:
    """
    Sum the values of a dict.

    :param d: the dict to sum
    :return: the sum of all values, 0 when ``d`` is empty
    """
    # sum() of an empty view is already 0: no special case needed
    return sum(d.values())
@@ -135,6 +168,51 @@ def precise(p: float, *, precision: int = 2) -> str:
# DATE FORMATTING # DATE FORMATTING
ISO8601_REGEX = r"^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)?$"
ISO8601_FULL = "0000-01-01T00:00:00"
def parse_iso_datetime(str_date: str) -> datetime:
    """
    Parse a possibly truncated ISO 8601 date.

    A date cut after the year, month, day, hour or minute (for instance
    "2021" or "2021-06-28T23") is first completed with the matching tail of
    ISO8601_FULL, so that every missing field takes the template's value.

    :param str_date: the text to parse
    :return: the parsed date, as returned by dateutil
    """
    if re.match(
        r"^\d{4}(-\d{2}(-\d{2}(T\d{2}(:\d{2}(:\d{2}(:\d{2})?)?)?)?)?)?$", str_date
    ):
        # nothing is appended when the input is already as long as the template
        str_date = str_date + ISO8601_FULL[len(str_date) :]
    return dateutil.parser.parse(str_date)
# Anchored on both ends: with a prefix-only match, ordinary arguments such as
# "here", "me" or "members" would be accepted as relative dates.
RELATIVE_REGEX = r"^(yesterday|today|\d*h(ours?)?|\d*d(ays?)?|\d*w(eeks?)?|\d*m(onths?)?|\d*y(ears?)?)$"


def parse_relative_time(src: str) -> datetime:
    """
    Convert a relative date ("today", "yesterday", "week", "8days", "1y"...)
    to a naive datetime.

    For "<n><unit>" forms, the unit is recognized by its first letter
    (h/d/w/m/y) and <n> defaults to 1; the result is the current UTC time
    minus that amount.

    :param src: the relative date
    :return: the matching date
    :raises ValueError: when ``src`` has no known unit
    """
    if src in ("today", "yesterday"):
        # local clock shifted by the current local/UTC offset
        # NOTE(review): "today" resolves to the current instant, not to
        # midnight - confirm this is intended
        timezone_delta = datetime.utcnow() - datetime.now()
        if src == "today":
            return datetime.today() + timezone_delta
        return datetime.today() - relativedelta(days=1) + timezone_delta
    m = re.match(r"(\d*)(\w+)", src)
    if m is None:
        raise ValueError(f"Not a relative date: {src!r}")
    value = int(m[1]) if m[1] else 1
    # first letter of the unit -> relativedelta keyword
    units = {"h": "hours", "d": "days", "w": "weeks", "m": "months", "y": "years"}
    unit = units.get(m[2][0])
    if unit is None:
        raise ValueError(f"Unknown unit in relative date: {src!r}")
    return datetime.utcnow() - relativedelta(**{unit: value})
def parse_time(src: str) -> datetime:
    """
    Parse a date argument, either relative ("8days") or ISO 8601 ("2021-06").

    :param src: the text to parse
    :return: the parsed date
    """
    # fullmatch: a prefix match would send any word starting with
    # h/d/w/m/y to the relative parser
    if re.fullmatch(RELATIVE_REGEX, src):
        return parse_relative_time(src)
    return parse_iso_datetime(src)
def str_date(date: datetime) -> str: def str_date(date: datetime) -> str:
return date.strftime("%d %b. %Y") # 12 Jun. 2018 return date.strftime("%d %b. %Y") # 12 Jun. 2018
@@ -144,29 +222,37 @@ def str_datetime(date: datetime) -> str:
return date.strftime("%H:%M, %d %b. %Y") # 12:05, 12 Jun. 2018 return date.strftime("%H:%M, %d %b. %Y") # 12:05, 12 Jun. 2018
def str_delta(delay: timedelta) -> str:
    """
    Describe a duration with its largest unit only.

    :param delay: the duration; its sign is ignored
    :return: "no time", "a minute", "<n> minutes", "an hour", "<n> hours",
        "one day" or "<n> days"
    """
    # a negative timedelta is stored as days=-1 plus a positive remainder,
    # which would read as many hours: work on the magnitude instead
    delay = abs(delay)
    if delay.days > 1:
        return f"{delay.days:,} days"
    if delay.days == 1:
        return "one day"
    minutes = delay.seconds // 60
    hours = minutes // 60
    if hours > 1:
        return f"{hours} hours"
    if hours == 1:
        return "an hour"
    if minutes > 1:
        return f"{minutes} minutes"
    if minutes == 1:
        return "a minute"
    return "no time"
def from_now(src: Optional[datetime]) -> str:
    """
    Describe how long ago a date was, relative to the current UTC time.

    :param src: the date, or None
    :return: "never" for None, "now", "yesterday", or "<duration> ago"
    """
    if src is None:
        return "never"
    elapsed = str_delta(datetime.utcnow() - src)
    # the two durations that have a dedicated wording
    special = {"no time": "now", "one day": "yesterday"}
    return special.get(elapsed, elapsed + " ago")
# APP SPECIFIC # APP SPECIFIC
@@ -179,46 +265,48 @@ def get_intro(
members: List[discord.Member], members: List[discord.Member],
nmm: int, # number of messages impacted nmm: int, # number of messages impacted
nc: int, # number of impacted channels nc: int, # number of impacted channels
start_datetime: datetime,
stop_datetime: datetime,
) -> str: ) -> str:
""" """
Get the introduction sentence of the response Get the introduction sentence of the response
""" """
time_text = ""
if start_datetime is not None:
stop_datetime = datetime.now() if stop_datetime is None else stop_datetime
time_text = f" (in {str_delta(stop_datetime - start_datetime)})"
# Show all data (members, channels) when it's less than 5 units # Show all data (members, channels) when it's less than 5 units
if len(members) == 0: if len(members) == 0:
# Full scan of the server # Full scan of the server
if full: if full:
return f"{subject} in this server ({nc} channels, {nmm:,} messages):" return f"{subject} in this server ({nc} channels, {nmm:,} messages){time_text}:"
elif len(channels) < 5: elif len(channels) < 5:
return f"{aggregate([c.mention for c in channels])} {subject.lower()} in {nmm:,} messages:" return f"{aggregate([c.mention for c in channels])} {subject.lower()} in {nmm:,} messages{time_text}:"
else: else:
return ( return f"These {len(channels)} channels {subject.lower()} in {nmm:,} messages{time_text}:"
f"These {len(channels)} channels {subject.lower()} in {nmm:,} messages:"
)
elif len(members) < 5: elif len(members) < 5:
if full: if full:
return f"{aggregate([m.mention for m in members])} {subject.lower()} in {nmm:,} messages:" return f"{aggregate([m.mention for m in members])} {subject.lower()} in {nmm:,} messages{time_text}:"
elif len(channels) < 5: elif len(channels) < 5:
return ( return (
f"{aggregate([m.mention for m in members])} on {aggregate([c.mention for c in channels])} " f"{aggregate([m.mention for m in members])} on {aggregate([c.mention for c in channels])} "
f"{subject.lower()} in {nmm:,} messages:" f"{subject.lower()} in {nmm:,} messages{time_text}:"
) )
else: else:
return ( return (
f"{aggregate([m.mention for m in members])} on these {len(channels)} channels " f"{aggregate([m.mention for m in members])} on these {len(channels)} channels "
f"{subject.lower()} in {nmm:,} messages:" f"{subject.lower()} in {nmm:,} messages{time_text}:"
) )
else: else:
if full: if full:
return ( return f"These {len(members)} members {subject.lower()} in {nmm:,} messages{time_text}:"
f"These {len(members)} members {subject.lower()} in {nmm:,} messages:"
)
elif len(channels) < 5: elif len(channels) < 5:
return ( return (
f"These {len(members)} members on {aggregate([c.mention for c in channels])} " f"These {len(members)} members on {aggregate([c.mention for c in channels])} "
f"{subject.lower()} in {nmm:,} messages:" f"{subject.lower()} in {nmm:,} messages{time_text}:"
) )
else: else:
return ( return (
f"These {len(members)} members on these {len(channels)} channels " f"These {len(members)} members on these {len(channels)} channels "
f"{subject.lower()} in {nmm:,} messages:" f"{subject.lower()} in {nmm:,} messages{time_text}:"
) )