Merge pull request #32 from Klemek/dev

v1.13
2021-04-09 19:51:54 +02:00
parent 8cc0e1fe65 76af4661ed
commit 0399fd8e61
30 changed files with 728 additions and 414 deletions
@@ -0,0 +1,26 @@
 name: Python
 on: ["push", "pull_request"]
 jobs:
  syntax:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [3.7, 3.8, 3.9]
    steps:
    - uses: actions/checkout@v2
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        python -m pip install flake8
    - name: Lint with flake8
      run: |
        # stop the build if there are Python syntax errors or undefined names
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
@@ -21,6 +21,7 @@
 * %first - read first message
 * %rand - read a random message
 * %last - read last message
 * %gdpr - displays GDPR information
 * %emojis - rank emotes by their usage
  * arguments:
    * <n> - top <n> emojis, default is 20
@@ -43,7 +44,7 @@
 * %react - rank users by their reactions
  * arguments:
    * <n> - top <n> messages, default is 10
-* %words - rank words by their usage
+* %words - (BETA) rank words by their usage
  * arguments:
    * <n> - words containing <n> or more letters, default is 3
    * <n2> - top <n2> words, default is 10
@@ -52,9 +53,13 @@
 * Common arguments:
    * @member/me: filter for one or more members
    * #channel/here: filter for one or more channels
    * <date1> - filter after <date1>
    * <date2> - filter before <date2>
    * all/everyone - include messages from bots
    * fast: only read cache
    * fresh: does not read cache
 (Sample dates: 2020 / 2021-11 / 2021-06-28 / 2020-06-28T23:00 / today / week / 8days / 1y)
 ```
 ## Running this bot
@@ -104,6 +109,12 @@ python3 src/main.py
 ## Changelog
 * **v1.13**
  * improved scan `%words`
  * remove old and unused logs at startup and when leaving a guild
  * GDPR disclaimer before scanning
  * start and stop dates
  * bug fixes and improvements
 * **v1.12**
  * more scans: `%words`
  * concurrent `fast` analysis
@@ -1,3 +1,4 @@
 discord.py
 python-dotenv
 python-dateutil
 git+git://github.com/Klemek/miniscord.git
@@ -23,49 +23,45 @@ class Composition:
        self.spoilers = 0
    def to_string(self, msg_count: int) -> List[str]:
        ret = []
        ret += [
            f"- **avg. characters / message**: {self.total_characters/msg_count:.2f}"
        ]
        if self.plain_text > 0:
            ret += [
                f"- **plain text messages**: {self.plain_text:,} ({percent(self.plain_text/msg_count)})"
            ]
        if self.edited > 0:
            ret += [
                f"- **edited messages**: {self.edited:,} ({percent(self.edited/msg_count)})"
            ]
        if self.everyone > 0:
            ret += [
                f"- **@\u200beveryone**: {self.everyone:,} ({percent(self.everyone/msg_count)})"
            ]
        if self.mentions > 0:
            ret += [
                f"- **mentions**: {self.mentions:,} (in {percent(self.mention_msg/msg_count)} of msg, avg. {precise(self.mentions/msg_count)}/msg)",
            ]
        if self.answers > 0:
            ret += [
                f"- **answers**: {self.answers:,} ({percent(self.answers/msg_count)})"
            ]
        total_emotes = val_sum(self.emotes)
        if total_emotes > 0:
        top_emote = top_key(self.emotes)
-            ret += [
+        ret = [
-                f"- **emojis**: {total_emotes:,} (in {percent(self.emote_msg/msg_count)} of msg, avg. {precise(total_emotes/msg_count)}/msg)",
+            f"- **avg. characters / message**: {self.total_characters/msg_count:.2f}",
-                f"- **most used emoji**: {top_emote} ({plural(self.emotes[top_emote], 'time')}, {percent(self.emotes[top_emote]/total_emotes)})",
+            f"- **plain text messages**: {self.plain_text:,} ({percent(self.plain_text/msg_count)})"
-            ]
+            if self.plain_text > 0
-            if self.emote_only > 0:
+            else "",
-                ret += [
+            f"- **edited messages**: {self.edited:,} ({percent(self.edited/msg_count)})"
            if self.edited > 0
            else "",
            f"- **@\u200beveryone**: {self.everyone:,} ({percent(self.everyone/msg_count)})"
            if self.everyone > 0
            else "",
            f"- **mentions**: {self.mentions:,} (in {percent(self.mention_msg/msg_count)} of msg, avg. {precise(self.mentions/msg_count)}/msg)"
            if self.mentions > 0
            else "",
            f"- **answers**: {self.answers:,} ({percent(self.answers/msg_count)})"
            if self.answers > 0
            else "",
            f"- **emojis**: {total_emotes:,} (in {percent(self.emote_msg/msg_count)} of msg, avg. {precise(total_emotes/msg_count)}/msg)"
            if total_emotes > 0
            else "",
            f"- **most used emoji**: {top_emote} ({plural(self.emotes[top_emote], 'time')}, {percent(self.emotes[top_emote]/total_emotes)})"
            if total_emotes > 0
            else "",
            f"- **emoji-only messages**: {self.emote_only:,} ({percent(self.emote_only/msg_count)})"
-                ]
+            if self.emote_only > 0
-        if self.images > 0:
+            else "",
-            ret += [f"- **images**: {self.images:,} ({percent(self.images/msg_count)})"]
+            f"- **images**: {self.images:,} ({percent(self.images/msg_count)})"
-        if self.links > 0:
+            if self.images > 0
-            ret += [f"- **links**: {self.links:,} ({percent(self.link_msg/msg_count)})"]
+            else "",
-        if self.spoilers > 0:
+            f"- **links**: {self.links:,} ({percent(self.link_msg/msg_count)})"
-            ret += [
+            if self.links > 0
            else "",
            f"- **spoilers**: {self.spoilers:,} ({percent(self.spoilers/msg_count)})"
            if self.spoilers > 0
            else "",
            f"- **tts messages**: {self.tts:,} ({percent(self.tts/msg_count)})"
            if self.tts > 0
            else "",
        ]
        if self.tts > 0:
            ret += [f"- **tts messages**: {self.tts:,} ({percent(self.tts/msg_count)})"]
        return ret
@@ -38,7 +38,10 @@ class Frequency:
        *,
        member_specific: bool,
    ) -> List[str]:
        self.dates.sort()
        delta = self.dates[-1] - self.dates[0]
        if delta.days == 0:
            delta = timedelta(days=1)
        total_msg = len(self.dates)
        busiest_weekday = top_key(self.week)
        busiest_hour = top_key(self.day)
@@ -46,7 +49,7 @@ class Frequency:
        if (
            self.dates[0].weekday() <= busiest_weekday
            and self.dates[-1].weekday() >= busiest_weekday
-        ):
+        ) or n_weekdays == 0:
            n_weekdays += 1
        n_hours = delta.days
        if self.dates[0].hour <= busiest_hour and self.dates[-1].hour >= busiest_hour:
@@ -56,19 +59,16 @@ class Frequency:
            f"- **latest message**: {str_datetime(self.dates[-1])} ({from_now(self.dates[-1])})",
            f"- **messages/day**: {precise(total_msg/delta.days, precision=3)}",
            f"- **busiest day of week**: {calendar.day_name[busiest_weekday]} (~{precise(self.week[busiest_weekday]/n_weekdays, precision=3)} msg, {percent(self.week[busiest_weekday]/total_msg)})",
-            f"- **busiest day ever**: {str_date(self.busiest_day)} ({from_now(self.busiest_day)}, {self.busiest_day_count} msg)",
+            f"- **busiest day ever**: {str_date(self.busiest_day)} ({from_now(self.busiest_day)}, {self.busiest_day_count} msg)"
            if self.busiest_day is not None
            else "",
            f"- **messages/hour**: {precise(total_msg*3600/delta.total_seconds(), precision=3)}",
            f"- **busiest hour of day**: {busiest_hour:0>2}:00 (~{precise(self.day[busiest_hour]/n_hours, precision=3)} msg, {percent(self.day[busiest_hour]/total_msg)})",
            f"- **busiest hour ever**: {str_datetime(self.busiest_hour)} ({from_now(self.busiest_hour)}, {self.busiest_hour_count} msg)",
            f"- **longest break**: {plural(round(self.longest_break.total_seconds()/3600), 'hour')} ({plural(self.longest_break.days,'day')}) from {str_datetime(self.longest_break_start)} ({from_now(self.longest_break_start)})",
            f"- **avg. streak**: {precise(sum(self.streaks)/len(self.streaks), precision=3)} msg",
        ]
        if member_specific:
            ret += [
            f"- **longest streak**: {self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)} ({from_now(self.longest_streak_start)})"
-            ]
+            if member_specific
-        else:
+            else f"- **longest streak**: {mention(self.longest_streak_author)} ({self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)}, {from_now(self.longest_streak_start)})",
            ret += [
                f"- **longest streak**: {mention(self.longest_streak_author)} ({self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)}, {from_now(self.longest_streak_start)})"
        ]
        return ret
@@ -25,74 +25,70 @@ class Presence:
        show_top_channel: bool,
        member_specific: bool,
    ) -> List[str]:
        ret = []
        if chan_count is None:
            type = "server's"
        elif chan_count == 1:
            type = "channel's"
        else:
            type = "channels'"
        if member_specific:
            ret += [
                f"- **messages**: {msg_count:,} ({percent(msg_count/total_msg)} of {type})"
            ]
        else:
        top_member = top_key(self.messages)
            ret += [
                f"- **top messages**:  {mention(top_member)} ({self.messages[top_member]:,} msg, {percent(self.messages[top_member]/val_sum(self.messages))})"
            ]
        if show_top_channel:
        top_channel = top_key(self.channel_usage)
        channel_sum = val_sum(self.channel_usage)
-            found_in = sorted(
+        found_in = top_key(
            self.channel_usage,
            key=lambda k: self.channel_usage[k] / self.channel_total[k],
-            )[-1]
+        )
            ret += [
                f"- **most visited channel**: {channel_mention(top_channel)} ({self.channel_usage[top_channel]:,} msg, {percent(self.channel_usage[top_channel]/channel_sum)})",
            ]
            if member_specific:
                ret += [
                    f"- **most contributed channel**: {channel_mention(found_in)} ({self.channel_usage[found_in]:,} msg, {percent(self.channel_usage[found_in]/self.channel_total[found_in])} of {type})"
                ]
        if member_specific:
            if len(self.mentions) > 0:
        top_mention = top_key(self.mentions)
        mention_sum = val_sum(self.mentions)
-                ret += [
+        top_mention_others = top_key(self.mention_others)
-                    f"- **was mentioned**: {plural(mention_sum, 'time')} ({percent(mention_sum/val_sum(self.mention_count))} of {type})",
+        mention_others_sum = val_sum(self.mention_others)
-                    f"- **mostly mentioned by**: {mention(top_mention)} ({plural(self.mentions[top_mention], 'time')}, {percent(self.mentions[top_mention]/mention_sum)})",
+        top_member_mentioned = top_key(self.mention_count)
-                ]
+        total_reaction_used = val_sum(self.reactions)
        if len(self.mention_others) > 0:
            top_mention = top_key(self.mention_others)
            mention_sum = val_sum(self.mention_others)
            if member_specific:
                ret += [
                    f"- **mentioned others**: {plural(mention_sum, 'time')} ({percent(mention_sum/val_sum(self.mention_count))} of {type})",
                    f"- **mostly mentioned**: {mention(top_mention)} ({plural(self.mention_others[top_mention], 'time')}, {percent(self.mention_others[top_mention]/mention_sum)})",
                ]
            else:
                top_member = top_key(self.mention_count)
                ret += [
                    f"- **mentioned**: {plural(mention_sum, 'time')} ({mention(top_member)}, {percent(self.mention_count[top_member]/val_sum(self.mention_count))})",
                    f"- **top mentions**: {mention(top_member)} ({plural(self.mention_count[top_member], 'time')}, {percent(self.mention_count[top_member]/val_sum(self.mention_count))})",
                    f"- **most mentioned**: {mention(top_mention)} ({plural(self.mention_others[top_mention], 'time')}, {percent(self.mention_others[top_mention]/mention_sum)})",
                ]
        if len(self.reactions) > 0:
            total_used = val_sum(self.reactions)
        top_reaction = top_key(self.reactions)
-            ret += [
+        top_reaction_member = top_key(self.used_reaction)
-                f"- **reactions**: {plural(total_used, 'time')}",
+
-                f"- **most used reaction**: {top_reaction} ({plural(self.reactions[top_reaction], 'time')}, {percent(self.reactions[top_reaction]/total_used)})",
+        ret = [
            f"- **messages**: {msg_count:,} ({percent(msg_count/total_msg)} of {type})"
            if member_specific
            else f"- **top messages**:  {mention(top_member)} ({self.messages[top_member]:,} msg, {percent(self.messages[top_member]/val_sum(self.messages))})",
            f"- **most visited channel**: {channel_mention(top_channel)} ({self.channel_usage[top_channel]:,} msg, {percent(self.channel_usage[top_channel]/channel_sum)})"
            if show_top_channel
            else "",
            f"- **most contributed channel**: {channel_mention(found_in)} ({self.channel_usage[found_in]:,} msg, {percent(self.channel_usage[found_in]/self.channel_total[found_in])} of {type})"
            if show_top_channel and member_specific
            else "",
            f"- **was mentioned**: {plural(mention_sum, 'time')} ({percent(mention_sum/val_sum(self.mention_count))} of {type})"
            if member_specific and len(self.mentions) > 0
            else "",
            f"- **mostly mentioned by**: {mention(top_mention)} ({plural(self.mentions[top_mention], 'time')}, {percent(self.mentions[top_mention]/mention_sum)})"
            if member_specific and len(self.mentions) > 0
            else "",
            f"- **mentioned others**: {plural(mention_others_sum, 'time')} ({percent(mention_others_sum/val_sum(self.mention_count))} of {type})"
            if len(self.mention_others) > 0 and member_specific
            else "",
            f"- **mostly mentioned**: {mention(top_mention_others)} ({plural(self.mention_others[top_mention_others], 'time')}, {percent(self.mention_others[top_mention_others]/mention_others_sum)})"
            if len(self.mention_others) > 0 and member_specific
            else "",
            f"- **mentioned**: {plural(mention_others_sum, 'time')} ({mention(top_member_mentioned)}, {percent(self.mention_count[top_member_mentioned]/val_sum(self.mention_count))})"
            if len(self.mention_others) > 0 and not member_specific
            else "",
            f"- **top mentions**: {mention(top_member_mentioned)} ({plural(self.mention_count[top_member_mentioned], 'time')}, {percent(self.mention_count[top_member_mentioned]/val_sum(self.mention_count))})"
            if len(self.mention_others) > 0 and not member_specific
            else "",
            f"- **most mentioned**: {mention(top_mention_others)} ({plural(self.mention_others[top_mention_others], 'time')}, {percent(self.mention_others[top_mention_others]/mention_others_sum)})"
            if len(self.mention_others) > 0 and not member_specific
            else "",
            f"- **reactions**: {plural(total_reaction_used, 'time')}"
            if len(self.reactions) > 0 and not member_specific
            else "",
            f"- **reactions**: {plural(total_reaction_used, 'time')} ({percent(total_reaction_used/val_sum(self.used_reaction))} of {type})"
            if len(self.reactions) > 0 and member_specific
            else "",
            f"- **top reactions**: {mention(top_reaction_member)} ({plural(self.used_reaction[top_reaction_member], 'time')}, {percent(self.used_reaction[top_reaction_member]/val_sum(self.used_reaction))})"
            if len(self.reactions) > 0 and not member_specific
            else "",
            f"- **most used reaction**: {top_reaction} ({plural(self.reactions[top_reaction], 'time')}, {percent(self.reactions[top_reaction]/total_reaction_used)})"
            if len(self.reactions) > 0
            else "",
        ]
            if member_specific:
                ret[
                    -2
                ] += f" ({percent(total_used/val_sum(self.used_reaction))} of {type})"
            else:
                top_member = top_key(self.used_reaction)
                ret.insert(
                    -1,
                    f"- **top reactions**: {mention(top_member)} ({plural(self.used_reaction[top_member], 'time')}, {percent(self.used_reaction[top_member]/val_sum(self.used_reaction))})",
                )
        return ret
@@ -1,3 +1,3 @@
 from .message_log import MessageLog
 from .channel_logs import ChannelLogs
-from .guild_logs import GuildLogs, ALREADY_RUNNING, CANCELLED
+from .guild_logs import GuildLogs, ALREADY_RUNNING, CANCELLED, NO_FILE
@@ -1,5 +1,7 @@
 from typing import Union, Tuple, Any
 import discord
 from discord import message
 from datetime import datetime
 from . import MessageLog
 from utils import FakeMessage
@@ -7,6 +9,8 @@ from utils import FakeMessage
 CHUNK_SIZE = 2000
 FORMAT = 3
 NOT_SERIALIZED = ["channel", "guild", "start_date"]
 class ChannelLogs:
    def __init__(self, channel: Union[discord.TextChannel, dict], guild: Any):
@@ -15,8 +19,10 @@ class ChannelLogs:
            self.id = channel.id
            self.name = channel.name
            self.last_message_id = None
            self.first_message_id = None
            self.format = FORMAT
            self.messages = []
            self.start_date = None
        elif isinstance(channel, dict):
            self.format = channel["format"] if "format" in channel else None
            if not self.is_format():
@@ -28,63 +34,102 @@ class ChannelLogs:
                if channel["last_message_id"] is not None
                else None
            )
            self.first_message_id = (
                int(channel["first_message_id"])
                if "first_message_id" in channel
                and channel["first_message_id"] is not None
                else None
            )
            self.messages = [
                MessageLog(message, self) for message in channel["messages"]
            ]
            self.start_date = (
                self.messages[-1].created_at if len(self.messages) > 0 else None
            )
    def is_format(self) -> bool:
        """Return whether this log's ``format`` equals the module-level ``FORMAT``."""
        return self.format == FORMAT
-    async def load(self, channel: discord.TextChannel) -> Tuple[int, int]:
+    async def load(
        self, channel: discord.TextChannel, start_date: datetime, stop_date: datetime
    ) -> Tuple[int, int]:
        self.name = channel.name
        self.channel = channel
        is_empty = self.last_message_id is None
        try:
-            if self.last_message_id is not None:  # append
+            if is_empty:
                sanity_check = len(await channel.history(limit=1).flatten())
                if sanity_check != 1:
                    yield len(self.messages), True
                    return
            # load backward
            if is_empty or (
                self.first_message_id is not None
                and (
                    start_date is None
                    or (self.start_date is not None and self.start_date > start_date)
                )
            ):
                first_message_date = None
                tmp_message_id = 0
                done = 0
                while (
                    first_message_date is None
                    or (
                        done >= CHUNK_SIZE
                        and (start_date is None or first_message_date > start_date)
                    )
                ) and tmp_message_id != self.first_message_id:
                    tmp_message_id = self.first_message_id
                    done = 0
                    async for message in channel.history(
                        limit=CHUNK_SIZE,
                        before=FakeMessage(self.first_message_id)
                        if self.first_message_id is not None
                        else None,
                        oldest_first=False,
                    ):
                        done += 1
                        self.first_message_id = message.id
                        first_message_date = message.created_at
                        m = MessageLog(message, self)
                        await m.load(message)
                        self.messages += [m]
                    yield len(self.messages), False
                if done < CHUNK_SIZE:  # reached bottom
                    self.first_message_id = None
                self.last_message_id = channel.last_message_id
            # load forward
            last_message_date = self.messages[0].created_at
            if not is_empty and (stop_date is None or last_message_date < stop_date):
                tmp_message_id = None
                while (
                    self.last_message_id != channel.last_message_id
-                    and self.last_message_id != tmp_message_id
+                    and (stop_date is None or last_message_date < stop_date)
-                ):
+                ) and self.last_message_id != tmp_message_id:
                    tmp_message_id = self.last_message_id
                    async for message in channel.history(
                        limit=CHUNK_SIZE,
                        after=FakeMessage(self.last_message_id),
                        oldest_first=True,
                    ):
                        last_message_date = message.created_at
                        self.last_message_id = message.id
                        m = MessageLog(message, self)
                        await m.load(message)
                        self.messages.insert(0, m)
                    yield len(self.messages), False
            else:  # first load
                last_message_id = None
                done = 0
                sanity_check = len(await channel.history(limit=1).flatten())
                if sanity_check == 1:
                    while done >= CHUNK_SIZE or last_message_id is None:
                        done = 0
                        async for message in channel.history(
                            limit=CHUNK_SIZE,
                            before=FakeMessage(last_message_id)
                            if last_message_id is not None
                            else None,
                            oldest_first=False,
                        ):
                            done += 1
                            last_message_id = message.id
                            m = MessageLog(message, self)
                            await m.load(message)
                            self.messages += [m]
                        yield len(self.messages), False
                    self.last_message_id = channel.last_message_id
        except discord.errors.HTTPException:
            yield -1, True
            return  # When an exception occurs (like Forbidden)
        self.start_date = (
            self.messages[-1].created_at if len(self.messages) > 0 else None
        )
        yield len(self.messages), True
    def dict(self) -> dict:
        channel = dict(self.__dict__)
-        channel.pop("channel", None)
+        for key in NOT_SERIALIZED:
-        channel.pop("guild", None)
+            channel.pop(key, None)
        channel["messages"] = [message.dict() for message in self.messages]
        return channel
@@ -15,6 +15,7 @@ from utils import code_message, delta, deltas
 LOG_DIR = "logs"
 LOG_EXT = ".logz"
 current_analysis = []
 current_analysis_lock = threading.Lock()
@@ -22,12 +23,22 @@ current_analysis_lock = threading.Lock()
 ALREADY_RUNNING = -100
 CANCELLED = -200
 NO_FILE = -300
 # 5 minutes, assume 'fast' arg
 MIN_MODIFICATION_TIME = 5 * 60
 # ~1 year, remove log file
 MAX_MODIFICATION_TIME = 365 * 24 * 60 * 60
 class Worker:
-    def __init__(self, channel_log: ChannelLogs, channel: discord.TextChannel):
+    def __init__(
        self,
        channel_log: ChannelLogs,
        channel: discord.TextChannel,
        start_date: datetime,
        stop_date: datetime,
    ):
        self.channel_log = channel_log
        self.channel = channel
        self.start_msg = len(channel_log.messages)
@@ -36,12 +47,16 @@ class Worker:
        self.done = False
        self.cancelled = False
        self.loop = asyncio.get_event_loop()
        self.start_date = start_date
        self.stop_date = stop_date
    def start(self) -> None:
        """Schedule ``process()`` on ``self.loop`` without waiting for it.

        The future returned by ``run_coroutine_threadsafe`` is discarded.
        """
        asyncio.run_coroutine_threadsafe(self.process(), self.loop)
    async def process(self):
-        async for count, done in self.channel_log.load(self.channel):
+        async for count, done in self.channel_log.load(
            self.channel, self.start_date, self.stop_date
        ):
            if count > 0:
                self.queried_msg = count - self.start_msg
                self.total_msg = count
@@ -54,7 +69,7 @@ class GuildLogs:
    def __init__(self, guild: discord.Guild):
        self.id = guild.id
        self.guild = guild
-        self.log_file = os.path.join(LOG_DIR, f"{guild.id}.logz")
+        self.log_file = os.path.join(LOG_DIR, f"{guild.id}{LOG_EXT}")
        self.channels = {}
        self.locked = False
@@ -74,16 +89,17 @@ class GuildLogs:
        return self.locked and self.log_file not in current_analysis
    def lock(self) -> bool:
        """Try to register this guild's log file as currently being analysed.

        Returns:
            True when ``self.log_file`` was added to ``current_analysis`` and
            ``self.locked`` was set; False when the file is already listed
            there, in which case ``self.locked`` is left unchanged.
        """
        # ``with`` releases the module-level lock on every exit path.
        with current_analysis_lock:
            if self.log_file in current_analysis:
                # A failed attempt must not flag this instance as locked:
                # other methods of this class branch on ``self.locked``.
                return False
            self.locked = True
            current_analysis.append(self.log_file)
            return True
    def unlock(self):
        if self.locked:
            self.locked = False
            current_analysis_lock.acquire()
            if self.log_file in current_analysis:
@@ -93,7 +109,9 @@ class GuildLogs:
    async def load(
        self,
        progress: discord.Message,
-        target_channels: List[discord.TextChannel] = [],
+        target_channels: List[discord.TextChannel],
        start_date: datetime,
        stop_date: datetime,
        *,
        fast: bool,
        fresh: bool,
@@ -106,7 +124,8 @@ class GuildLogs:
        if not os.path.exists(LOG_DIR):
            os.mkdir(LOG_DIR)
        last_time = None
-        if os.path.exists(self.log_file):
+        if not os.path.exists(self.log_file):
            return NO_FILE, 0
        channels = {}
        try:
            last_time = os.path.getmtime(self.log_file)
@@ -122,9 +141,7 @@ class GuildLogs:
            t0 = datetime.now()
            json_data = gzip.decompress(gziped_data)
            del gziped_data
-                logging.info(
+            logging.info(f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms")
                    f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms"
                )
            if self.check_cancelled():
                return CANCELLED, 0
            await code_message(progress, "Reading saved history (3/4)...")
@@ -150,8 +167,6 @@ class GuildLogs:
            logging.error(f"log {self.guild.id} > invalid JSON")
        except IOError:
            logging.error(f"log {self.guild.id} > cannot read")
        else:
            fast = False
        if len(target_channels) == 0:
            target_channels = (
@@ -171,6 +186,8 @@ class GuildLogs:
        if (
            not fast
            and not fresh
            and start_date is None
            and stop_date is None
            and last_time is not None
            and (time.time() - last_time) < MIN_MODIFICATION_TIME
        ):
@@ -178,8 +195,10 @@ class GuildLogs:
                channel
                for channel in target_channels
                if channel.id not in self.channels
                or self.channels[channel.id].first_message_id is not None
            ]
            if len(invalid_target_channels) == 0:
                logging.info(f"log {self.guild.id} > assumed fast")
                fast = True
                if self.locked:
                    self.unlock()
@@ -212,7 +231,9 @@ class GuildLogs:
                if channel.id not in self.channels or fresh:
                    loading_new += 1
                    self.channels[channel.id] = ChannelLogs(channel, self)
-                workers += [Worker(self.channels[channel.id], channel)]
+                workers += [
                    Worker(self.channels[channel.id], channel, start_date, stop_date)
                ]
            warning_msg = "(this might take a while)"
            if len(target_channels) > 5 and loading_new > 5:
                warning_msg = "(most channels are new, this will take a long while)"
@@ -253,7 +274,7 @@ class GuildLogs:
                    f"Reading new history...\n{total_msg:,} messages in {total_chan:,}/{max_chan:,} channels ({round(queried_msg/deltas(t0)):,}m/s)\n{warning_msg}{remaining_msg}",
                )
            logging.info(
-                f"log {self.guild.id} > queried in {delta(t0):,}ms -> {queried_msg / deltas(t0):,.3f} m/s"
+                f"log {self.guild.id} > queried {queried_msg} in {delta(t0):,}ms -> {queried_msg / deltas(t0):,.3f} m/s"
            )
            # write logs
            real_total_msg = sum(
@@ -322,3 +343,46 @@ class GuildLogs:
                f"No cancellable analysis are currently running on this server",
                reference=message,
            )
    @staticmethod
    def init_log(guild: discord.Guild):
        """Create an empty compressed log file for ``guild`` if none exists.

        The file is ``<LOG_DIR>/<guild.id><LOG_EXT>`` and holds a
        gzip-compressed empty JSON object (``{}``). An existing file is left
        untouched. Both outcomes are logged at INFO level.

        Args:
            guild: a single guild; only its ``id`` attribute is read.
        """
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(LOG_DIR, exist_ok=True)
        filename = os.path.join(LOG_DIR, f"{guild.id}{LOG_EXT}")
        if os.path.exists(filename):
            logging.info(f"log {guild.id} > already exists")
            return
        with open(filename, mode="wb") as f:
            f.write(gzip.compress(b"{}"))
        logging.info(f"log {guild.id} > created")
    @staticmethod
    def remove_log(guild: discord.Guild):
        """Delete the log file of ``guild`` if it exists.

        The file is ``<LOG_DIR>/<guild.id><LOG_EXT>``. ``LOG_DIR`` is created
        when missing, as before. Both outcomes are logged at INFO level.

        Args:
            guild: a single guild; only its ``id`` attribute is read.
        """
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(LOG_DIR, exist_ok=True)
        filename = os.path.join(LOG_DIR, f"{guild.id}{LOG_EXT}")
        # Attempt the removal directly so the file cannot vanish between an
        # existence check and the unlink.
        try:
            os.unlink(filename)
        except FileNotFoundError:
            logging.info(f"log {guild.id} > does not exists")
        else:
            logging.info(f"log {guild.id} > removed")
    @staticmethod
    def check_logs(guilds: List[discord.Guild]):
        """Delete stale and orphaned log files from ``LOG_DIR``.

        Only regular files whose extension is ``LOG_EXT`` are considered.
        A file is removed when its base name is not the id of any guild in
        ``guilds`` ("unused"), or when it belongs to one of them but was last
        modified more than ``MAX_MODIFICATION_TIME`` seconds ago ("old").

        Args:
            guilds: guilds whose log files may be kept; only ``id`` is read.
        """
        logging.info("checking logs...")
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(LOG_DIR, exist_ok=True)
        # File names are strings, so compare against stringified ids; a set
        # keeps each membership test O(1).
        guild_ids = {str(guild.id) for guild in guilds}
        for item in os.listdir(LOG_DIR):
            path = os.path.join(LOG_DIR, item)
            name, ext = os.path.splitext(item)
            if ext != LOG_EXT or not os.path.isfile(path):
                continue
            if name not in guild_ids:
                logging.info(f"> removing unused log '{path}'")
                os.unlink(path)
            elif (time.time() - os.path.getmtime(path)) > MAX_MODIFICATION_TIME:
                logging.info(f"> removing old log '{path}'")
                os.unlink(path)
@@ -8,6 +8,9 @@ IMAGE_FORMAT = [".gif", ".gifv", ".png", ".jpg", ".jpeg", ".bmp"]
 EMBED_IMAGES = ["image", "gifv"]
 NOT_SERIALIZED = ["channel"]
 class MessageLog:
    def __init__(self, message: Union[discord.Message, dict], channel: Any):
        self.channel = channel
@@ -79,7 +82,8 @@ class MessageLog:
    def dict(self) -> dict:
        message = dict(self.__dict__)
-        message.pop("channel", None)
+        for key in NOT_SERIALIZED:
            message.pop(key, None)
        message["created_at"] = self.created_at.isoformat()
        message["edited_at"] = (
            self.edited_at.isoformat() if self.edited_at is not None else None
@@ -6,7 +6,7 @@ if sys.version_info < (3, 7):
    print("Please upgrade your Python version to 3.7.0 or higher")
    sys.exit(1)
-from utils import emojis
+from utils import emojis, gdpr
 from scanners import (
    EmotesScanner,
    FullScanner,
@@ -33,17 +33,43 @@ emojis.load_emojis()
 bot = Bot(
    "Discord Analyst",
-    "1.12",
+    "1.13",
    alias="%",
 )
 bot.log_calls = True
 async def on_ready():
    """
    Run the log check against the guilds currently known by the client.

    Always returns True.
    """
    current_guilds = bot.client.guilds
    GuildLogs.check_logs(current_guilds)
    return True
 async def on_guild_remove():
    """
    Run the log check again with the guilds the client still knows.

    Always returns True.
    """
    remaining_guilds = bot.client.guilds
    GuildLogs.check_logs(remaining_guilds)
    return True
 bot.register_event(on_ready)
 bot.register_event(on_guild_remove)
 bot.register_command(
    "(cancel|stop)",
    GuildLogs.cancel,
    "cancel: stop current analysis (not launched with fast)",
-    "```\n" + "%cancel: Stop current analysis (not launched with fast)\n" + "```",
+    "```\n%cancel: Stop current analysis (not launched with fast)\n```",
 )
 bot.register_command(
    "gdpr",
    gdpr.process,
    "gdpr: displays GDPR information",
    gdpr.HELP,
 )
 bot.register_command(
    "words",
    lambda *args: WordsScanner().compute(*args),
    "words: (BETA) rank words by their usage",
    WordsScanner.help(),
 )
 bot.register_command(
    "last",
@@ -63,12 +89,6 @@ bot.register_command(
    "first: read first message",
    FirstScanner.help(),
 )
 bot.register_command(
    "words",
    lambda *args: WordsScanner().compute(*args),
    "words: rank words by their usage",
    WordsScanner.help(),
 )
 bot.register_command(
    "mentioned",
    lambda *args: MentionedScanner().compute(*args),
@@ -8,21 +8,17 @@ import discord
 from logs import ChannelLogs, MessageLog
 from .scanner import Scanner
 from data_types import Counter
-from utils import COMMON_HELP_ARGS, mention, channel_mention
+from utils import generate_help, mention, channel_mention
 class ChannelsScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
+        return generate_help(
-            "```\n"
+            "chan",
-            + "%chan: Rank channels by their messages\n"
+            "Rank channels by their messages",
-            + "arguments:\n"
+            args=["<n> - top <n>, default is 10", "all/everyone - include bots"],
-            + COMMON_HELP_ARGS
+            example="5 @user",
            + "* <n> - top <n>, default is 10\n"
            + "* all/everyone - include bots\n"
            + "Example: %chan 10 @user\n"
            + "```"
        )
    def __init__(self):
@@ -8,21 +8,13 @@ import discord
 from .scanner import Scanner
 from data_types import Composition
 from logs import ChannelLogs, MessageLog
-from utils import emojis, COMMON_HELP_ARGS
+from utils import emojis, generate_help
 class CompositionScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
+        return generate_help("compo", "Show composition statistics")
            "```\n"
            + "%compo: Show composition statistics\n"
            + "arguments:\n"
            + COMMON_HELP_ARGS
            + "* all/everyone - include bots\n"
            + "Example: %compo #mychannel1 @user\n"
            + "```"
        )
    def __init__(self):
        super().__init__(
@@ -8,24 +8,23 @@ import discord
 from logs import ChannelLogs, MessageLog
 from data_types import Emote, get_emote_dict
 from .scanner import Scanner
-from utils import emojis, COMMON_HELP_ARGS, plural, precise
+from utils import emojis, generate_help, plural, precise
 class EmotesScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
+        return generate_help(
-            "```\n"
+            "emojis",
-            + "%emojis: Rank emojis by their usage\n"
+            "Rank emojis by their usage",
-            + "arguments:\n"
+            args=[
-            + COMMON_HELP_ARGS
+                "<n> - top <n> emojis, default is 20",
-            + "* <n> - top <n> emojis, default is 20\n"
+                "all - list all common emojis in addition to this guild's",
-            + "* all - list all common emojis in addition to this guild's\n"
+                "members - show top member for each emojis",
-            + "* members - show top member for each emojis\n"
+                "sort:usage/reaction - other sorting methods",
-            + "* sort:usage/reaction - other sorting methods\n"
+                "everyone - include bots",
-            + "* everyone - include bots\n"
+            ],
-            + "Example: %emojis 10 all #mychannel1 #mychannel2 @user\n"
+            example="10 all #mychannel1 #mychannel2 @user",
            + "```"
        )
    def __init__(self):
@@ -3,14 +3,13 @@ from typing import List
 # Custom libs
 from .history_scanner import HistoryScanner
 from utils import generate_help
 class FirstScanner(HistoryScanner):
    @staticmethod
    def help() -> str:
-        return super(FirstScanner, FirstScanner).help(
+        return generate_help("first", "Read first message")
            cmd="first", text="Read first message"
        )
    def __init__(self):
        super().__init__(help=FirstScanner.help())
@@ -8,21 +8,13 @@ import discord
 from .scanner import Scanner
 from data_types import Frequency
 from logs import ChannelLogs, MessageLog
-from utils import COMMON_HELP_ARGS
+from utils import generate_help
 class FrequencyScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
+        return generate_help("freq", "Show frequency-related statistics")
            "```\n"
            + "%freq: Show frequency-related statistics\n"
            + "arguments:\n"
            + COMMON_HELP_ARGS
            + "* all/everyone - include bots\n"
            + "Example: %freq #mychannel1 @user\n"
            + "```"
        )
    def __init__(self):
        super().__init__(
@@ -55,7 +47,7 @@ class FrequencyScanner(Scanner):
        freq: Frequency,
        raw_members: List[int],
        *,
-        all_messages: bool
+        all_messages: bool,
    ) -> bool:
        impacted = False
        # If author is included in the selection (empty list is all)
@@ -8,21 +8,13 @@ from .scanner import Scanner
 from . import FrequencyScanner, CompositionScanner, PresenceScanner
 from data_types import Frequency, Composition, Presence
 from logs import ChannelLogs, MessageLog
-from utils import COMMON_HELP_ARGS
+from utils import generate_help
 class FullScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
+        return generate_help("scan", "Show full statistics")
            "```\n"
            + "%scan: Show full statistics\n"
            + "arguments:\n"
            + COMMON_HELP_ARGS
            + "* all/everyone - include bots\n"
            + "Example: %scan #mychannel1 @user\n"
            + "```"
        )
    def __init__(self):
        super().__init__(
@@ -7,22 +7,9 @@ import discord
 from .scanner import Scanner
 from data_types import History
 from logs import ChannelLogs, MessageLog
 from utils import COMMON_HELP_ARGS
 class HistoryScanner(Scanner, ABC):
    @staticmethod
    def help(*, cmd: str, text: str) -> str:
        return (
            "```\n"
            + f"%{cmd}: {text}\n"
            + "arguments:\n"
            + COMMON_HELP_ARGS
            + "* all/everyone - include bots\n"
            + "Example: %{cmd} #mychannel1 @user\n"
            + "```"
        )
    def __init__(self, *, help: str):
        super().__init__(
            has_digit_args=True,
@@ -3,14 +3,13 @@ from typing import List
 # Custom libs
 from .history_scanner import HistoryScanner
 from utils import generate_help
 class LastScanner(HistoryScanner):
    @staticmethod
    def help() -> str:
-        return super(LastScanner, LastScanner).help(
+        return generate_help("last", "Read last message")
            cmd="last", text="Read last message"
        )
    def __init__(self):
        super().__init__(help=LastScanner.help())
@@ -8,22 +8,18 @@ import discord
 from logs import ChannelLogs, MessageLog
 from .scanner import Scanner
 from data_types import Counter
-from utils import COMMON_HELP_ARGS, plural, precise, mention, alt_mention
+from utils import generate_help, plural, precise, mention, alt_mention
 class MentionedScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
+        return generate_help(
-            "```\n"
+            "mentioned",
-            + "%mentioned: Rank specific user's mentions by their usage\n"
+            "Rank specific user's mentions by their usage",
-            + "arguments:\n"
+            args=["<n> - top <n>, default is 10", "all/everyone - include bots"],
-            + "* @member/me - (required) one or more member\n"
+            example="5 @user",
-            + "\n".join(COMMON_HELP_ARGS.split("\n")[1:])
+            replace_args=[" @member/me - (required) one or more member"],
            + "* <n> - top <n> mentions, default is 10\n"
            + "* all - include bots mentions\n"
            + "Example: %mentioned 10 @user\n"
            + "```"
        )
    def __init__(self):
@@ -45,7 +41,7 @@ class MentionedScanner(Scanner):
                "You need to mention at least one member or use `me`", reference=message
            )
            return False
-        self.all_mentions = "all" in args
+        self.all_mentions = "all" in args or "everyone" in args
        # Create mentions dict
        self.mentions = defaultdict(Counter)
        return True
@@ -9,7 +9,7 @@ from logs import ChannelLogs, MessageLog
 from .scanner import Scanner
 from data_types import Counter
 from utils import (
-    COMMON_HELP_ARGS,
+    generate_help,
    plural,
    precise,
    mention,
@@ -22,16 +22,15 @@ from utils import (
 class MentionsScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
+        return generate_help(
-            "```\n"
+            "mentions",
-            + "%mentions: Rank mentions by their usage\n"
+            "Rank mentions by their usage",
-            + "arguments:\n"
+            args=[
-            + COMMON_HELP_ARGS
+                "<n> - top <n>, default is 10",
-            + "* <n> - top <n> mentions, default is 10\n"
+                "all - show role/channel/everyone/here mentions",
-            + "* all - show role/channel/everyone/here mentions\n"
+                "everyone - include bots mentions",
-            + "* everyone - include bots mentions\n"
+            ],
-            + "Example: %mentions 10 #mychannel1 #mychannel2 @user\n"
+            example="10 #mychannel1 #mychannel2 @user",
            + "```"
        )
    def __init__(self):
@@ -8,21 +8,17 @@ import discord
 from logs import ChannelLogs, MessageLog
 from .scanner import Scanner
 from data_types import Counter
-from utils import COMMON_HELP_ARGS, mention, channel_mention
+from utils import generate_help, mention, channel_mention
 class MessagesScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
+        return generate_help(
-            "```\n"
+            "msg",
-            + "%msg: Rank users by their messages\n"
+            "Rank users by their messages",
-            + "arguments:\n"
+            args=["<n> - top <n>, default is 10", "all/everyone - include bots"],
-            + COMMON_HELP_ARGS
+            example="10 #channel",
            + "* <n> - top <n>, default is 10\n"
            + "* all/everyone - include bots\n"
            + "Example: %msg 10 #channel\n"
            + "```"
        )
    def __init__(self):
@@ -7,21 +7,13 @@ import discord
 from .scanner import Scanner
 from data_types import Presence
 from logs import ChannelLogs, MessageLog
-from utils import COMMON_HELP_ARGS
+from utils import generate_help
 class PresenceScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
+        return generate_help("pres", "Show presence statistics")
            "```\n"
            + "%pres: Show presence statistics\n"
            + "arguments:\n"
            + COMMON_HELP_ARGS
            + "* all/everyone - include bots\n"
            + "Example: %pres #mychannel1 @user\n"
            + "```"
        )
    def __init__(self):
        super().__init__(
@@ -3,14 +3,13 @@ from typing import List
 # Custom libs
 from .history_scanner import HistoryScanner
 from utils import generate_help
 class RandomScanner(HistoryScanner):
    @staticmethod
    def help() -> str:
-        return super(RandomScanner, RandomScanner).help(
+        return generate_help("rand", "Read a random message")
            cmd="rand", text="Read a random message"
        )
    def __init__(self):
        super().__init__(help=RandomScanner.help())
@@ -8,20 +8,17 @@ import discord
 from logs import ChannelLogs, MessageLog
 from .scanner import Scanner
 from data_types import Counter
-from utils import COMMON_HELP_ARGS, mention, channel_mention
+from utils import generate_help, mention, channel_mention
 class ReactionsScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
+        return generate_help(
-            "```\n"
+            "react",
-            + "%react: Rank users by their reactions\n"
+            "Rank users by their reactions",
-            + "arguments:\n"
+            args=["<n> - top <n>, default is 10"],
-            + COMMON_HELP_ARGS
+            example="10 #channel",
            + "* <n> - top <n>, default is 10\n"
            + "Example: %react 10 #channel\n"
            + "```"
        )
    def __init__(self):
@@ -5,8 +5,24 @@ import logging
 import re
 import discord
-from utils import no_duplicate, get_intro, delta
+
-from logs import GuildLogs, ChannelLogs, MessageLog, ALREADY_RUNNING, CANCELLED
+from utils import (
    no_duplicate,
    get_intro,
    delta,
    gdpr,
    ISO8601_REGEX,
    RELATIVE_REGEX,
    parse_time,
 )
 from logs import (
    GuildLogs,
    ChannelLogs,
    MessageLog,
    ALREADY_RUNNING,
    CANCELLED,
    NO_FILE,
 )
 class Scanner(ABC):
@@ -47,22 +63,42 @@ class Scanner(ABC):
                str(channel.id) for channel in message.channel_mentions
            ]
            str_mentions = [str(member.id) for member in message.mentions]
            dates = []
            for i, arg in enumerate(args[1:]):
                skip_check = False
                if re.match(r"^<@!?\d+>$", arg):
                    arg = arg[3:-1] if "!" in arg else arg[2:-1]
                elif re.match(r"^<#!?\d+>$", arg):
                    arg = arg[3:-1] if "!" in arg else arg[2:-1]
                elif re.match(ISO8601_REGEX, arg) or re.match(RELATIVE_REGEX, arg):
                    dates += [parse_time(arg)]
                    skip_check = True
                    if len(dates) > 2:
                        await message.channel.send(
                            f"Too many date arguments: `{arg}`", reference=message
                        )
                        return
                if (
                    arg not in self.valid_args + ["me", "here", "fast", "fresh"]
                    and (not arg.isdigit() or not self.has_digit_args)
                    and arg not in str_channel_mentions
                    and arg not in str_mentions
                    and not skip_check
                ):
                    await message.channel.send(
                        f"Unrecognized argument: `{arg}`", reference=message
                    )
                    return
            self.start_date = None if len(dates) < 1 else min(dates)
            self.stop_date = None if len(dates) < 2 else max(dates)
            if self.start_date is not None and self.start_date > datetime.now():
                await message.channel.send(
                    f"Start date is after today", reference=message
                )
                return
            # Get selected channels or all of them if no channel arguments
            self.channels = no_duplicate(message.channel_mentions)
@@ -94,7 +130,12 @@ class Scanner(ABC):
                    allowed_mentions=discord.AllowedMentions.none(),
                )
                total_msg, total_chan = await logs.load(
-                    progress, self.channels, fast="fast" in args, fresh="fresh" in args
+                    progress,
                    self.channels,
                    self.start_date,
                    self.stop_date,
                    fast="fast" in args,
                    fresh="fresh" in args,
                )
                if total_msg == CANCELLED:
                    await message.channel.send(
@@ -106,7 +147,24 @@ class Scanner(ABC):
                        "An analysis is already running on this server, please be patient.",
                        reference=message,
                    )
                elif total_msg == NO_FILE:
                    await message.channel.send(gdpr.TEXT)
                else:
                    if self.start_date is not None and len(logs.channels) > 0:
                        self.start_date = max(
                            self.start_date,
                            min(
                                [
                                    logs.channels[channel.id].start_date
                                    for channel in self.channels
                                    if channel.id in logs.channels
                                    and logs.channels[channel.id].start_date is not None
                                ]
                            ),
                        )
                        if self.stop_date is None:
                            self.stop_date = datetime.utcnow()
                    self.msg_count = 0
                    self.total_msg = 0
                    self.chan_count = 0
@@ -118,13 +176,21 @@ class Scanner(ABC):
                                [
                                    self.compute_message(channel_logs, message_log)
                                    for message_log in channel_logs.messages
                                    if (
                                        self.start_date is None
                                        or message_log.created_at >= self.start_date
                                    )
                                    and (
                                        self.stop_date is None
                                        or message_log.created_at <= self.stop_date
                                    )
                                ]
                            )
                            self.total_msg += len(channel_logs.messages)
                            self.msg_count += count
                            self.chan_count += 1 if count > 0 else 0
                    logging.info(f"scan {guild.id} > scanned in {delta(t0):,}ms")
-                    if self.total_msg == 0:
+                    if self.msg_count == 0:
                        await message.channel.send(
                            "There are no messages found matching the filters",
                            reference=message,
@@ -141,12 +207,15 @@ class Scanner(ABC):
                                self.members,
                                self.msg_count,
                                self.chan_count,
                                self.start_date,
                                self.stop_date,
                            )
                        )
                        logging.info(f"scan {guild.id} > results in {delta(t0):,}ms")
                        response = ""
                        first = True
                        for r in results:
                            if r:
                                if len(response + "\n" + r) > 2000:
                                    await message.channel.send(
                                        response,
@@ -9,7 +9,7 @@ from logs import ChannelLogs, MessageLog
 from .scanner import Scanner
 from data_types import Counter
 from utils import (
-    COMMON_HELP_ARGS,
+    generate_help,
    plural,
    precise,
 )
@@ -18,16 +18,15 @@ from utils import (
 class WordsScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
+        return generate_help(
-            "```\n"
+            "words",
-            + "%words: Rank words by their usage\n"
+            "(BETA) Rank words by their usage",
-            + "arguments:\n"
+            args=[
-            + COMMON_HELP_ARGS
+                "<n> - words containings <n> or more letters, default is 3",
-            + "* <n> - words containings <n> or more letters, default is 3\n"
+                "<n2> - top <n2> words, default is 10",
-            + "* <n2> - top <n2> words, default is 10\n"
+                "all/everyone - include bots",
-            + "* everyone - include bots\n"
+            ],
-            + "Example: %words 5 10 #mychannel1 #mychannel2 @user\n"
+            example="5 10 #mychannel1 #mychannel2 @user",
            + "```"
        )
    def __init__(self):
@@ -104,16 +103,13 @@ class WordsScanner(Scanner):
            or message.author in raw_members
        ):
            impacted = True
-            content = " ".join(
+            content = message.content
-                [
+            content = re.sub(r"```.+```", "", content, flags=re.DOTALL)
-                    block
+            content = re.sub(r"`.+`", "", content, flags=re.DOTALL)
-                    for block in message.content.split()
+            content = re.sub(r"\w+:\/\/[^ ]+", "", content)
                    if not re.match(r"^\w+:\/\/", block)
                ]
            )
            for word in re.split("[^\w\-':]", content):
                m = re.match(
-                    r"(?!^:\w+:$)^[^\w]*((?![\d_])\w.*(?![\d_])\w)[^\w]*$", word
+                    r"(?!^:\w+:$)^[^\w]*((?![\d_])\w[\w\-']*(?![\d_])\w)[^\w]*$", word
                )
                if m:
                    word = m[1].lower()
@@ -126,7 +122,5 @@ class WordsScanner(Scanner):
                                words[word] = words[word + case]
                                del words[word + case]
                                break
-                        words[word].update_use(
+                        words[word].update_use(1, message.created_at)
                            message.content.count(word), message.created_at
                        )
        return impacted
@@ -0,0 +1,65 @@
 import discord
 from logs import GuildLogs
 HELP = """```
 %gdpr: Displays GDPR information
 arguments:
 * agree - agree to GDPR
 * revoke - remove this server's data
 ```"""
 TEXT = """
 __**About Analyst-bot's data usage**__
 **TL;DR**
 Analyst-bot collects text message information. It does not share collected data with any third-party and data is retained 18 months or until the bot is leaving the guild/server.
 **Data collection**
 Analyst-bot collects a Discord guild/server's history when asked to.
 This includes:
 - Visible text channel names
 - Visible text messages: date and time of creation and edition,  author,  content,  reactions and other available metadata (pinned, tts, etc.)
 This does __not__ includes:
 - Voice channels and not visible channels
 - Not visible text messages
 - Visible text messages' embedded content, images and other attachments
 **Data processing**
 Any data collected is only processed in order to produce a one-time report sent to the user immediately. No temporary data are retained.
 **Data storage and retain policy**
 Analyst-bot stores the collected data in files that are accessible by the software and its administrator only.
 Any collected data are retained maximum 18 months until deletion or when the bot is leaving a guild/server.
 **Data sharing**
 Analyst-bot does not share the data collected with any third-party.
 **Right to retract**
 If you want to have your data removed, you can use the `%gdpr revoke` command or remove this bot from your guild/server.
 **Terms agreement**
 By agreeing to these terms, you ensure having the legal age if you are in a country that does have one and you also ensure having the consent of every member involved.
 *If you want more information, please contact the creator of this bot: <https://github.com/Klemek/discord-analyst>.*
 Type `%gdpr agree` to agree to these terms, `%gdpr revoke` to remove this guild/server's collected data or `%gdpr` to see this message again.
 """
 AGREE_TEXT = "Thanks for agreeing for these terms, you can now run analysis on this guild/server."
 REVOKE_TEXT = "This guild/server's data has been deleted. To run new analysis you must agree to the terms again."
 async def process(client: discord.Client, message: discord.Message, *args: str):
    """
    Handle the gdpr command: show the terms, agree to them or revoke them.

    :param client: discord client (not used by this command)
    :param message: message that triggered the command, answers go to its channel
    :param args: command words; the second one, when present, is the action
        (presumably the first one is the command name itself - confirm against caller)
    """
    # No action given (also covers an empty args instead of failing on args[1])
    if len(args) <= 1:
        await message.channel.send(TEXT)
        return
    if len(args) > 2:
        await message.channel.send("Too many arguments", reference=message)
        return
    action = args[1]
    if action == "help":
        await message.channel.send(HELP, reference=message)
    elif action in ["agree", "accept"]:
        GuildLogs.init_log(message.channel.guild)
        await message.channel.send(AGREE_TEXT, reference=message)
    elif action in ["revoke", "cancel", "remove", "delete"]:
        GuildLogs.remove_log(message.channel.guild)
        await message.channel.send(REVOKE_TEXT, reference=message)
    else:
        await message.channel.send(
            f"Unrecognized argument: `{action}`", reference=message
        )
@@ -1,19 +1,44 @@
-from typing import List, Dict, Union, Optional, Any
+from calendar import month
 from typing import Callable, List, Dict, Union, Optional, Any
 import os
 import logging
 import discord
 import math
-from datetime import datetime
+from datetime import datetime, timedelta
 import re
 import dateutil.parser
 from dateutil.relativedelta import relativedelta
 # OTHER
-COMMON_HELP_ARGS = (
+COMMON_HELP_ARGS = [
-    ""
+    "@member/me - filter for one or more member",
-    + "* @member/me - filter for one or more member\n"
+    "#channel/here - filter for one or more channel",
-    + "* #channel/here - filter for one or more channel\n"
+    "<date1> - filter after <date1>",
-    + "* fast - only read cache\n"
+    "<date2> - filter before <date2>",
-    + "* fresh - does not read cache (long)\n"
+    "fast - only read cache",
    "fresh - does not read cache (long)",
 ]
 def generate_help(
    cmd: str,
    info: str,
    *,
    args=None,
    example="#mychannel1 @user",
    replace_args=None,
 ):
    """
    Build the help text of a command as a Discord code block.

    :param cmd: command name, shown prefixed with "%"
    :param info: one-line description of the command
    :param args: command specific argument descriptions, listed after the common ones
        (default: only "all/everyone - include bots")
    :param example: arguments shown after the command in the example line
    :param replace_args: descriptions taking the place of the first entries
        of COMMON_HELP_ARGS, in order (default: none replaced)
    :return: the help text
    """
    # None sentinels instead of list defaults: a default list is shared between calls
    if args is None:
        args = ["all/everyone - include bots"]
    if replace_args is None:
        replace_args = []
    arg_list = "* " + "\n* ".join(
        list(replace_args)
        + list(COMMON_HELP_ARGS[len(replace_args) :])
        + list(args)
    )
    return "\n".join(
        [
            "```",
            f"%{cmd}: {info}",
            "arguments:",
            arg_list,
            "(Sample dates: 2020 / 2021-11 / 2021-06-28 / 2020-06-28T23:00 / today / week / 8days / 1y)",
            f"Example: %{cmd} {example}",
            "```",
        ]
    )
 def delta(t0: datetime):
@@ -92,11 +117,19 @@ def no_duplicate(seq: list) -> list:
 # DICTS
-def top_key(d: Dict[Union[str, int], int]) -> Union[str, int]:
+def top_key(
-    return sorted(d, key=lambda k: d[k])[-1]
+    d: Dict[Union[str, int], int], key: Optional[Callable] = None
 ) -> Union[str, int]:
    if len(d) == 0:
        return None
    if key is None:
        key = lambda k: d[k]
    return sorted(d, key=key)[-1]
 def val_sum(d: Dict[Any, int]) -> int:
    """
    Add up every value of a dictionary; an empty one gives 0.
    """
    return sum(d.values()) if d else 0
@@ -135,6 +168,51 @@ def precise(p: float, *, precision: int = 2) -> str:
 # DATE FORMATTING
 ISO8601_REGEX = r"^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)?$"
 ISO8601_FULL = "0000-01-01T00:00:00"
 def parse_iso_datetime(str_date: str) -> datetime:
    """
    Parse a date string, completing a truncated "YYYY[-MM[-DD[THH[:MM[:SS]]]]]" first.

    Missing fields are taken from the template "0000-01-01T00:00:00",
    e.g. "2021-11" is parsed as "2021-11-01T00:00:00".
    Any other input is handed to dateutil as is.

    :param str_date: date to parse
    :return: the parsed datetime
    """
    # raw string: "\d" in a plain literal is an invalid escape sequence
    if re.match(r"^\d{4}(-\d{2}(-\d{2}(T\d{2}(:\d{2}(:\d{2})?)?)?)?)?$", str_date):
        template = "0000-01-01T00:00:00"
        str_date = str_date + template[len(str_date) :]
    return dateutil.parser.parse(str_date)
 # Relative dates ("today", "week", "8days", "1y", ...). Anchored on both ends:
 # it is used with re.match, which would otherwise accept any word that merely
 # starts with a unit letter (e.g. "me", "here", "members") as a date.
 RELATIVE_REGEX = r"^(yesterday|today|\d*h(ours?)?|\d*d(ays?)?|\d*w(eeks?)?|\d*m(onths?)?|\d*y(ears?)?)$"
 def parse_relative_time(src: str) -> datetime:
    """
    Turn a relative date ("today", "yesterday", "8days", "week", "1y", ...) into a datetime.

    "today" gives the current local time shifted by the local-to-UTC offset,
    "yesterday" the same instant one day earlier. Other inputs are an optional
    count (default 1) followed by a unit whose first letter selects
    hours, days, weeks, months or years; the result is utcnow minus that delay.

    :param src: relative date to parse
    :return: the matching datetime
    :raises ValueError: when no known unit can be read from src
    """
    timezone_delta = datetime.utcnow() - datetime.now()
    if src == "today":
        return datetime.today() + timezone_delta
    if src == "yesterday":
        return datetime.today() - relativedelta(days=1) + timezone_delta
    m = re.match(r"(\d*)(\w+)", src)
    if m is None:
        raise ValueError(f"Not a relative date: `{src}`")
    value = int(m[1]) if m[1] else 1
    # only the first letter of the unit matters ("d", "day" and "days" are the same)
    units = {"h": "hours", "d": "days", "w": "weeks", "m": "months", "y": "years"}
    unit = units.get(m[2][0])
    if unit is None:
        # explicit error instead of subtracting None from a datetime
        raise ValueError(f"Unknown time unit in `{src}`")
    return datetime.utcnow() - relativedelta(**{unit: value})
 def parse_time(src: str) -> datetime:
    """
    Parse a date argument, relative when it matches RELATIVE_REGEX, ISO-like otherwise.
    """
    is_relative = re.match(RELATIVE_REGEX, src)
    parser = parse_relative_time if is_relative else parse_iso_datetime
    return parser(src)
 def str_date(date: datetime) -> str:
    """
    Render a date as day, abbreviated month name and year.
    """
    return format(date, "%d %b. %Y")  # 12 Jun. 2018
@@ -144,29 +222,37 @@ def str_datetime(date: datetime) -> str:
    return date.strftime("%H:%M, %d %b. %Y")  # 12:05, 12 Jun. 2018
-def from_now(src: Optional[datetime]) -> str:
+def str_delta(delay: timedelta) -> str:
    if src is None:
        return "never"
    delay = datetime.utcnow() - src
    seconds = delay.seconds
    minutes = seconds // 60
    hours = minutes // 60
    if delay.days < 1:
        if hours < 1:
            if minutes == 0:
-                return "now"
+                return "no time"
            elif minutes == 1:
-                return "a minute ago"
+                return "a minute"
            else:
-                return f"{minutes} minutes ago"
+                return f"{minutes} minutes"
        elif hours == 1:
-            return "an hour ago"
+            return "an hour"
        else:
-            return f"{hours} hours ago"
+            return f"{hours} hours"
    elif delay.days == 1:
-        return "yesterday"
+        return "one day"
    else:
-        return f"{delay.days:,} days ago"
+        return f"{delay.days:,} days"
 def from_now(src: Optional[datetime]) -> str:
    """
    Describe how long ago a UTC datetime was ("never" when there is none).
    """
    if src is None:
        return "never"
    elapsed = str_delta(datetime.utcnow() - src)
    # these two delays have a dedicated wording instead of "<delay> ago"
    special_cases = {"no time": "now", "one day": "yesterday"}
    return special_cases.get(elapsed, elapsed + " ago")
 # APP SPECIFIC
@@ -179,46 +265,48 @@ def get_intro(
    members: List[discord.Member],
    nmm: int,  # number of messages impacted
    nc: int,  # number of impacted channels
    start_datetime: datetime,
    stop_datetime: datetime,
 ) -> str:
    """
    Get the introduction sentence of the response
    """
    time_text = ""
    if start_datetime is not None:
        stop_datetime = datetime.now() if stop_datetime is None else stop_datetime
        time_text = f" (in {str_delta(stop_datetime - start_datetime)})"
    # Show all data (members, channels) when it's less than 5 units
    if len(members) == 0:
        # Full scan of the server
        if full:
-            return f"{subject} in this server ({nc} channels, {nmm:,} messages):"
+            return f"{subject} in this server ({nc} channels, {nmm:,} messages){time_text}:"
        elif len(channels) < 5:
-            return f"{aggregate([c.mention for c in channels])} {subject.lower()} in {nmm:,} messages:"
+            return f"{aggregate([c.mention for c in channels])} {subject.lower()} in {nmm:,} messages{time_text}:"
        else:
-            return (
+            return f"These {len(channels)} channels {subject.lower()} in {nmm:,} messages{time_text}:"
                f"These {len(channels)} channels {subject.lower()} in {nmm:,} messages:"
            )
    elif len(members) < 5:
        if full:
-            return f"{aggregate([m.mention for m in members])} {subject.lower()} in {nmm:,} messages:"
+            return f"{aggregate([m.mention for m in members])} {subject.lower()} in {nmm:,} messages{time_text}:"
        elif len(channels) < 5:
            return (
                f"{aggregate([m.mention for m in members])} on {aggregate([c.mention for c in channels])} "
-                f"{subject.lower()} in {nmm:,} messages:"
+                f"{subject.lower()} in {nmm:,} messages{time_text}:"
            )
        else:
            return (
                f"{aggregate([m.mention for m in members])} on these {len(channels)} channels "
-                f"{subject.lower()} in {nmm:,} messages:"
+                f"{subject.lower()} in {nmm:,} messages{time_text}:"
            )
    else:
        if full:
-            return (
+            return f"These {len(members)} members {subject.lower()} in {nmm:,} messages{time_text}:"
                f"These {len(members)} members {subject.lower()} in {nmm:,} messages:"
            )
        elif len(channels) < 5:
            return (
                f"These {len(members)} members on {aggregate([c.mention for c in channels])} "
-                f"{subject.lower()} in {nmm:,} messages:"
+                f"{subject.lower()} in {nmm:,} messages{time_text}:"
            )
        else:
            return (
                f"These {len(members)} members on these {len(channels)} channels "
-                f"{subject.lower()} in {nmm:,} messages:"
+                f"{subject.lower()} in {nmm:,} messages{time_text}:"
            )