Merge pull request #32 from Klemek/dev

v1.13
2021-04-09 19:51:54 +02:00
parent 8cc0e1fe65 76af4661ed
commit 0399fd8e61
30 changed files with 728 additions and 414 deletions
@@ -0,0 +1,26 @@
+name: Python
+
+on: ["push", "pull_request"]
+
+jobs:
+  syntax:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: [3.7, 3.8, 3.9]
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        python -m pip install flake8
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
@@ -21,6 +21,7 @@
 * %first - read first message
 * %rand - read a random message
 * %last - read last message
+* %gdpr - displays GDPR information
 * %emojis - rank emotes by their usage
  * arguments:
    * <n> - top <n> emojis, default is 20
@@ -43,7 +44,7 @@
 * %react - rank users by their reactions
  * arguments:
    * <n> - top <n> messages, default is 10
-* %words - rank words by their usage
+* %words - (BETA) rank words by their usage
  * arguments:
    * <n> - words containing <n> or more letters, default is 3
    * <n2> - top <n2> words, default is 10
@@ -52,9 +53,13 @@
 * Common arguments:
    * @member/me: filter for one or more members
    * #channel/here: filter for one or more channels
+    * <date1> - filter after <date1>
+    * <date2> - filter before <date2>
    * all/everyone - include bot messages
    * fast: only read cache
    * fresh: does not read cache
+
+(Sample dates: 2020 / 2021-11 / 2021-06-28 / 2020-06-28T23:00 / today / week / 8days / 1y)
 ```

 ## Running this bot
@@ -104,6 +109,12 @@ python3 src/main.py

 ## Changelog

+* **v1.13**
+  * improved scan `%words`
+  * remove old and unused logs at startup and when leaving a guild
+  * GDPR disclaimer before scanning
+  * start and stop dates
+  * bug fixes and improvements
 * **v1.12**
  * more scans: `%words`
  * concurrent `fast` analysis
@@ -1,3 +1,4 @@
 discord.py
 python-dotenv
+python-dateutil
 git+git://github.com/Klemek/miniscord.git
@@ -23,49 +23,45 @@ class Composition:
        self.spoilers = 0

    def to_string(self, msg_count: int) -> List[str]:
-        ret = []
-        ret += [
-            f"- **avg. characters / message**: {self.total_characters/msg_count:.2f}"
-        ]
-        if self.plain_text > 0:
-            ret += [
-                f"- **plain text messages**: {self.plain_text:,} ({percent(self.plain_text/msg_count)})"
-            ]
-        if self.edited > 0:
-            ret += [
-                f"- **edited messages**: {self.edited:,} ({percent(self.edited/msg_count)})"
-            ]
-        if self.everyone > 0:
-            ret += [
-                f"- **@\u200beveryone**: {self.everyone:,} ({percent(self.everyone/msg_count)})"
-            ]
-        if self.mentions > 0:
-            ret += [
-                f"- **mentions**: {self.mentions:,} (in {percent(self.mention_msg/msg_count)} of msg, avg. {precise(self.mentions/msg_count)}/msg)",
-            ]
-        if self.answers > 0:
-            ret += [
-                f"- **answers**: {self.answers:,} ({percent(self.answers/msg_count)})"
-            ]
        total_emotes = val_sum(self.emotes)
-        if total_emotes > 0:
-            top_emote = top_key(self.emotes)
-            ret += [
-                f"- **emojis**: {total_emotes:,} (in {percent(self.emote_msg/msg_count)} of msg, avg. {precise(total_emotes/msg_count)}/msg)",
-                f"- **most used emoji**: {top_emote} ({plural(self.emotes[top_emote], 'time')}, {percent(self.emotes[top_emote]/total_emotes)})",
-            ]
-            if self.emote_only > 0:
-                ret += [
-                    f"- **emoji-only messages**: {self.emote_only:,} ({percent(self.emote_only/msg_count)})"
-                ]
-        if self.images > 0:
-            ret += [f"- **images**: {self.images:,} ({percent(self.images/msg_count)})"]
-        if self.links > 0:
-            ret += [f"- **links**: {self.links:,} ({percent(self.link_msg/msg_count)})"]
-        if self.spoilers > 0:
-            ret += [
-                f"- **spoilers**: {self.spoilers:,} ({percent(self.spoilers/msg_count)})"
-            ]
-        if self.tts > 0:
-            ret += [f"- **tts messages**: {self.tts:,} ({percent(self.tts/msg_count)})"]
+        top_emote = top_key(self.emotes)
+        ret = [
+            f"- **avg. characters / message**: {self.total_characters/msg_count:.2f}",
+            f"- **plain text messages**: {self.plain_text:,} ({percent(self.plain_text/msg_count)})"
+            if self.plain_text > 0
+            else "",
+            f"- **edited messages**: {self.edited:,} ({percent(self.edited/msg_count)})"
+            if self.edited > 0
+            else "",
+            f"- **@\u200beveryone**: {self.everyone:,} ({percent(self.everyone/msg_count)})"
+            if self.everyone > 0
+            else "",
+            f"- **mentions**: {self.mentions:,} (in {percent(self.mention_msg/msg_count)} of msg, avg. {precise(self.mentions/msg_count)}/msg)"
+            if self.mentions > 0
+            else "",
+            f"- **answers**: {self.answers:,} ({percent(self.answers/msg_count)})"
+            if self.answers > 0
+            else "",
+            f"- **emojis**: {total_emotes:,} (in {percent(self.emote_msg/msg_count)} of msg, avg. {precise(total_emotes/msg_count)}/msg)"
+            if total_emotes > 0
+            else "",
+            f"- **most used emoji**: {top_emote} ({plural(self.emotes[top_emote], 'time')}, {percent(self.emotes[top_emote]/total_emotes)})"
+            if total_emotes > 0
+            else "",
+            f"- **emoji-only messages**: {self.emote_only:,} ({percent(self.emote_only/msg_count)})"
+            if self.emote_only > 0
+            else "",
+            f"- **images**: {self.images:,} ({percent(self.images/msg_count)})"
+            if self.images > 0
+            else "",
+            f"- **links**: {self.links:,} ({percent(self.link_msg/msg_count)})"
+            if self.links > 0
+            else "",
+            f"- **spoilers**: {self.spoilers:,} ({percent(self.spoilers/msg_count)})"
+            if self.spoilers > 0
+            else "",
+            f"- **tts messages**: {self.tts:,} ({percent(self.tts/msg_count)})"
+            if self.tts > 0
+            else "",
+        ]
        return ret
@@ -38,7 +38,10 @@ class Frequency:
        *,
        member_specific: bool,
    ) -> List[str]:
+        self.dates.sort()
        delta = self.dates[-1] - self.dates[0]
+        if delta.days == 0:
+            delta = timedelta(days=1)
        total_msg = len(self.dates)
        busiest_weekday = top_key(self.week)
        busiest_hour = top_key(self.day)
@@ -46,7 +49,7 @@ class Frequency:
        if (
            self.dates[0].weekday() <= busiest_weekday
            and self.dates[-1].weekday() >= busiest_weekday
-        ):
+        ) or n_weekdays == 0:
            n_weekdays += 1
        n_hours = delta.days
        if self.dates[0].hour <= busiest_hour and self.dates[-1].hour >= busiest_hour:
@@ -56,19 +59,16 @@ class Frequency:
            f"- **latest message**: {str_datetime(self.dates[-1])} ({from_now(self.dates[-1])})",
            f"- **messages/day**: {precise(total_msg/delta.days, precision=3)}",
            f"- **busiest day of week**: {calendar.day_name[busiest_weekday]} (~{precise(self.week[busiest_weekday]/n_weekdays, precision=3)} msg, {percent(self.week[busiest_weekday]/total_msg)})",
-            f"- **busiest day ever**: {str_date(self.busiest_day)} ({from_now(self.busiest_day)}, {self.busiest_day_count} msg)",
+            f"- **busiest day ever**: {str_date(self.busiest_day)} ({from_now(self.busiest_day)}, {self.busiest_day_count} msg)"
+            if self.busiest_day is not None
+            else "",
            f"- **messages/hour**: {precise(total_msg*3600/delta.total_seconds(), precision=3)}",
            f"- **busiest hour of day**: {busiest_hour:0>2}:00 (~{precise(self.day[busiest_hour]/n_hours, precision=3)} msg, {percent(self.day[busiest_hour]/total_msg)})",
            f"- **busiest hour ever**: {str_datetime(self.busiest_hour)} ({from_now(self.busiest_hour)}, {self.busiest_hour_count} msg)",
            f"- **longest break**: {plural(round(self.longest_break.total_seconds()/3600), 'hour')} ({plural(self.longest_break.days,'day')}) from {str_datetime(self.longest_break_start)} ({from_now(self.longest_break_start)})",
            f"- **avg. streak**: {precise(sum(self.streaks)/len(self.streaks), precision=3)} msg",
+            f"- **longest streak**: {self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)} ({from_now(self.longest_streak_start)})"
+            if member_specific
+            else f"- **longest streak**: {mention(self.longest_streak_author)} ({self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)}, {from_now(self.longest_streak_start)})",
        ]
-        if member_specific:
-            ret += [
-                f"- **longest streak**: {self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)} ({from_now(self.longest_streak_start)})"
-            ]
-        else:
-            ret += [
-                f"- **longest streak**: {mention(self.longest_streak_author)} ({self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)}, {from_now(self.longest_streak_start)})"
-            ]
        return ret
@@ -25,74 +25,70 @@ class Presence:
        show_top_channel: bool,
        member_specific: bool,
    ) -> List[str]:
-        ret = []
        if chan_count is None:
            type = "server's"
        elif chan_count == 1:
            type = "channel's"
        else:
            type = "channels'"
-        if member_specific:
-            ret += [
-                f"- **messages**: {msg_count:,} ({percent(msg_count/total_msg)} of {type})"
-            ]
-        else:
-            top_member = top_key(self.messages)
-            ret += [
-                f"- **top messages**:  {mention(top_member)} ({self.messages[top_member]:,} msg, {percent(self.messages[top_member]/val_sum(self.messages))})"
-            ]
-        if show_top_channel:
-            top_channel = top_key(self.channel_usage)
-            channel_sum = val_sum(self.channel_usage)
-            found_in = sorted(
-                self.channel_usage,
-                key=lambda k: self.channel_usage[k] / self.channel_total[k],
-            )[-1]
-            ret += [
-                f"- **most visited channel**: {channel_mention(top_channel)} ({self.channel_usage[top_channel]:,} msg, {percent(self.channel_usage[top_channel]/channel_sum)})",
-            ]
-            if member_specific:
-                ret += [
-                    f"- **most contributed channel**: {channel_mention(found_in)} ({self.channel_usage[found_in]:,} msg, {percent(self.channel_usage[found_in]/self.channel_total[found_in])} of {type})"
-                ]
-        if member_specific:
-            if len(self.mentions) > 0:
-                top_mention = top_key(self.mentions)
-                mention_sum = val_sum(self.mentions)
-                ret += [
-                    f"- **was mentioned**: {plural(mention_sum, 'time')} ({percent(mention_sum/val_sum(self.mention_count))} of {type})",
-                    f"- **mostly mentioned by**: {mention(top_mention)} ({plural(self.mentions[top_mention], 'time')}, {percent(self.mentions[top_mention]/mention_sum)})",
-                ]
-        if len(self.mention_others) > 0:
-            top_mention = top_key(self.mention_others)
-            mention_sum = val_sum(self.mention_others)
-            if member_specific:
-                ret += [
-                    f"- **mentioned others**: {plural(mention_sum, 'time')} ({percent(mention_sum/val_sum(self.mention_count))} of {type})",
-                    f"- **mostly mentioned**: {mention(top_mention)} ({plural(self.mention_others[top_mention], 'time')}, {percent(self.mention_others[top_mention]/mention_sum)})",
-                ]
-            else:
-                top_member = top_key(self.mention_count)
-                ret += [
-                    f"- **mentioned**: {plural(mention_sum, 'time')} ({mention(top_member)}, {percent(self.mention_count[top_member]/val_sum(self.mention_count))})",
-                    f"- **top mentions**: {mention(top_member)} ({plural(self.mention_count[top_member], 'time')}, {percent(self.mention_count[top_member]/val_sum(self.mention_count))})",
-                    f"- **most mentioned**: {mention(top_mention)} ({plural(self.mention_others[top_mention], 'time')}, {percent(self.mention_others[top_mention]/mention_sum)})",
-                ]
-        if len(self.reactions) > 0:
-            total_used = val_sum(self.reactions)
-            top_reaction = top_key(self.reactions)
-            ret += [
-                f"- **reactions**: {plural(total_used, 'time')}",
-                f"- **most used reaction**: {top_reaction} ({plural(self.reactions[top_reaction], 'time')}, {percent(self.reactions[top_reaction]/total_used)})",
-            ]
-            if member_specific:
-                ret[
-                    -2
-                ] += f" ({percent(total_used/val_sum(self.used_reaction))} of {type})"
-            else:
-                top_member = top_key(self.used_reaction)
-                ret.insert(
-                    -1,
-                    f"- **top reactions**: {mention(top_member)} ({plural(self.used_reaction[top_member], 'time')}, {percent(self.used_reaction[top_member]/val_sum(self.used_reaction))})",
-                )
+        top_member = top_key(self.messages)
+        top_channel = top_key(self.channel_usage)
+        channel_sum = val_sum(self.channel_usage)
+        found_in = top_key(
+            self.channel_usage,
+            key=lambda k: self.channel_usage[k] / self.channel_total[k],
+        )
+        top_mention = top_key(self.mentions)
+        mention_sum = val_sum(self.mentions)
+        top_mention_others = top_key(self.mention_others)
+        mention_others_sum = val_sum(self.mention_others)
+        top_member_mentioned = top_key(self.mention_count)
+        total_reaction_used = val_sum(self.reactions)
+        top_reaction = top_key(self.reactions)
+        top_reaction_member = top_key(self.used_reaction)
+
+        ret = [
+            f"- **messages**: {msg_count:,} ({percent(msg_count/total_msg)} of {type})"
+            if member_specific
+            else f"- **top messages**:  {mention(top_member)} ({self.messages[top_member]:,} msg, {percent(self.messages[top_member]/val_sum(self.messages))})",
+            f"- **most visited channel**: {channel_mention(top_channel)} ({self.channel_usage[top_channel]:,} msg, {percent(self.channel_usage[top_channel]/channel_sum)})"
+            if show_top_channel
+            else "",
+            f"- **most contributed channel**: {channel_mention(found_in)} ({self.channel_usage[found_in]:,} msg, {percent(self.channel_usage[found_in]/self.channel_total[found_in])} of {type})"
+            if show_top_channel and member_specific
+            else "",
+            f"- **was mentioned**: {plural(mention_sum, 'time')} ({percent(mention_sum/val_sum(self.mention_count))} of {type})"
+            if member_specific and len(self.mentions) > 0
+            else "",
+            f"- **mostly mentioned by**: {mention(top_mention)} ({plural(self.mentions[top_mention], 'time')}, {percent(self.mentions[top_mention]/mention_sum)})"
+            if member_specific and len(self.mentions) > 0
+            else "",
+            f"- **mentioned others**: {plural(mention_others_sum, 'time')} ({percent(mention_others_sum/val_sum(self.mention_count))} of {type})"
+            if len(self.mention_others) > 0 and member_specific
+            else "",
+            f"- **mostly mentioned**: {mention(top_mention_others)} ({plural(self.mention_others[top_mention_others], 'time')}, {percent(self.mention_others[top_mention_others]/mention_others_sum)})"
+            if len(self.mention_others) > 0 and member_specific
+            else "",
+            f"- **mentioned**: {plural(mention_others_sum, 'time')} ({mention(top_member_mentioned)}, {percent(self.mention_count[top_member_mentioned]/val_sum(self.mention_count))})"
+            if len(self.mention_others) > 0 and not member_specific
+            else "",
+            f"- **top mentions**: {mention(top_member_mentioned)} ({plural(self.mention_count[top_member_mentioned], 'time')}, {percent(self.mention_count[top_member_mentioned]/val_sum(self.mention_count))})"
+            if len(self.mention_others) > 0 and not member_specific
+            else "",
+            f"- **most mentioned**: {mention(top_mention_others)} ({plural(self.mention_others[top_mention_others], 'time')}, {percent(self.mention_others[top_mention_others]/mention_others_sum)})"
+            if len(self.mention_others) > 0 and not member_specific
+            else "",
+            f"- **reactions**: {plural(total_reaction_used, 'time')}"
+            if len(self.reactions) > 0 and not member_specific
+            else "",
+            f"- **reactions**: {plural(total_reaction_used, 'time')} ({percent(total_reaction_used/val_sum(self.used_reaction))} of {type})"
+            if len(self.reactions) > 0 and member_specific
+            else "",
+            f"- **top reactions**: {mention(top_reaction_member)} ({plural(self.used_reaction[top_reaction_member], 'time')}, {percent(self.used_reaction[top_reaction_member]/val_sum(self.used_reaction))})"
+            if len(self.reactions) > 0 and not member_specific
+            else "",
+            f"- **most used reaction**: {top_reaction} ({plural(self.reactions[top_reaction], 'time')}, {percent(self.reactions[top_reaction]/total_reaction_used)})"
+            if len(self.reactions) > 0
+            else "",
+        ]
        return ret
@@ -1,3 +1,3 @@
 from .message_log import MessageLog
 from .channel_logs import ChannelLogs
-from .guild_logs import GuildLogs, ALREADY_RUNNING, CANCELLED
+from .guild_logs import GuildLogs, ALREADY_RUNNING, CANCELLED, NO_FILE
@@ -1,5 +1,7 @@
 from typing import Union, Tuple, Any
 import discord
+from discord import message
+from datetime import datetime

 from . import MessageLog
 from utils import FakeMessage
@@ -7,6 +9,8 @@ from utils import FakeMessage
 CHUNK_SIZE = 2000
 FORMAT = 3

+NOT_SERIALIZED = ["channel", "guild", "start_date"]
+

 class ChannelLogs:
    def __init__(self, channel: Union[discord.TextChannel, dict], guild: Any):
@@ -15,8 +19,10 @@ class ChannelLogs:
            self.id = channel.id
            self.name = channel.name
            self.last_message_id = None
+            self.first_message_id = None
            self.format = FORMAT
            self.messages = []
+            self.start_date = None
        elif isinstance(channel, dict):
            self.format = channel["format"] if "format" in channel else None
            if not self.is_format():
@@ -28,63 +34,102 @@ class ChannelLogs:
                if channel["last_message_id"] is not None
                else None
            )
+            self.first_message_id = (
+                int(channel["first_message_id"])
+                if "first_message_id" in channel
+                and channel["first_message_id"] is not None
+                else None
+            )
            self.messages = [
                MessageLog(message, self) for message in channel["messages"]
            ]
+            self.start_date = (
+                self.messages[-1].created_at if len(self.messages) > 0 else None
+            )

    def is_format(self):
        return self.format == FORMAT

-    async def load(self, channel: discord.TextChannel) -> Tuple[int, int]:
+    async def load(
+        self, channel: discord.TextChannel, start_date: datetime, stop_date: datetime
+    ) -> Tuple[int, int]:
        self.name = channel.name
        self.channel = channel
+        is_empty = self.last_message_id is None
        try:
-            if self.last_message_id is not None:  # append
+            if is_empty:
+                sanity_check = len(await channel.history(limit=1).flatten())
+                if sanity_check != 1:
+                    yield len(self.messages), True
+                    return
+            # load backward
+            if is_empty or (
+                self.first_message_id is not None
+                and (
+                    start_date is None
+                    or (self.start_date is not None and self.start_date > start_date)
+                )
+            ):
+                first_message_date = None
+                tmp_message_id = 0
+                done = 0
+                while (
+                    first_message_date is None
+                    or (
+                        done >= CHUNK_SIZE
+                        and (start_date is None or first_message_date > start_date)
+                    )
+                ) and tmp_message_id != self.first_message_id:
+                    tmp_message_id = self.first_message_id
+                    done = 0
+                    async for message in channel.history(
+                        limit=CHUNK_SIZE,
+                        before=FakeMessage(self.first_message_id)
+                        if self.first_message_id is not None
+                        else None,
+                        oldest_first=False,
+                    ):
+                        done += 1
+                        self.first_message_id = message.id
+                        first_message_date = message.created_at
+                        m = MessageLog(message, self)
+                        await m.load(message)
+                        self.messages += [m]
+                    yield len(self.messages), False
+                if done < CHUNK_SIZE:  # reached bottom
+                    self.first_message_id = None
+                self.last_message_id = channel.last_message_id
+            # load forward
+            last_message_date = self.messages[0].created_at
+            if not is_empty and (stop_date is None or last_message_date < stop_date):
                tmp_message_id = None
                while (
                    self.last_message_id != channel.last_message_id
-                    and self.last_message_id != tmp_message_id
-                ):
+                    and (stop_date is None or last_message_date < stop_date)
+                ) and self.last_message_id != tmp_message_id:
                    tmp_message_id = self.last_message_id
                    async for message in channel.history(
                        limit=CHUNK_SIZE,
                        after=FakeMessage(self.last_message_id),
                        oldest_first=True,
                    ):
+                        last_message_date = message.created_at
                        self.last_message_id = message.id
                        m = MessageLog(message, self)
                        await m.load(message)
                        self.messages.insert(0, m)
                    yield len(self.messages), False
-            else:  # first load
-                last_message_id = None
-                done = 0
-                sanity_check = len(await channel.history(limit=1).flatten())
-                if sanity_check == 1:
-                    while done >= CHUNK_SIZE or last_message_id is None:
-                        done = 0
-                        async for message in channel.history(
-                            limit=CHUNK_SIZE,
-                            before=FakeMessage(last_message_id)
-                            if last_message_id is not None
-                            else None,
-                            oldest_first=False,
-                        ):
-                            done += 1
-                            last_message_id = message.id
-                            m = MessageLog(message, self)
-                            await m.load(message)
-                            self.messages += [m]
-                        yield len(self.messages), False
-                    self.last_message_id = channel.last_message_id
        except discord.errors.HTTPException:
            yield -1, True
            return  # When an exception occurs (like Forbidden)
+        self.start_date = (
+            self.messages[-1].created_at if len(self.messages) > 0 else None
+        )
        yield len(self.messages), True

    def dict(self) -> dict:
        channel = dict(self.__dict__)
-        channel.pop("channel", None)
-        channel.pop("guild", None)
+        for key in NOT_SERIALIZED:
+            channel.pop(key, None)
        channel["messages"] = [message.dict() for message in self.messages]
        return channel
@@ -15,6 +15,7 @@ from utils import code_message, delta, deltas


 LOG_DIR = "logs"
+LOG_EXT = ".logz"

 current_analysis = []
 current_analysis_lock = threading.Lock()
@@ -22,12 +23,22 @@ current_analysis_lock = threading.Lock()

 ALREADY_RUNNING = -100
 CANCELLED = -200
+NO_FILE = -300

+# log file modified less than 5 minutes ago: assume the 'fast' argument
 MIN_MODIFICATION_TIME = 5 * 60
+# ~1 year, remove log file
+MAX_MODIFICATION_TIME = 365 * 24 * 60 * 60


 class Worker:
-    def __init__(self, channel_log: ChannelLogs, channel: discord.TextChannel):
+    def __init__(
+        self,
+        channel_log: ChannelLogs,
+        channel: discord.TextChannel,
+        start_date: datetime,
+        stop_date: datetime,
+    ):
        self.channel_log = channel_log
        self.channel = channel
        self.start_msg = len(channel_log.messages)
@@ -36,12 +47,16 @@ class Worker:
        self.done = False
        self.cancelled = False
        self.loop = asyncio.get_event_loop()
+        self.start_date = start_date
+        self.stop_date = stop_date

    def start(self):
        asyncio.run_coroutine_threadsafe(self.process(), self.loop)

    async def process(self):
-        async for count, done in self.channel_log.load(self.channel):
+        async for count, done in self.channel_log.load(
+            self.channel, self.start_date, self.stop_date
+        ):
            if count > 0:
                self.queried_msg = count - self.start_msg
                self.total_msg = count
@@ -54,7 +69,7 @@ class GuildLogs:
    def __init__(self, guild: discord.Guild):
        self.id = guild.id
        self.guild = guild
-        self.log_file = os.path.join(LOG_DIR, f"{guild.id}.logz")
+        self.log_file = os.path.join(LOG_DIR, f"{guild.id}{LOG_EXT}")
        self.channels = {}
        self.locked = False

@@ -74,26 +89,29 @@ class GuildLogs:
        return self.locked and self.log_file not in current_analysis

    def lock(self) -> bool:
-        self.locked = True
        current_analysis_lock.acquire()
        if self.log_file in current_analysis:
            current_analysis_lock.release()
            return False
+        self.locked = True
        current_analysis.append(self.log_file)
        current_analysis_lock.release()
        return True

    def unlock(self):
-        self.locked = False
-        current_analysis_lock.acquire()
-        if self.log_file in current_analysis:
-            current_analysis.remove(self.log_file)
-        current_analysis_lock.release()
+        if self.locked:
+            self.locked = False
+            current_analysis_lock.acquire()
+            if self.log_file in current_analysis:
+                current_analysis.remove(self.log_file)
+            current_analysis_lock.release()

    async def load(
        self,
        progress: discord.Message,
-        target_channels: List[discord.TextChannel] = [],
+        target_channels: List[discord.TextChannel],
+        start_date: datetime,
+        stop_date: datetime,
        *,
        fast: bool,
        fresh: bool,
@@ -106,52 +124,49 @@ class GuildLogs:
        if not os.path.exists(LOG_DIR):
            os.mkdir(LOG_DIR)
        last_time = None
-        if os.path.exists(self.log_file):
-            channels = {}
-            try:
-                last_time = os.path.getmtime(self.log_file)
-                gziped_data = None
-                await code_message(progress, "Reading saved history (1/4)...")
-                t0 = datetime.now()
-                with open(self.log_file, mode="rb") as f:
-                    gziped_data = f.read()
-                logging.info(f"log {self.guild.id} > read in {delta(t0):,}ms")
-                if self.check_cancelled():
-                    return CANCELLED, 0
-                await code_message(progress, "Reading saved history (2/4)...")
-                t0 = datetime.now()
-                json_data = gzip.decompress(gziped_data)
-                del gziped_data
-                logging.info(
-                    f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms"
-                )
-                if self.check_cancelled():
-                    return CANCELLED, 0
-                await code_message(progress, "Reading saved history (3/4)...")
-                t0 = datetime.now()
-                channels = json.loads(json_data)
-                del json_data
-                logging.info(f"log {self.guild.id} > json parse in {delta(t0):,}ms")
-                if self.check_cancelled():
-                    return CANCELLED, 0
-                await code_message(progress, "Reading saved history (4/4)...")
-                t0 = datetime.now()
-                self.channels = {
-                    int(id): ChannelLogs(channels[id], self) for id in channels
-                }
-                # remove invalid format
-                self.channels = {
-                    id: self.channels[id]
-                    for id in self.channels
-                    if self.channels[id].is_format()
-                }
-                logging.info(f"log {self.guild.id} > loaded in {delta(t0):,}ms")
-            except json.decoder.JSONDecodeError:
-                logging.error(f"log {self.guild.id} > invalid JSON")
-            except IOError:
-                logging.error(f"log {self.guild.id} > cannot read")
-        else:
-            fast = False
+        if not os.path.exists(self.log_file):
+            return NO_FILE, 0
+        channels = {}
+        try:
+            last_time = os.path.getmtime(self.log_file)
+            gziped_data = None
+            await code_message(progress, "Reading saved history (1/4)...")
+            t0 = datetime.now()
+            with open(self.log_file, mode="rb") as f:
+                gziped_data = f.read()
+            logging.info(f"log {self.guild.id} > read in {delta(t0):,}ms")
+            if self.check_cancelled():
+                return CANCELLED, 0
+            await code_message(progress, "Reading saved history (2/4)...")
+            t0 = datetime.now()
+            json_data = gzip.decompress(gziped_data)
+            del gziped_data
+            logging.info(f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms")
+            if self.check_cancelled():
+                return CANCELLED, 0
+            await code_message(progress, "Reading saved history (3/4)...")
+            t0 = datetime.now()
+            channels = json.loads(json_data)
+            del json_data
+            logging.info(f"log {self.guild.id} > json parse in {delta(t0):,}ms")
+            if self.check_cancelled():
+                return CANCELLED, 0
+            await code_message(progress, "Reading saved history (4/4)...")
+            t0 = datetime.now()
+            self.channels = {
+                int(id): ChannelLogs(channels[id], self) for id in channels
+            }
+            # remove invalid format
+            self.channels = {
+                id: self.channels[id]
+                for id in self.channels
+                if self.channels[id].is_format()
+            }
+            logging.info(f"log {self.guild.id} > loaded in {delta(t0):,}ms")
+        except json.decoder.JSONDecodeError:
+            logging.error(f"log {self.guild.id} > invalid JSON")
+        except IOError:
+            logging.error(f"log {self.guild.id} > cannot read")

        if len(target_channels) == 0:
            target_channels = (
@@ -171,6 +186,8 @@ class GuildLogs:
        if (
            not fast
            and not fresh
+            and start_date is None
+            and stop_date is None
            and last_time is not None
            and (time.time() - last_time) < MIN_MODIFICATION_TIME
        ):
@@ -178,8 +195,10 @@ class GuildLogs:
                channel
                for channel in target_channels
                if channel.id not in self.channels
+                or self.channels[channel.id].first_message_id is not None
            ]
            if len(invalid_target_channels) == 0:
+                logging.info(f"log {self.guild.id} > assumed fast")
                fast = True
                if self.locked:
                    self.unlock()
@@ -212,7 +231,9 @@ class GuildLogs:
                if channel.id not in self.channels or fresh:
                    loading_new += 1
                    self.channels[channel.id] = ChannelLogs(channel, self)
-                workers += [Worker(self.channels[channel.id], channel)]
+                workers += [
+                    Worker(self.channels[channel.id], channel, start_date, stop_date)
+                ]
            warning_msg = "(this might take a while)"
            if len(target_channels) > 5 and loading_new > 5:
                warning_msg = "(most channels are new, this will take a long while)"
@@ -253,7 +274,7 @@ class GuildLogs:
                    f"Reading new history...\n{total_msg:,} messages in {total_chan:,}/{max_chan:,} channels ({round(queried_msg/deltas(t0)):,}m/s)\n{warning_msg}{remaining_msg}",
                )
            logging.info(
-                f"log {self.guild.id} > queried in {delta(t0):,}ms -> {queried_msg / deltas(t0):,.3f} m/s"
+                f"log {self.guild.id} > queried {queried_msg} in {delta(t0):,}ms -> {queried_msg / deltas(t0):,.3f} m/s"
            )
            # write logs
            real_total_msg = sum(
@@ -322,3 +343,46 @@ class GuildLogs:
                f"No cancellable analysis are currently running on this server",
                reference=message,
            )
+
+    @staticmethod
+    def init_log(guild: discord.Guild):
+        """Create the log file of ``guild`` if it does not exist yet.
+
+        The file is ``LOG_DIR/<guild.id><LOG_EXT>`` and is initialised with a
+        gzip-compressed empty JSON object (``{}``). An existing file is left
+        untouched.
+        """
+        # exist_ok avoids the check-then-create race of exists() + mkdir()
+        os.makedirs(LOG_DIR, exist_ok=True)
+        filename = os.path.join(LOG_DIR, f"{guild.id}{LOG_EXT}")
+        if not os.path.exists(filename):
+            with open(filename, mode="wb") as f:
+                f.write(gzip.compress(bytes("{}", "utf-8")))
+            logging.info(f"log {guild.id} > created")
+        else:
+            logging.info(f"log {guild.id} > already exists")
+
+    @staticmethod
+    def remove_log(guild: discord.Guild):
+        """Delete the log file of ``guild`` (``LOG_DIR/<guild.id><LOG_EXT>``).
+
+        A missing file is not an error; it is only reported in the log output.
+        """
+        # exist_ok avoids the check-then-create race of exists() + mkdir()
+        os.makedirs(LOG_DIR, exist_ok=True)
+        filename = os.path.join(LOG_DIR, f"{guild.id}{LOG_EXT}")
+        try:
+            # attempt the removal directly instead of exists() + unlink(),
+            # which can fail if the file disappears between the two calls
+            os.unlink(filename)
+        except FileNotFoundError:
+            logging.info(f"log {guild.id} > does not exists")
+        else:
+            logging.info(f"log {guild.id} > removed")
+
+    @staticmethod
+    def check_logs(guilds: List[discord.Guild]):
+        """Remove log files that are outdated or no longer tied to a guild.
+
+        Every regular file in ``LOG_DIR`` whose extension is ``LOG_EXT`` is
+        deleted when either:
+
+        * its base name is not the id of one of ``guilds`` (unused log), or
+        * it was last modified more than ``MAX_MODIFICATION_TIME`` seconds ago
+          (old log).
+        """
+        logging.info("checking logs...")
+        # exist_ok avoids the check-then-create race of exists() + mkdir()
+        os.makedirs(LOG_DIR, exist_ok=True)
+        # set: one hash lookup per file instead of a list scan
+        guild_ids = {str(guild.id) for guild in guilds}
+        for item in os.listdir(LOG_DIR):
+            path = os.path.join(LOG_DIR, item)
+            name, ext = os.path.splitext(item)
+            if not os.path.isfile(path) or ext != LOG_EXT:
+                continue
+            if name not in guild_ids:
+                logging.info(f"> removing unused log '{path}'")
+                os.unlink(path)
+            elif (time.time() - os.path.getmtime(path)) > MAX_MODIFICATION_TIME:
+                logging.info(f"> removing old log '{path}'")
+                os.unlink(path)
@@ -8,6 +8,9 @@ IMAGE_FORMAT = [".gif", ".gifv", ".png", ".jpg", ".jpeg", ".bmp"]
 EMBED_IMAGES = ["image", "gifv"]


+NOT_SERIALIZED = ["channel"]
+
+
 class MessageLog:
    def __init__(self, message: Union[discord.Message, dict], channel: Any):
        self.channel = channel
@@ -79,7 +82,8 @@ class MessageLog:

    def dict(self) -> dict:
        message = dict(self.__dict__)
-        message.pop("channel", None)
+        for key in NOT_SERIALIZED:
+            message.pop(key, None)
        message["created_at"] = self.created_at.isoformat()
        message["edited_at"] = (
            self.edited_at.isoformat() if self.edited_at is not None else None
@@ -6,7 +6,7 @@ if sys.version_info < (3, 7):
    print("Please upgrade your Python version to 3.7.0 or higher")
    sys.exit(1)

-from utils import emojis
+from utils import emojis, gdpr
 from scanners import (
    EmotesScanner,
    FullScanner,
@@ -33,17 +33,43 @@ emojis.load_emojis()

 bot = Bot(
    "Discord Analyst",
-    "1.12",
+    "1.13",
    alias="%",
 )

 bot.log_calls = True

+
+async def on_ready():
+    """Event handler registered on the bot: clean up the stored guild logs.
+
+    Delegates to GuildLogs.check_logs with the guilds the client is
+    currently a member of.
+    """
+    GuildLogs.check_logs(bot.client.guilds)
+    # NOTE(review): the meaning of this return value is defined by
+    # bot.register_event - confirm against the bot library
+    return True
+
+
+async def on_guild_remove():
+    """Event handler registered on the bot: clean up the stored guild logs.
+
+    Delegates to GuildLogs.check_logs with the guilds the client is
+    currently a member of.
+    """
+    GuildLogs.check_logs(bot.client.guilds)
+    # NOTE(review): the meaning of this return value is defined by
+    # bot.register_event - confirm against the bot library
+    return True
+
+
+bot.register_event(on_ready)
+bot.register_event(on_guild_remove)
+
 bot.register_command(
    "(cancel|stop)",
    GuildLogs.cancel,
    "cancel: stop current analysis (not launched with fast)",
-    "```\n" + "%cancel: Stop current analysis (not launched with fast)\n" + "```",
+    "```\n%cancel: Stop current analysis (not launched with fast)\n```",
+)
+bot.register_command(
+    "gdpr",
+    gdpr.process,
+    "gdpr: displays GDPR information",
+    gdpr.HELP,
+)
+bot.register_command(
+    "words",
+    lambda *args: WordsScanner().compute(*args),
+    "words: (BETA) rank words by their usage",
+    WordsScanner.help(),
 )
 bot.register_command(
    "last",
@@ -63,12 +89,6 @@ bot.register_command(
    "first: read first message",
    FirstScanner.help(),
 )
-bot.register_command(
-    "words",
-    lambda *args: WordsScanner().compute(*args),
-    "words: rank words by their usage",
-    WordsScanner.help(),
-)
 bot.register_command(
    "mentioned",
    lambda *args: MentionedScanner().compute(*args),
@@ -11,4 +11,4 @@ from .reactions_scanner import ReactionsScanner
 from .first_scanner import FirstScanner
 from .last_scanner import LastScanner
 from .random_scanner import RandomScanner
-from .words_scanner import WordsScanner
+from .words_scanner import WordsScanner
@@ -8,21 +8,17 @@ import discord
 from logs import ChannelLogs, MessageLog
 from .scanner import Scanner
 from data_types import Counter
-from utils import COMMON_HELP_ARGS, mention, channel_mention
+from utils import generate_help, mention, channel_mention


 class ChannelsScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
-            "```\n"
-            + "%chan: Rank channels by their messages\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* <n> - top <n>, default is 10\n"
-            + "* all/everyone - include bots\n"
-            + "Example: %chan 10 @user\n"
-            + "```"
+        return generate_help(
+            "chan",
+            "Rank channels by their messages",
+            args=["<n> - top <n>, default is 10", "all/everyone - include bots"],
+            example="5 @user",
        )

    def __init__(self):
@@ -8,21 +8,13 @@ import discord
 from .scanner import Scanner
 from data_types import Composition
 from logs import ChannelLogs, MessageLog
-from utils import emojis, COMMON_HELP_ARGS
+from utils import emojis, generate_help


 class CompositionScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
-            "```\n"
-            + "%compo: Show composition statistics\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* all/everyone - include bots\n"
-            + "Example: %compo #mychannel1 @user\n"
-            + "```"
-        )
+        return generate_help("compo", "Show composition statistics")

    def __init__(self):
        super().__init__(
@@ -8,24 +8,23 @@ import discord
 from logs import ChannelLogs, MessageLog
 from data_types import Emote, get_emote_dict
 from .scanner import Scanner
-from utils import emojis, COMMON_HELP_ARGS, plural, precise
+from utils import emojis, generate_help, plural, precise


 class EmotesScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
-            "```\n"
-            + "%emojis: Rank emojis by their usage\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* <n> - top <n> emojis, default is 20\n"
-            + "* all - list all common emojis in addition to this guild's\n"
-            + "* members - show top member for each emojis\n"
-            + "* sort:usage/reaction - other sorting methods\n"
-            + "* everyone - include bots\n"
-            + "Example: %emojis 10 all #mychannel1 #mychannel2 @user\n"
-            + "```"
+        return generate_help(
+            "emojis",
+            "Rank emojis by their usage",
+            args=[
+                "<n> - top <n> emojis, default is 20",
+                "all - list all common emojis in addition to this guild's",
+                "members - show top member for each emojis",
+                "sort:usage/reaction - other sorting methods",
+                "everyone - include bots",
+            ],
+            example="10 all #mychannel1 #mychannel2 @user",
        )

    def __init__(self):
@@ -3,14 +3,13 @@ from typing import List
 # Custom libs

 from .history_scanner import HistoryScanner
+from utils import generate_help


 class FirstScanner(HistoryScanner):
    @staticmethod
    def help() -> str:
-        return super(FirstScanner, FirstScanner).help(
-            cmd="first", text="Read first message"
-        )
+        return generate_help("first", "Read first message")

    def __init__(self):
        super().__init__(help=FirstScanner.help())
@@ -8,21 +8,13 @@ import discord
 from .scanner import Scanner
 from data_types import Frequency
 from logs import ChannelLogs, MessageLog
-from utils import COMMON_HELP_ARGS
+from utils import generate_help


 class FrequencyScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
-            "```\n"
-            + "%freq: Show frequency-related statistics\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* all/everyone - include bots\n"
-            + "Example: %freq #mychannel1 @user\n"
-            + "```"
-        )
+        return generate_help("freq", "Show frequency-related statistics")

    def __init__(self):
        super().__init__(
@@ -55,7 +47,7 @@ class FrequencyScanner(Scanner):
        freq: Frequency,
        raw_members: List[int],
        *,
-        all_messages: bool
+        all_messages: bool,
    ) -> bool:
        impacted = False
        # If author is included in the selection (empty list is all)
@@ -8,21 +8,13 @@ from .scanner import Scanner
 from . import FrequencyScanner, CompositionScanner, PresenceScanner
 from data_types import Frequency, Composition, Presence
 from logs import ChannelLogs, MessageLog
-from utils import COMMON_HELP_ARGS
+from utils import generate_help


 class FullScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
-            "```\n"
-            + "%scan: Show full statistics\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* all/everyone - include bots\n"
-            + "Example: %scan #mychannel1 @user\n"
-            + "```"
-        )
+        return generate_help("scan", "Show full statistics")

    def __init__(self):
        super().__init__(
@@ -7,22 +7,9 @@ import discord
 from .scanner import Scanner
 from data_types import History
 from logs import ChannelLogs, MessageLog
-from utils import COMMON_HELP_ARGS


 class HistoryScanner(Scanner, ABC):
-    @staticmethod
-    def help(*, cmd: str, text: str) -> str:
-        return (
-            "```\n"
-            + f"%{cmd}: {text}\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* all/everyone - include bots\n"
-            + "Example: %{cmd} #mychannel1 @user\n"
-            + "```"
-        )
-
    def __init__(self, *, help: str):
        super().__init__(
            has_digit_args=True,
@@ -3,14 +3,13 @@ from typing import List
 # Custom libs

 from .history_scanner import HistoryScanner
+from utils import generate_help


 class LastScanner(HistoryScanner):
    @staticmethod
    def help() -> str:
-        return super(LastScanner, LastScanner).help(
-            cmd="last", text="Read last message"
-        )
+        return generate_help("last", "Read last message")

    def __init__(self):
        super().__init__(help=LastScanner.help())
@@ -8,22 +8,18 @@ import discord
 from logs import ChannelLogs, MessageLog
 from .scanner import Scanner
 from data_types import Counter
-from utils import COMMON_HELP_ARGS, plural, precise, mention, alt_mention
+from utils import generate_help, plural, precise, mention, alt_mention


 class MentionedScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
-            "```\n"
-            + "%mentioned: Rank specific user's mentions by their usage\n"
-            + "arguments:\n"
-            + "* @member/me - (required) one or more member\n"
-            + "\n".join(COMMON_HELP_ARGS.split("\n")[1:])
-            + "* <n> - top <n> mentions, default is 10\n"
-            + "* all - include bots mentions\n"
-            + "Example: %mentioned 10 @user\n"
-            + "```"
+        """Build the help text shown for the %mentioned command."""
+        return generate_help(
+            "mentioned",
+            "Rank specific user's mentions by their usage",
+            args=["<n> - top <n>, default is 10", "all/everyone - include bots"],
+            example="5 @user",
+            # Replaces the first common argument. No leading space here:
+            # generate_help already puts "* " in front of every entry.
+            replace_args=["@member/me - (required) one or more member"],
        )

    def __init__(self):
@@ -45,7 +41,7 @@ class MentionedScanner(Scanner):
                "You need to mention at least one member or use `me`", reference=message
            )
            return False
-        self.all_mentions = "all" in args
+        self.all_mentions = "all" in args or "everyone" in args
        # Create mentions dict
        self.mentions = defaultdict(Counter)
        return True
@@ -9,7 +9,7 @@ from logs import ChannelLogs, MessageLog
 from .scanner import Scanner
 from data_types import Counter
 from utils import (
-    COMMON_HELP_ARGS,
+    generate_help,
    plural,
    precise,
    mention,
@@ -22,16 +22,15 @@ from utils import (
 class MentionsScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
-            "```\n"
-            + "%mentions: Rank mentions by their usage\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* <n> - top <n> mentions, default is 10\n"
-            + "* all - show role/channel/everyone/here mentions\n"
-            + "* everyone - include bots mentions\n"
-            + "Example: %mentions 10 #mychannel1 #mychannel2 @user\n"
-            + "```"
+        return generate_help(
+            "mentions",
+            "Rank mentions by their usage",
+            args=[
+                "<n> - top <n>, default is 10",
+                "all - show role/channel/everyone/here mentions",
+                "everyone - include bots mentions",
+            ],
+            example="10 #mychannel1 #mychannel2 @user",
        )

    def __init__(self):
@@ -8,21 +8,17 @@ import discord
 from logs import ChannelLogs, MessageLog
 from .scanner import Scanner
 from data_types import Counter
-from utils import COMMON_HELP_ARGS, mention, channel_mention
+from utils import generate_help, mention, channel_mention


 class MessagesScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
-            "```\n"
-            + "%msg: Rank users by their messages\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* <n> - top <n>, default is 10\n"
-            + "* all/everyone - include bots\n"
-            + "Example: %msg 10 #channel\n"
-            + "```"
+        return generate_help(
+            "msg",
+            "Rank users by their messages",
+            args=["<n> - top <n>, default is 10", "all/everyone - include bots"],
+            example="10 #channel",
        )

    def __init__(self):
@@ -7,21 +7,13 @@ import discord
 from .scanner import Scanner
 from data_types import Presence
 from logs import ChannelLogs, MessageLog
-from utils import COMMON_HELP_ARGS
+from utils import generate_help


 class PresenceScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
-            "```\n"
-            + "%pres: Show presence statistics\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* all/everyone - include bots\n"
-            + "Example: %pres #mychannel1 @user\n"
-            + "```"
-        )
+        return generate_help("pres", "Show presence statistics")

    def __init__(self):
        super().__init__(
@@ -3,14 +3,13 @@ from typing import List
 # Custom libs

 from .history_scanner import HistoryScanner
+from utils import generate_help


 class RandomScanner(HistoryScanner):
    @staticmethod
    def help() -> str:
-        return super(RandomScanner, RandomScanner).help(
-            cmd="rand", text="Read a random message"
-        )
+        return generate_help("rand", "Read a random message")

    def __init__(self):
        super().__init__(help=RandomScanner.help())
@@ -8,20 +8,17 @@ import discord
 from logs import ChannelLogs, MessageLog
 from .scanner import Scanner
 from data_types import Counter
-from utils import COMMON_HELP_ARGS, mention, channel_mention
+from utils import generate_help, mention, channel_mention


 class ReactionsScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
-            "```\n"
-            + "%react: Rank users by their reactions\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* <n> - top <n>, default is 10\n"
-            + "Example: %react 10 #channel\n"
-            + "```"
+        return generate_help(
+            "react",
+            "Rank users by their reactions",
+            args=["<n> - top <n>, default is 10"],
+            example="10 #channel",
        )

    def __init__(self):
@@ -5,8 +5,24 @@ import logging
 import re
 import discord

-from utils import no_duplicate, get_intro, delta
-from logs import GuildLogs, ChannelLogs, MessageLog, ALREADY_RUNNING, CANCELLED
+
+from utils import (
+    no_duplicate,
+    get_intro,
+    delta,
+    gdpr,
+    ISO8601_REGEX,
+    RELATIVE_REGEX,
+    parse_time,
+)
+from logs import (
+    GuildLogs,
+    ChannelLogs,
+    MessageLog,
+    ALREADY_RUNNING,
+    CANCELLED,
+    NO_FILE,
+)


 class Scanner(ABC):
@@ -47,22 +63,42 @@ class Scanner(ABC):
                str(channel.id) for channel in message.channel_mentions
            ]
            str_mentions = [str(member.id) for member in message.mentions]
+            dates = []
            for i, arg in enumerate(args[1:]):
+                skip_check = False
                if re.match(r"^<@!?\d+>$", arg):
                    arg = arg[3:-1] if "!" in arg else arg[2:-1]
                elif re.match(r"^<#!?\d+>$", arg):
                    arg = arg[3:-1] if "!" in arg else arg[2:-1]
+                elif re.match(ISO8601_REGEX, arg) or re.match(RELATIVE_REGEX, arg):
+                    dates += [parse_time(arg)]
+                    skip_check = True
+                    if len(dates) > 2:
+                        await message.channel.send(
+                            f"Too many date arguments: `{arg}`", reference=message
+                        )
+                        return
                if (
                    arg not in self.valid_args + ["me", "here", "fast", "fresh"]
                    and (not arg.isdigit() or not self.has_digit_args)
                    and arg not in str_channel_mentions
                    and arg not in str_mentions
+                    and not skip_check
                ):
                    await message.channel.send(
                        f"Unrecognized argument: `{arg}`", reference=message
                    )
                    return

+            self.start_date = None if len(dates) < 1 else min(dates)
+            self.stop_date = None if len(dates) < 2 else max(dates)
+
+            if self.start_date is not None and self.start_date > datetime.now():
+                await message.channel.send(
+                    f"Start date is after today", reference=message
+                )
+                return
+
            # Get selected channels or all of them if no channel arguments
            self.channels = no_duplicate(message.channel_mentions)

@@ -94,7 +130,12 @@ class Scanner(ABC):
                    allowed_mentions=discord.AllowedMentions.none(),
                )
                total_msg, total_chan = await logs.load(
-                    progress, self.channels, fast="fast" in args, fresh="fresh" in args
+                    progress,
+                    self.channels,
+                    self.start_date,
+                    self.stop_date,
+                    fast="fast" in args,
+                    fresh="fresh" in args,
                )
                if total_msg == CANCELLED:
                    await message.channel.send(
@@ -106,7 +147,24 @@ class Scanner(ABC):
                        "An analysis is already running on this server, please be patient.",
                        reference=message,
                    )
+                elif total_msg == NO_FILE:
+                    await message.channel.send(gdpr.TEXT)
                else:
+                    if self.start_date is not None and len(logs.channels) > 0:
+                        self.start_date = max(
+                            self.start_date,
+                            min(
+                                [
+                                    logs.channels[channel.id].start_date
+                                    for channel in self.channels
+                                    if channel.id in logs.channels
+                                    and logs.channels[channel.id].start_date is not None
+                                ]
+                            ),
+                        )
+                        if self.stop_date is None:
+                            self.stop_date = datetime.utcnow()
+
                    self.msg_count = 0
                    self.total_msg = 0
                    self.chan_count = 0
@@ -118,13 +176,21 @@ class Scanner(ABC):
                                [
                                    self.compute_message(channel_logs, message_log)
                                    for message_log in channel_logs.messages
+                                    if (
+                                        self.start_date is None
+                                        or message_log.created_at >= self.start_date
+                                    )
+                                    and (
+                                        self.stop_date is None
+                                        or message_log.created_at <= self.stop_date
+                                    )
                                ]
                            )
                            self.total_msg += len(channel_logs.messages)
                            self.msg_count += count
                            self.chan_count += 1 if count > 0 else 0
                    logging.info(f"scan {guild.id} > scanned in {delta(t0):,}ms")
-                    if self.total_msg == 0:
+                    if self.msg_count == 0:
                        await message.channel.send(
                            "There are no messages found matching the filters",
                            reference=message,
@@ -141,21 +207,24 @@ class Scanner(ABC):
                                self.members,
                                self.msg_count,
                                self.chan_count,
+                                self.start_date,
+                                self.stop_date,
                            )
                        )
                        logging.info(f"scan {guild.id} > results in {delta(t0):,}ms")
                        response = ""
                        first = True
                        for r in results:
-                            if len(response + "\n" + r) > 2000:
-                                await message.channel.send(
-                                    response,
-                                    reference=message if first else None,
-                                    allowed_mentions=discord.AllowedMentions.none(),
-                                )
-                                first = False
-                                response = ""
-                            response += "\n" + r
+                            if r:
+                                if len(response + "\n" + r) > 2000:
+                                    await message.channel.send(
+                                        response,
+                                        reference=message if first else None,
+                                        allowed_mentions=discord.AllowedMentions.none(),
+                                    )
+                                    first = False
+                                    response = ""
+                                response += "\n" + r
                        if len(response) > 0:
                            await message.channel.send(
                                response,
@@ -9,7 +9,7 @@ from logs import ChannelLogs, MessageLog
 from .scanner import Scanner
 from data_types import Counter
 from utils import (
-    COMMON_HELP_ARGS,
+    generate_help,
    plural,
    precise,
 )
@@ -18,16 +18,15 @@ from utils import (
 class WordsScanner(Scanner):
    @staticmethod
    def help() -> str:
-        return (
-            "```\n"
-            + "%words: Rank words by their usage\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* <n> - words containings <n> or more letters, default is 3\n"
-            + "* <n2> - top <n2> words, default is 10\n"
-            + "* everyone - include bots\n"
-            + "Example: %words 5 10 #mychannel1 #mychannel2 @user\n"
-            + "```"
+        return generate_help(
+            "words",
+            "(BETA) Rank words by their usage",
+            args=[
+                "<n> - words containings <n> or more letters, default is 3",
+                "<n2> - top <n2> words, default is 10",
+                "all/everyone - include bots",
+            ],
+            example="5 10 #mychannel1 #mychannel2 @user",
        )

    def __init__(self):
@@ -104,16 +103,13 @@ class WordsScanner(Scanner):
            or message.author in raw_members
        ):
            impacted = True
-            content = " ".join(
-                [
-                    block
-                    for block in message.content.split()
-                    if not re.match(r"^\w+:\/\/", block)
-                ]
-            )
+            content = message.content
+            content = re.sub(r"```.+```", "", content, flags=re.DOTALL)
+            content = re.sub(r"`.+`", "", content, flags=re.DOTALL)
+            content = re.sub(r"\w+:\/\/[^ ]+", "", content)
            for word in re.split("[^\w\-':]", content):
                m = re.match(
-                    r"(?!^:\w+:$)^[^\w]*((?![\d_])\w.*(?![\d_])\w)[^\w]*$", word
+                    r"(?!^:\w+:$)^[^\w]*((?![\d_])\w[\w\-']*(?![\d_])\w)[^\w]*$", word
                )
                if m:
                    word = m[1].lower()
@@ -126,7 +122,5 @@ class WordsScanner(Scanner):
                                words[word] = words[word + case]
                                del words[word + case]
                                break
-                        words[word].update_use(
-                            message.content.count(word), message.created_at
-                        )
+                        words[word].update_use(1, message.created_at)
        return impacted
@@ -0,0 +1,65 @@
+import discord
+
+from logs import GuildLogs
+
+
+HELP = """```
+%gdpr: Displays GDPR information
+arguments:
+* agree - agree to GDPR
+* revoke - remove this server's data
+```"""
+
+TEXT = """
+__**About Analyst-bot's data usage**__
+**TL;DR**
+Analyst-bot collects text message information. It does not share collected data with any third-party and data is retained 18 months or until the bot is leaving the guild/server.
+**Data collection**
+Analyst-bot collects a Discord guild/server's history when asked to.
+This includes:
+- Visible text channel names
+- Visible text messages: date and time of creation and edition,  author,  content,  reactions and other available metadata (pinned, tts, etc.)
+This does __not__ includes:
+- Voice channels and not visible channels
+- Not visible text messages
+- Visible text messages' embedded content, images and other attachments
+**Data processing**
+Any data collected is only processed in order to produce a one-time report sent to the user immediately. No temporary data are retained.
+**Data storage and retain policy**
+Analyst-bot stores the collected data in files that are accessible by the software and its administrator only.
+Any collected data are retained maximum 18 months until deletion or when the bot is leaving a guild/server.
+**Data sharing**
+Analyst-bot does not share the data collected with any third-party.
+**Right to retract**
+If you want to have your data removed, you can use the `%gdpr revoke` command or remove this bot from your guild/server.
+**Terms agreement**
+By agreeing to these terms, you ensure having the legal age if you are in a country that does have one and you also ensure having the consent of every member involved.
+
+*If you want more information, please contact the creator of this bot: <https://github.com/Klemek/discord-analyst>.*
+
+Type `%gdpr agree` to agree to these terms, `%gdpr revoke` to remove this guild/server's collected data or `%gdpr` to see this message again.
+"""
+
+AGREE_TEXT = "Thanks for agreeing for these terms, you can now run analysis on this guild/server."
+
+REVOKE_TEXT = "This guild/server's data has been deleted. To run new analysis you must agree to the terms again."
+
+
+async def process(client: discord.Client, message: discord.Message, *args: str):
+    """Handle the ``gdpr`` command: show the terms, accept them or revoke them.
+
+    ``args[0]`` is not inspected here (presumably the command word itself -
+    confirm against the caller). ``args[1]``, when present, selects the action:
+
+    * ``help`` - send the command help
+    * ``agree`` / ``accept`` - create this guild's log file
+    * ``revoke`` / ``cancel`` / ``remove`` / ``delete`` - delete this guild's
+      log file
+
+    Every outcome is reported with a message in the originating channel.
+    """
+    if len(args) < 2:
+        # no sub-command (or no argument at all): display the full terms
+        # instead of failing on args[1]
+        await message.channel.send(TEXT)
+    elif len(args) > 2:
+        await message.channel.send("Too many arguments", reference=message)
+    elif args[1] == "help":
+        await message.channel.send(HELP, reference=message)
+    elif args[1] in ["agree", "accept"]:
+        GuildLogs.init_log(message.channel.guild)
+        await message.channel.send(AGREE_TEXT, reference=message)
+    elif args[1] in ["revoke", "cancel", "remove", "delete"]:
+        GuildLogs.remove_log(message.channel.guild)
+        await message.channel.send(REVOKE_TEXT, reference=message)
+    else:
+        await message.channel.send(
+            f"Unrecognized argument: `{args[1]}`", reference=message
+        )
@@ -1,19 +1,44 @@
-from typing import List, Dict, Union, Optional, Any
+from calendar import month
+from typing import Callable, List, Dict, Union, Optional, Any
 import os
 import logging
 import discord
 import math
-from datetime import datetime
+from datetime import datetime, timedelta
+import re
+import dateutil.parser
+from dateutil.relativedelta import relativedelta

 # OTHER

-COMMON_HELP_ARGS = (
-    ""
-    + "* @member/me - filter for one or more member\n"
-    + "* #channel/here - filter for one or more channel\n"
-    + "* fast - only read cache\n"
-    + "* fresh - does not read cache (long)\n"
-)
+# Argument descriptions listed in every help text built by generate_help(),
+# which can swap out the first entries through its replace_args parameter
+COMMON_HELP_ARGS = [
+    "@member/me - filter for one or more member",
+    "#channel/here - filter for one or more channel",
+    "<date1> - filter after <date1>",
+    "<date2> - filter before <date2>",
+    "fast - only read cache",
+    "fresh - does not read cache (long)",
+]
+
+
+def generate_help(
+    cmd: str,
+    info: str,
+    *,
+    args: Optional[List[str]] = None,
+    example: str = "#mychannel1 @user",
+    replace_args: Optional[List[str]] = None,
+) -> str:
+    """
+    Build the help message of a command, wrapped in a triple-backtick fence
+
+    :param cmd: command name, shown after a % prefix
+    :param info: description printed next to the command name
+    :param args: command-specific argument lines appended after the common
+        ones, defaults to the single "all/everyone - include bots" line
+    :param example: arguments shown in the example invocation
+    :param replace_args: lines that take the place of the first
+        len(replace_args) entries of COMMON_HELP_ARGS
+    :return: the formatted help text
+    """
+    # None instead of list literals: a default list is created once and would
+    # be shared between every call
+    if args is None:
+        args = ["all/everyone - include bots"]
+    if replace_args is None:
+        replace_args = []
+    arg_list = "* " + "\n* ".join(
+        list(replace_args) + COMMON_HELP_ARGS[len(replace_args) :] + list(args)
+    )
+    return f"""```
+%{cmd}: {info}
+arguments:
+{arg_list}
+(Sample dates: 2020 / 2021-11 / 2021-06-28 / 2020-06-28T23:00 / today / week / 8days / 1y)
+Example: %{cmd} {example}
+```"""


 def delta(t0: datetime):
@@ -92,11 +117,19 @@ def no_duplicate(seq: list) -> list:
 # DICTS


-def top_key(d: Dict[Union[str, int], int]) -> Union[str, int]:
-    return sorted(d, key=lambda k: d[k])[-1]
+def top_key(
+    d: Dict[Union[str, int], int], key: Optional[Callable] = None
+) -> Optional[Union[str, int]]:
+    """
+    Get the key of a dictionary that ranks highest, None if the dictionary is empty
+
+    :param d: dictionary whose keys are ranked
+    :param key: ranking function called with each key, defaults to the
+        value stored under that key
+    :return: the top key; among equally ranked keys the last one in
+        iteration order wins (the sort is stable)
+    """
+    if not d:
+        return None
+    if key is None:
+        # bound method instead of a lambda assigned to a name
+        key = d.__getitem__
+    return sorted(d, key=key)[-1]


 def val_sum(d: Dict[Any, int]) -> int:
+    """
+    Sum all the values of a dictionary, 0 if it is empty
+    """
+    if len(d) == 0:
+        return 0
    return sum(d.values())


@@ -135,6 +168,51 @@ def precise(p: float, *, precision: int = 2) -> str:

 # DATE FORMATTING

+# Pattern for ISO 8601 date/time strings; not referenced by the functions shown
+# in this part of the file (presumably used elsewhere, to be confirmed)
+ISO8601_REGEX = r"^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)?$"
+# Full-length timestamp; same text as the one parse_iso_datetime uses to complete truncated dates
+ISO8601_FULL = "0000-01-01T00:00:00"
+
+
+def parse_iso_datetime(str_date: str) -> datetime:
+    """
+    Parse a date that may be a truncated ISO timestamp ("2020", "2021-11", "2020-06-28T23:00"...)
+
+    A truncated input is completed with the tail of ISO8601_FULL, so that
+    "2021-11" is parsed as "2021-11-01T00:00:00". Any other input is handed
+    to dateutil unchanged.
+
+    :param str_date: text to parse
+    :return: the datetime produced by dateutil.parser.parse; errors raised by
+        dateutil are not caught here
+    """
+    # raw string: "\d" in a plain literal is an invalid escape sequence
+    if re.match(
+        r"^\d{4}(-\d{2}(-\d{2}(T\d{2}(:\d{2}(:\d{2}(:\d{2})?)?)?)?)?)?$", str_date
+    ):
+        str_date = str_date + ISO8601_FULL[len(str_date) :]
+    return dateutil.parser.parse(str_date)
+
+
+# Relative time spellings: "yesterday", "today", or an optional number followed by
+# h/hour(s), d/day(s), w/week(s), m/month(s) or y/year(s); the pattern carries no anchors
+RELATIVE_REGEX = r"(yesterday|today|\d*h(ours?)?|\d*d(ays?)?|\d*w(eeks?)?|\d*m(onths?)?|\d*y(ears?)?)"
+
+
+def parse_relative_time(src: str) -> datetime:
+    """
+    Convert a relative time expression into a naive UTC datetime
+
+    "today" and "yesterday" give the start (local midnight) of that day,
+    shifted to UTC. Any other input is read as "<count><unit>": the count
+    defaults to 1, the unit is the first letter of hour/day/week/month/year,
+    and the result is that amount of time before the current UTC time.
+
+    :param src: relative expression such as "today", "week", "8days" or "1y"
+    :return: naive datetime expressed in UTC
+    :raises ValueError: if no known unit can be read from `src`
+    """
+    if src in ("today", "yesterday"):
+        # datetime.today() is the current local time, not midnight: without
+        # truncation "today" would mean "now" and "yesterday" would equal "1d"
+        midnight = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
+        if src == "yesterday":
+            midnight = midnight - relativedelta(days=1)
+        # two successive clock reads differ by a few microseconds, round them away
+        utc_offset = timedelta(
+            seconds=round((datetime.utcnow() - datetime.now()).total_seconds())
+        )
+        return midnight + utc_offset
+
+    unit_arguments = {
+        "h": "hours",
+        "d": "days",
+        "w": "weeks",
+        "m": "months",
+        "y": "years",
+    }
+    m = re.match(r"(\d*)(\w+)", src)
+    unit_argument = unit_arguments.get(m[2][0]) if m else None
+    if unit_argument is None:
+        # explicit error instead of an accidental TypeError further down
+        raise ValueError(f"Unknown relative time: {src!r}")
+    value = int(m[1]) if m[1] else 1
+    return datetime.utcnow() - relativedelta(**{unit_argument: value})
+
+
+def parse_time(src: str) -> datetime:
+    """
+    Parse a date argument, either relative ("8days", "today"...) or ISO-like ("2021-06-28"...)
+
+    The input goes to parse_relative_time when RELATIVE_REGEX matches at its
+    start, to parse_iso_datetime otherwise.
+    """
+    is_relative = re.match(RELATIVE_REGEX, src) is not None
+    parser = parse_relative_time if is_relative else parse_iso_datetime
+    return parser(src)
+

 def str_date(date: datetime) -> str:
+    """
+    Format a date as day, abbreviated month name followed by a dot, and year
+    """
    return date.strftime("%d %b. %Y")  # 12 Jun. 2018
@@ -144,29 +222,37 @@ def str_datetime(date: datetime) -> str:
    return date.strftime("%H:%M, %d %b. %Y")  # 12:05, 12 Jun. 2018


-def from_now(src: Optional[datetime]) -> str:
-    if src is None:
-        return "never"
-    delay = datetime.utcnow() - src
+def str_delta(delay: timedelta) -> str:
+    """
+    Describe a duration: "no time", "a minute", "N minutes", "an hour", "N hours", "one day" or "N days"
+
+    Only `delay.days` and `delay.seconds` are read: minutes and hours are
+    floored, and from one day up the remainder below a day is dropped.
+    """
    seconds = delay.seconds
    minutes = seconds // 60
    hours = minutes // 60
    if delay.days < 1:
        if hours < 1:
            if minutes == 0:
-                return "now"
+                return "no time"
            elif minutes == 1:
-                return "a minute ago"
+                return "a minute"
            else:
-                return f"{minutes} minutes ago"
+                return f"{minutes} minutes"
        elif hours == 1:
-            return "an hour ago"
+            return "an hour"
        else:
-            return f"{hours} hours ago"
+            return f"{hours} hours"
    elif delay.days == 1:
-        return "yesterday"
+        return "one day"
    else:
-        return f"{delay.days:,} days ago"
+        return f"{delay.days:,} days"
+
+
+def from_now(src: Optional[datetime]) -> str:
+    """
+    Describe how long ago `src` happened, measured from the current UTC time
+
+    None gives "never". Otherwise the wording comes from str_delta, where
+    "no time" and "one day" are reworded as "now" and "yesterday" and every
+    other result gets an " ago" suffix.
+    """
+    if src is None:
+        return "never"
+    elapsed = str_delta(datetime.utcnow() - src)
+    rewordings = {"no time": "now", "one day": "yesterday"}
+    return rewordings.get(elapsed, f"{elapsed} ago")


 # APP SPECIFIC
@@ -179,46 +265,48 @@ def get_intro(
    members: List[discord.Member],
    nmm: int,  # number of messages impacted
    nc: int,  # number of impacted channels
+    start_datetime: datetime,
+    stop_datetime: datetime,
 ) -> str:
    """
    Get the introduction sentence of the response
    """
+    time_text = ""
+    if start_datetime is not None:
+        stop_datetime = datetime.now() if stop_datetime is None else stop_datetime
+        time_text = f" (in {str_delta(stop_datetime - start_datetime)})"
    # Show all data (members, channels) when it's less than 5 units
    if len(members) == 0:
        # Full scan of the server
        if full:
-            return f"{subject} in this server ({nc} channels, {nmm:,} messages):"
+            return f"{subject} in this server ({nc} channels, {nmm:,} messages){time_text}:"
        elif len(channels) < 5:
-            return f"{aggregate([c.mention for c in channels])} {subject.lower()} in {nmm:,} messages:"
+            return f"{aggregate([c.mention for c in channels])} {subject.lower()} in {nmm:,} messages{time_text}:"
        else:
-            return (
-                f"These {len(channels)} channels {subject.lower()} in {nmm:,} messages:"
-            )
+            return f"These {len(channels)} channels {subject.lower()} in {nmm:,} messages{time_text}:"
    elif len(members) < 5:
        if full:
-            return f"{aggregate([m.mention for m in members])} {subject.lower()} in {nmm:,} messages:"
+            return f"{aggregate([m.mention for m in members])} {subject.lower()} in {nmm:,} messages{time_text}:"
        elif len(channels) < 5:
            return (
                f"{aggregate([m.mention for m in members])} on {aggregate([c.mention for c in channels])} "
-                f"{subject.lower()} in {nmm:,} messages:"
+                f"{subject.lower()} in {nmm:,} messages{time_text}:"
            )
        else:
            return (
                f"{aggregate([m.mention for m in members])} on these {len(channels)} channels "
-                f"{subject.lower()} in {nmm:,} messages:"
+                f"{subject.lower()} in {nmm:,} messages{time_text}:"
            )
    else:
        if full:
-            return (
-                f"These {len(members)} members {subject.lower()} in {nmm:,} messages:"
-            )
+            return f"These {len(members)} members {subject.lower()} in {nmm:,} messages{time_text}:"
        elif len(channels) < 5:
            return (
                f"These {len(members)} members on {aggregate([c.mention for c in channels])} "
-                f"{subject.lower()} in {nmm:,} messages:"
+                f"{subject.lower()} in {nmm:,} messages{time_text}:"
            )
        else:
            return (
                f"These {len(members)} members on these {len(channels)} channels "
-                f"{subject.lower()} in {nmm:,} messages:"
+                f"{subject.lower()} in {nmm:,} messages{time_text}:"
            )