From 04f681dba6f09551805c19df58c8035581be610e Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 00:40:28 +0200
Subject: [PATCH 01/20] %words improvement

---
 README.md                     |  4 +++-
 src/main.py                   | 12 ++++++------
 src/scanners/words_scanner.py | 19 +++++++------------
 3 files changed, 16 insertions(+), 19 deletions(-)
diff --git a/README.md b/README.md
index 881d143..caac0c2 100644
--- a/README.md
+++ b/README.md
@@ -43,7 +43,7 @@
 * %react - rank users by their reactions
   * arguments:
     * <n> - top <n> messages, default is 10
-* %words - rank words by their usage
+* %words - (BETA) rank words by their usage
   * arguments:
     * <n> - words containings <n> or more letters, default is 3
     * <n2> - top <n2> words, default is 10
@@ -104,6 +104,8 @@ python3 src/main.py
 
 ## Changelog
 
+* **v1.13**
+  * improved scan `%words`
 * **v1.12**
   * more scans: `%words`
   * concurrent `fast` analysis
diff --git a/src/main.py b/src/main.py
index 565e92d..4ac9b5f 100644
--- a/src/main.py
+++ b/src/main.py
@@ -45,6 +45,12 @@ bot.register_command(
     "cancel: stop current analysis (not launched with fast)",
     "```\n" + "%cancel: Stop current analysis (not launched with fast)\n" + "```",
 )
+bot.register_command(
+    "words",
+    lambda *args: WordsScanner().compute(*args),
+    "words: (BETA) rank words by their usage",
+    WordsScanner.help(),
+)
 bot.register_command(
     "last",
     lambda *args: LastScanner().compute(*args),
@@ -63,12 +69,6 @@ bot.register_command(
     "first: read first message",
     FirstScanner.help(),
 )
-bot.register_command(
-    "words",
-    lambda *args: WordsScanner().compute(*args),
-    "words: rank words by their usage",
-    WordsScanner.help(),
-)
 bot.register_command(
     "mentioned",
     lambda *args: MentionedScanner().compute(*args),
diff --git a/src/scanners/words_scanner.py b/src/scanners/words_scanner.py
index 80d0971..c31bf9b 100644
--- a/src/scanners/words_scanner.py
+++ b/src/scanners/words_scanner.py
@@ -20,7 +20,7 @@ class WordsScanner(Scanner):
     def help() -> str:
         return (
             "```\n"
-            + "%words: Rank words by their usage\n"
+            + "%words: (BETA) Rank words by their usage\n"
             + "arguments:\n"
             + COMMON_HELP_ARGS
             + "* <n> - words containings <n> or more letters, default is 3\n"
@@ -104,16 +104,13 @@ class WordsScanner(Scanner):
             or message.author in raw_members
         ):
             impacted = True
-            content = " ".join(
-                [
-                    block
-                    for block in message.content.split()
-                    if not re.match(r"^\w+:\/\/", block)
-                ]
-            )
+            content = message.content
+            content = re.sub(r"```.+```", "", content, flags=re.DOTALL)
+            content = re.sub(r"`.+`", "", content, flags=re.DOTALL)
+            content = re.sub(r"\w+:\/\/[^ ]+", "", content)
             for word in re.split("[^\w\-':]", content):
                 m = re.match(
-                    r"(?!^:\w+:$)^[^\w]*((?![\d_])\w.*(?![\d_])\w)[^\w]*$", word
+                    r"(?!^:\w+:$)^[^\w]*((?![\d_])\w[\w\-']*(?![\d_])\w)[^\w]*$", word
                 )
                 if m:
                     word = m[1].lower()
@@ -126,7 +123,5 @@ class WordsScanner(Scanner):
                                 words[word] = words[word + case]
                                 del words[word + case]
                                 break
-                        words[word].update_use(
-                            message.content.count(word), message.created_at
-                        )
+                        words[word].update_use(1, message.created_at)
         return impacted

From a26b90f3928e71e970b93b88ee46c05ead9d1cc4 Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 00:41:54 +0200
Subject: [PATCH 02/20] simple CI

---
 .github/workflows/python.yml | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 .github/workflows/python.yml

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
new file mode 100644
index 0000000..68ceedd
--- /dev/null
+++ b/.github/workflows/python.yml
@@ -0,0 +1,29 @@
+name: Python
+
+on: ["push", "pull_request"]
+
+jobs:
+  syntax:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: [3.7, 3.8, 3.9]
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        python -m pip install flake8 black
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Code style with black
+      run: |
+        black --check
\ No newline at end of file

From ee71314c41404d1a25744c30c63bb241be981e1d Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 00:45:57 +0200
Subject: [PATCH 03/20] removed black check

---
 .github/workflows/python.yml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 68ceedd..75757c7 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -23,7 +23,4 @@ jobs:
         # stop the build if there are Python syntax errors or undefined names
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    - name: Code style with black
-      run: |
-        black --check
\ No newline at end of file
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
\ No newline at end of file

From 6cacb832bf18ec962e391d7a5f6f47dff121ddd3 Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 00:46:36 +0200
Subject: [PATCH 04/20] removed black from installed CI dependencies

---
 .github/workflows/python.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 75757c7..5029c28 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -17,7 +17,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        python -m pip install flake8 black
+        python -m pip install flake8
     - name: Lint with flake8
       run: |
         # stop the build if there are Python syntax errors or undefined names

From 48c4e82cdfce7b39e5e12625685b83b5df6f303a Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 12:19:43 +0200
Subject: [PATCH 05/20] remove old and unused logs at start and guild leaving

---
 README.md              |  1 +
 src/logs/guild_logs.py | 24 +++++++++++++++++++++++-
 src/main.py            | 14 ++++++++++++++
 3 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index caac0c2..c130ef5 100644
--- a/README.md
+++ b/README.md
@@ -106,6 +106,7 @@ python3 src/main.py
 
 * **v1.13**
   * improved scan `%words`
+  * remove old and unused logs at start and guild leaving
 * **v1.12**
   * more scans: `%words`
   * concurrent `fast` analysis
diff --git a/src/logs/guild_logs.py b/src/logs/guild_logs.py
index 88d8823..6fe0201 100644
--- a/src/logs/guild_logs.py
+++ b/src/logs/guild_logs.py
@@ -15,6 +15,7 @@ from utils import code_message, delta, deltas
 
 
 LOG_DIR = "logs"
+LOG_EXT = ".logz"
 
 current_analysis = []
 current_analysis_lock = threading.Lock()
@@ -23,7 +24,10 @@ current_analysis_lock = threading.Lock()
 ALREADY_RUNNING = -100
 CANCELLED = -200
 
+# 5 minutes, assume 'fast' arg
 MIN_MODIFICATION_TIME = 5 * 60
+# ~6 months, remove log file
+MAX_MODIFICATION_TIME = 6 * 30.5 * 24 * 60 * 60
 
 
 class Worker:
@@ -54,7 +58,7 @@ class GuildLogs:
     def __init__(self, guild: discord.Guild):
         self.id = guild.id
         self.guild = guild
-        self.log_file = os.path.join(LOG_DIR, f"{guild.id}.logz")
+        self.log_file = os.path.join(LOG_DIR, f"{guild.id}{LOG_EXT}")
         self.channels = {}
         self.locked = False
 
@@ -322,3 +326,21 @@ class GuildLogs:
                 f"No cancellable analysis are currently running on this server",
                 reference=message,
             )
+
+    @staticmethod
+    def check_logs(guilds: List[discord.Guild]):
+        logging.info(f"checking logs...")
+        guild_ids = [str(guild.id) for guild in guilds]
+        for item in os.listdir(LOG_DIR):
+            path = os.path.join(LOG_DIR, item)
+            name, ext = os.path.splitext(item)
+            if os.path.isfile(path) and ext == LOG_EXT:
+                if (
+                    name in guild_ids
+                    and (time.time() - os.path.getmtime(path)) > MAX_MODIFICATION_TIME
+                ):
+                    logging.info(f"> removing old log '{path}'")
+                    os.unlink(path)
+                elif name not in guild_ids:
+                    logging.info(f"> removing unused log '{path}'")
+                    os.unlink(path)
diff --git a/src/main.py b/src/main.py
index 4ac9b5f..69f6b0a 100644
--- a/src/main.py
+++ b/src/main.py
@@ -39,6 +39,20 @@ bot = Bot(
 
 bot.log_calls = True
 
+
+async def on_ready():
+    GuildLogs.check_logs(bot.client.guilds)
+    return True
+
+
+async def on_guild_remove():
+    GuildLogs.check_logs(bot.client.guilds)
+    return True
+
+
+bot.register_event(on_ready)
+bot.register_event(on_guild_remove)
+
 bot.register_command(
     "(cancel|stop)",
     GuildLogs.cancel,

From 0550a16c51f8e3899e8a9bb21c47433fe114a73f Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 12:20:36 +0200
Subject: [PATCH 06/20] create log dir before checking

---
 src/logs/guild_logs.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/logs/guild_logs.py b/src/logs/guild_logs.py
index 6fe0201..3c4af39 100644
--- a/src/logs/guild_logs.py
+++ b/src/logs/guild_logs.py
@@ -330,6 +330,8 @@ class GuildLogs:
     @staticmethod
     def check_logs(guilds: List[discord.Guild]):
         logging.info(f"checking logs...")
+        if not os.path.exists(LOG_DIR):
+            os.mkdir(LOG_DIR)
         guild_ids = [str(guild.id) for guild in guilds]
         for item in os.listdir(LOG_DIR):
             path = os.path.join(LOG_DIR, item)

From 6a70663201a95471365e287bd2638de7d4df8877 Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 14:57:55 +0200
Subject: [PATCH 07/20] gdpr agreements

---
 src/logs/__init__.py    |   2 +-
 src/logs/guild_logs.py  | 117 +++++++++++++++++++++++-----------------
 src/main.py             |  10 +++-
 src/scanners/scanner.py |  13 ++++-
 src/utils/gdpr.py       |  68 +++++++++++++++++++++++
 5 files changed, 157 insertions(+), 53 deletions(-)
 create mode 100644 src/utils/gdpr.py

diff --git a/src/logs/__init__.py b/src/logs/__init__.py
index 358e9af..d62ab1d 100644
--- a/src/logs/__init__.py
+++ b/src/logs/__init__.py
@@ -1,3 +1,3 @@
 from .message_log import MessageLog
 from .channel_logs import ChannelLogs
-from .guild_logs import GuildLogs, ALREADY_RUNNING, CANCELLED
+from .guild_logs import GuildLogs, ALREADY_RUNNING, CANCELLED, NO_FILE
diff --git a/src/logs/guild_logs.py b/src/logs/guild_logs.py
index 3c4af39..9aa0d1a 100644
--- a/src/logs/guild_logs.py
+++ b/src/logs/guild_logs.py
@@ -23,11 +23,12 @@ current_analysis_lock = threading.Lock()
 
 ALREADY_RUNNING = -100
 CANCELLED = -200
+NO_FILE = -300
 
 # 5 minutes, assume 'fast' arg
 MIN_MODIFICATION_TIME = 5 * 60
-# ~6 months, remove log file
-MAX_MODIFICATION_TIME = 6 * 30.5 * 24 * 60 * 60
+# ~1 year, remove log file
+MAX_MODIFICATION_TIME = 365 * 24 * 60 * 60
 
 
 class Worker:
@@ -110,52 +111,49 @@ class GuildLogs:
         if not os.path.exists(LOG_DIR):
             os.mkdir(LOG_DIR)
         last_time = None
-        if os.path.exists(self.log_file):
-            channels = {}
-            try:
-                last_time = os.path.getmtime(self.log_file)
-                gziped_data = None
-                await code_message(progress, "Reading saved history (1/4)...")
-                t0 = datetime.now()
-                with open(self.log_file, mode="rb") as f:
-                    gziped_data = f.read()
-                logging.info(f"log {self.guild.id} > read in {delta(t0):,}ms")
-                if self.check_cancelled():
-                    return CANCELLED, 0
-                await code_message(progress, "Reading saved history (2/4)...")
-                t0 = datetime.now()
-                json_data = gzip.decompress(gziped_data)
-                del gziped_data
-                logging.info(
-                    f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms"
-                )
-                if self.check_cancelled():
-                    return CANCELLED, 0
-                await code_message(progress, "Reading saved history (3/4)...")
-                t0 = datetime.now()
-                channels = json.loads(json_data)
-                del json_data
-                logging.info(f"log {self.guild.id} > json parse in {delta(t0):,}ms")
-                if self.check_cancelled():
-                    return CANCELLED, 0
-                await code_message(progress, "Reading saved history (4/4)...")
-                t0 = datetime.now()
-                self.channels = {
-                    int(id): ChannelLogs(channels[id], self) for id in channels
-                }
-                # remove invalid format
-                self.channels = {
-                    id: self.channels[id]
-                    for id in self.channels
-                    if self.channels[id].is_format()
-                }
-                logging.info(f"log {self.guild.id} > loaded in {delta(t0):,}ms")
-            except json.decoder.JSONDecodeError:
-                logging.error(f"log {self.guild.id} > invalid JSON")
-            except IOError:
-                logging.error(f"log {self.guild.id} > cannot read")
-        else:
-            fast = False
+        if not os.path.exists(self.log_file):
+            return NO_FILE, 0
+        channels = {}
+        try:
+            last_time = os.path.getmtime(self.log_file)
+            gziped_data = None
+            await code_message(progress, "Reading saved history (1/4)...")
+            t0 = datetime.now()
+            with open(self.log_file, mode="rb") as f:
+                gziped_data = f.read()
+            logging.info(f"log {self.guild.id} > read in {delta(t0):,}ms")
+            if self.check_cancelled():
+                return CANCELLED, 0
+            await code_message(progress, "Reading saved history (2/4)...")
+            t0 = datetime.now()
+            json_data = gzip.decompress(gziped_data)
+            del gziped_data
+            logging.info(f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms")
+            if self.check_cancelled():
+                return CANCELLED, 0
+            await code_message(progress, "Reading saved history (3/4)...")
+            t0 = datetime.now()
+            channels = json.loads(json_data)
+            del json_data
+            logging.info(f"log {self.guild.id} > json parse in {delta(t0):,}ms")
+            if self.check_cancelled():
+                return CANCELLED, 0
+            await code_message(progress, "Reading saved history (4/4)...")
+            t0 = datetime.now()
+            self.channels = {
+                int(id): ChannelLogs(channels[id], self) for id in channels
+            }
+            # remove invalid format
+            self.channels = {
+                id: self.channels[id]
+                for id in self.channels
+                if self.channels[id].is_format()
+            }
+            logging.info(f"log {self.guild.id} > loaded in {delta(t0):,}ms")
+        except json.decoder.JSONDecodeError:
+            logging.error(f"log {self.guild.id} > invalid JSON")
+        except IOError:
+            logging.error(f"log {self.guild.id} > cannot read")
 
         if len(target_channels) == 0:
             target_channels = (
@@ -327,6 +325,29 @@ class GuildLogs:
                 reference=message,
             )
 
+    @staticmethod
+    def init_log(guild: List[discord.Guild]):
+        if not os.path.exists(LOG_DIR):
+            os.mkdir(LOG_DIR)
+        filename = os.path.join(LOG_DIR, f"{guild.id}{LOG_EXT}")
+        if not os.path.exists(filename):
+            with open(filename, mode="wb") as f:
+                f.write(gzip.compress(bytes("{}", "utf-8")))
+            logging.info(f"log {guild.id} > created")
+        else:
+            logging.info(f"log {guild.id} > already exists")
+
+    @staticmethod
+    def remove_log(guild: List[discord.Guild]):
+        if not os.path.exists(LOG_DIR):
+            os.mkdir(LOG_DIR)
+        filename = os.path.join(LOG_DIR, f"{guild.id}{LOG_EXT}")
+        if os.path.exists(filename):
+            os.unlink(filename)
+            logging.info(f"log {guild.id} > removed")
+        else:
+            logging.info(f"log {guild.id} > does not exists")
+
     @staticmethod
     def check_logs(guilds: List[discord.Guild]):
         logging.info(f"checking logs...")
diff --git a/src/main.py b/src/main.py
index 69f6b0a..9bd111c 100644
--- a/src/main.py
+++ b/src/main.py
@@ -6,7 +6,7 @@ if sys.version_info < (3, 7):
     print("Please upgrade your Python version to 3.7.0 or higher")
     sys.exit(1)
 
-from utils import emojis
+from utils import emojis, gdpr
 from scanners import (
     EmotesScanner,
     FullScanner,
@@ -57,7 +57,13 @@ bot.register_command(
     "(cancel|stop)",
     GuildLogs.cancel,
     "cancel: stop current analysis (not launched with fast)",
-    "```\n" + "%cancel: Stop current analysis (not launched with fast)\n" + "```",
+    "```\n%cancel: Stop current analysis (not launched with fast)\n```",
+)
+bot.register_command(
+    "gdpr",
+    gdpr.process,
+    "gdpr: displays GDPR information",
+    gdpr.HELP,
 )
 bot.register_command(
     "words",
diff --git a/src/scanners/scanner.py b/src/scanners/scanner.py
index 9a7e712..772f63f 100644
--- a/src/scanners/scanner.py
+++ b/src/scanners/scanner.py
@@ -5,8 +5,15 @@ import logging
 import re
 import discord
 
-from utils import no_duplicate, get_intro, delta
-from logs import GuildLogs, ChannelLogs, MessageLog, ALREADY_RUNNING, CANCELLED
+from utils import no_duplicate, get_intro, delta, gdpr
+from logs import (
+    GuildLogs,
+    ChannelLogs,
+    MessageLog,
+    ALREADY_RUNNING,
+    CANCELLED,
+    NO_FILE,
+)
 
 
 class Scanner(ABC):
@@ -106,6 +113,8 @@ class Scanner(ABC):
                         "An analysis is already running on this server, please be patient.",
                         reference=message,
                     )
+                elif total_msg == NO_FILE:
+                    await message.channel.send(gdpr.TEXT)
                 else:
                     self.msg_count = 0
                     self.total_msg = 0
diff --git a/src/utils/gdpr.py b/src/utils/gdpr.py
new file mode 100644
index 0000000..e19aa98
--- /dev/null
+++ b/src/utils/gdpr.py
@@ -0,0 +1,68 @@
+import discord
+
+from logs import GuildLogs
+
+
+HELP = (
+    "```\n"
+    + "%gdpr: Displays GDPR information\n"
+    + "arguments:\n"
+    + "* agree - agree to GDPR\n"
+    + "* revoke - remove this server's data\n"
+    + "```"
+)
+
+TEXT = (
+    ""
+    + "__**About Analyst-bot's data usage**__\n"
+    + "**TL;DR**\n"
+    + "Analyst-bot collects text message information. It does not share collected data with any third-party and data is retained 12 months or until the bot is leaving the guild/server.\n"
+    + "**Data collection**\n"
+    + "Analyst-bot collects a Discord guild/server's history when asked to.\n"
+    + "This includes:\n"
+    + "- Visible text channel names\n"
+    + "- Visible text messages: date and time of creation and edition,  author,  content,  reactions and other available metadata (pinned, tts, etc.)\n"
+    + "This does __not__ includes:\n"
+    + "- Voice channels and not visible channels\n"
+    + "- Not visible text messages\n"
+    + "- Visible text messages' embedded content, images and other attachments\n"
+    + "**Data processing**\n"
+    + "Any data collected is only processed in order to produce a one-time report sent to the user immediately. No temporary data are retained.\n"
+    + "**Data storage and retain policy**\n"
+    + "Analyst-bot stores the collected data in files that are accessible by the software and its administrator only.\n"
+    + "Any collected data are retained maximum 12 months until deletion or when the bot is leaving a guild/server.\n"
+    + "**Data sharing**\n"
+    + "Analyst-bot does not share the data collected with any third-party.\n"
+    + "**Right to retract**\n"
+    + "If you want to have your data removed, you can use the `%gdpr revoke` command or remove this bot from your guild/server.\n"
+    + "**Terms agreement**\n"
+    + "By agreeing to these terms, you ensure having the legal age if you are in a country that does have one and you also ensure having the consent of every member involved.\n"
+    + "\n"
+    + "*If you want more information, please contact the creator of this bot: <https://github.com/Klemek/discord-analyst>.*\n"
+    + "\n"
+    + "Type `%gdpr agree` to agree to these terms, `%gdpr revoke` to remove this guild/server's collected data or `%gdpr` to see this message again."
+)
+
+AGREE_TEXT = "Thanks for agreeing for these terms, you can now run analysis on this guild/server."
+
+REVOKE_TEXT = "This guild/server's data has been deleted. To run new analysis you must agree to the terms again."
+
+
+async def process(client: discord.client, message: discord.Message, *args: str):
+    args = list(args)
+    if len(args) == 1:
+        await message.channel.send(TEXT)
+    elif len(args) > 2:
+        await message.channel.send(f"Too many arguments", reference=message)
+    elif args[1] == "help":
+        await message.channel.send(HELP, reference=message)
+    elif args[1] in ["agree", "accept"]:
+        GuildLogs.init_log(message.channel.guild)
+        await message.channel.send(AGREE_TEXT, reference=message)
+    elif args[1] in ["revoke", "cancel"]:
+        GuildLogs.remove_log(message.channel.guild)
+        await message.channel.send(REVOKE_TEXT, reference=message)
+    else:
+        await message.channel.send(
+            f"Unrecognized argument: `{args[1]}`", reference=message
+        )

From 737806a4bacd7740ab076ccce3ae8dca8af00f56 Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 15:00:53 +0200
Subject: [PATCH 08/20] updated readme

---
 README.md         | 2 ++
 src/utils/gdpr.py | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c130ef5..02b294d 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,7 @@
 * %first - read first message
 * %rand - read a random message
 * %last - read last message
+* %gdpr - displays GDPR information
 * %emojis - rank emotes by their usage
   * arguments:
     * <n> - top <n> emojis, default is 20
@@ -107,6 +108,7 @@ python3 src/main.py
 * **v1.13**
   * improved scan `%words`
   * remove old and unused logs at start and guild leaving
+  * GDPR disclaimer before scanning
 * **v1.12**
   * more scans: `%words`
   * concurrent `fast` analysis
diff --git a/src/utils/gdpr.py b/src/utils/gdpr.py
index e19aa98..968582c 100644
--- a/src/utils/gdpr.py
+++ b/src/utils/gdpr.py
@@ -59,7 +59,7 @@ async def process(client: discord.client, message: discord.Message, *args: str):
     elif args[1] in ["agree", "accept"]:
         GuildLogs.init_log(message.channel.guild)
         await message.channel.send(AGREE_TEXT, reference=message)
-    elif args[1] in ["revoke", "cancel"]:
+    elif args[1] in ["revoke", "cancel", "remove", "delete"]:
         GuildLogs.remove_log(message.channel.guild)
         await message.channel.send(REVOKE_TEXT, reference=message)
     else:

From 5f903db9297bd03cd0de3d3748556a386498cce8 Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 15:02:08 +0200
Subject: [PATCH 09/20] updated version before forgetting

---
 src/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main.py b/src/main.py
index 9bd111c..d2278e5 100644
--- a/src/main.py
+++ b/src/main.py
@@ -33,7 +33,7 @@ emojis.load_emojis()
 
 bot = Bot(
     "Discord Analyst",
-    "1.12",
+    "1.13",
     alias="%",
 )
 

From b7a6f3313ba62954bac78da5c148fd657e484652 Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 15:34:03 +0200
Subject: [PATCH 10/20] factorized help and triple-quote multi-line

---
 src/scanners/__init__.py            |  2 +-
 src/scanners/channels_scanner.py    | 16 +++----
 src/scanners/composition_scanner.py | 12 +----
 src/scanners/emotes_scanner.py      | 25 +++++-----
 src/scanners/first_scanner.py       |  5 +-
 src/scanners/frequency_scanner.py   | 14 ++----
 src/scanners/full_scanner.py        | 12 +----
 src/scanners/history_scanner.py     | 13 -----
 src/scanners/last_scanner.py        |  5 +-
 src/scanners/mentioned_scanner.py   | 20 ++++----
 src/scanners/mentions_scanner.py    | 21 ++++-----
 src/scanners/messages_scanner.py    | 16 +++----
 src/scanners/presence_scanner.py    | 12 +----
 src/scanners/random_scanner.py      |  5 +-
 src/scanners/reactions_scanner.py   | 15 +++---
 src/scanners/words_scanner.py       | 21 ++++-----
 src/utils/gdpr.py                   | 73 ++++++++++++++---------------
 src/utils/utils.py                  | 32 ++++++++++---
 18 files changed, 134 insertions(+), 185 deletions(-)

diff --git a/src/scanners/__init__.py b/src/scanners/__init__.py
index 21fd922..ed9141d 100644
--- a/src/scanners/__init__.py
+++ b/src/scanners/__init__.py
@@ -11,4 +11,4 @@ from .reactions_scanner import ReactionsScanner
 from .first_scanner import FirstScanner
 from .last_scanner import LastScanner
 from .random_scanner import RandomScanner
-from .words_scanner import WordsScanner
\ No newline at end of file
+from .words_scanner import WordsScanner
diff --git a/src/scanners/channels_scanner.py b/src/scanners/channels_scanner.py
index b3e7763..c766fb4 100644
--- a/src/scanners/channels_scanner.py
+++ b/src/scanners/channels_scanner.py
@@ -8,21 +8,17 @@ import discord
 from logs import ChannelLogs, MessageLog
 from .scanner import Scanner
 from data_types import Counter
-from utils import COMMON_HELP_ARGS, mention, channel_mention
+from utils import generate_help, mention, channel_mention
 
 
 class ChannelsScanner(Scanner):
     @staticmethod
     def help() -> str:
-        return (
-            "```\n"
-            + "%chan: Rank channels by their messages\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* <n> - top <n>, default is 10\n"
-            + "* all/everyone - include bots\n"
-            + "Example: %chan 10 @user\n"
-            + "```"
+        return generate_help(
+            "chan",
+            "Rank channels by their messages",
+            args=["<n> - top <n>, default is 10", "all/everyone - include bots"],
+            example="5 @user",
         )
 
     def __init__(self):
diff --git a/src/scanners/composition_scanner.py b/src/scanners/composition_scanner.py
index daec2f7..a2f3822 100644
--- a/src/scanners/composition_scanner.py
+++ b/src/scanners/composition_scanner.py
@@ -8,21 +8,13 @@ import discord
 from .scanner import Scanner
 from data_types import Composition
 from logs import ChannelLogs, MessageLog
-from utils import emojis, COMMON_HELP_ARGS
+from utils import emojis, generate_help
 
 
 class CompositionScanner(Scanner):
     @staticmethod
     def help() -> str:
-        return (
-            "```\n"
-            + "%compo: Show composition statistics\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* all/everyone - include bots\n"
-            + "Example: %compo #mychannel1 @user\n"
-            + "```"
-        )
+        return generate_help("compo", "Show composition statistics")
 
     def __init__(self):
         super().__init__(
diff --git a/src/scanners/emotes_scanner.py b/src/scanners/emotes_scanner.py
index 8c7b93f..b126812 100644
--- a/src/scanners/emotes_scanner.py
+++ b/src/scanners/emotes_scanner.py
@@ -8,24 +8,23 @@ import discord
 from logs import ChannelLogs, MessageLog
 from data_types import Emote, get_emote_dict
 from .scanner import Scanner
-from utils import emojis, COMMON_HELP_ARGS, plural, precise
+from utils import emojis, generate_help, plural, precise
 
 
 class EmotesScanner(Scanner):
     @staticmethod
     def help() -> str:
-        return (
-            "```\n"
-            + "%emojis: Rank emojis by their usage\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* <n> - top <n> emojis, default is 20\n"
-            + "* all - list all common emojis in addition to this guild's\n"
-            + "* members - show top member for each emojis\n"
-            + "* sort:usage/reaction - other sorting methods\n"
-            + "* everyone - include bots\n"
-            + "Example: %emojis 10 all #mychannel1 #mychannel2 @user\n"
-            + "```"
+        return generate_help(
+            "emojis",
+            "Rank emojis by their usage",
+            args=[
+                "<n> - top <n> emojis, default is 20",
+                "all - list all common emojis in addition to this guild's",
+                "members - show top member for each emojis",
+                "sort:usage/reaction - other sorting methods",
+                "everyone - include bots",
+            ],
+            example="10 all #mychannel1 #mychannel2 @user",
         )
 
     def __init__(self):
diff --git a/src/scanners/first_scanner.py b/src/scanners/first_scanner.py
index 766b145..1048e2c 100644
--- a/src/scanners/first_scanner.py
+++ b/src/scanners/first_scanner.py
@@ -3,14 +3,13 @@ from typing import List
 # Custom libs
 
 from .history_scanner import HistoryScanner
+from utils import generate_help
 
 
 class FirstScanner(HistoryScanner):
     @staticmethod
     def help() -> str:
-        return super(FirstScanner, FirstScanner).help(
-            cmd="first", text="Read first message"
-        )
+        return generate_help("first", "Read first message")
 
     def __init__(self):
         super().__init__(help=FirstScanner.help())
diff --git a/src/scanners/frequency_scanner.py b/src/scanners/frequency_scanner.py
index fac0a27..e0a4818 100644
--- a/src/scanners/frequency_scanner.py
+++ b/src/scanners/frequency_scanner.py
@@ -8,21 +8,13 @@ import discord
 from .scanner import Scanner
 from data_types import Frequency
 from logs import ChannelLogs, MessageLog
-from utils import COMMON_HELP_ARGS
+from utils import generate_help
 
 
 class FrequencyScanner(Scanner):
     @staticmethod
     def help() -> str:
-        return (
-            "```\n"
-            + "%freq: Show frequency-related statistics\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* all/everyone - include bots\n"
-            + "Example: %freq #mychannel1 @user\n"
-            + "```"
-        )
+        return generate_help("freq", "Show frequency-related statistics")
 
     def __init__(self):
         super().__init__(
@@ -55,7 +47,7 @@ class FrequencyScanner(Scanner):
         freq: Frequency,
         raw_members: List[int],
         *,
-        all_messages: bool
+        all_messages: bool,
     ) -> bool:
         impacted = False
         # If author is included in the selection (empty list is all)
diff --git a/src/scanners/full_scanner.py b/src/scanners/full_scanner.py
index 22149bd..ac5cb0e 100644
--- a/src/scanners/full_scanner.py
+++ b/src/scanners/full_scanner.py
@@ -8,21 +8,13 @@ from .scanner import Scanner
 from . import FrequencyScanner, CompositionScanner, PresenceScanner
 from data_types import Frequency, Composition, Presence
 from logs import ChannelLogs, MessageLog
-from utils import COMMON_HELP_ARGS
+from utils import generate_help
 
 
 class FullScanner(Scanner):
     @staticmethod
     def help() -> str:
-        return (
-            "```\n"
-            + "%scan: Show full statistics\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* all/everyone - include bots\n"
-            + "Example: %scan #mychannel1 @user\n"
-            + "```"
-        )
+        return generate_help("scan", "Show full statistics")
 
     def __init__(self):
         super().__init__(
diff --git a/src/scanners/history_scanner.py b/src/scanners/history_scanner.py
index c61872e..5a3ae4c 100644
--- a/src/scanners/history_scanner.py
+++ b/src/scanners/history_scanner.py
@@ -7,22 +7,9 @@ import discord
 from .scanner import Scanner
 from data_types import History
 from logs import ChannelLogs, MessageLog
-from utils import COMMON_HELP_ARGS
 
 
 class HistoryScanner(Scanner, ABC):
-    @staticmethod
-    def help(*, cmd: str, text: str) -> str:
-        return (
-            "```\n"
-            + f"%{cmd}: {text}\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* all/everyone - include bots\n"
-            + "Example: %{cmd} #mychannel1 @user\n"
-            + "```"
-        )
-
     def __init__(self, *, help: str):
         super().__init__(
             has_digit_args=True,
diff --git a/src/scanners/last_scanner.py b/src/scanners/last_scanner.py
index 7713195..3d8cbf0 100644
--- a/src/scanners/last_scanner.py
+++ b/src/scanners/last_scanner.py
@@ -3,14 +3,13 @@ from typing import List
 # Custom libs
 
 from .history_scanner import HistoryScanner
+from utils import generate_help
 
 
 class LastScanner(HistoryScanner):
     @staticmethod
     def help() -> str:
-        return super(LastScanner, LastScanner).help(
-            cmd="last", text="Read last message"
-        )
+        return generate_help("last", "Read last message")
 
     def __init__(self):
         super().__init__(help=LastScanner.help())
diff --git a/src/scanners/mentioned_scanner.py b/src/scanners/mentioned_scanner.py
index 8cf74d4..fa6c09e 100644
--- a/src/scanners/mentioned_scanner.py
+++ b/src/scanners/mentioned_scanner.py
@@ -8,22 +8,18 @@ import discord
 from logs import ChannelLogs, MessageLog
 from .scanner import Scanner
 from data_types import Counter
-from utils import COMMON_HELP_ARGS, plural, precise, mention, alt_mention
+from utils import generate_help, plural, precise, mention, alt_mention
 
 
 class MentionedScanner(Scanner):
     @staticmethod
     def help() -> str:
-        return (
-            "```\n"
-            + "%mentioned: Rank specific user's mentions by their usage\n"
-            + "arguments:\n"
-            + "* @member/me - (required) one or more member\n"
-            + "\n".join(COMMON_HELP_ARGS.split("\n")[1:])
-            + "* <n> - top <n> mentions, default is 10\n"
-            + "* all - include bots mentions\n"
-            + "Example: %mentioned 10 @user\n"
-            + "```"
+        return generate_help(
+            "mentioned",
+            "Rank specific user's mentions by their usage",
+            args=["<n> - top <n>, default is 10", "all/everyone - include bots"],
+            example="5 @user",
+            replace_args=["@member/me - (required) one or more member"],
         )
 
     def __init__(self):
@@ -45,7 +41,7 @@ class MentionedScanner(Scanner):
                 "You need to mention at least one member or use `me`", reference=message
             )
             return False
-        self.all_mentions = "all" in args
+        self.all_mentions = "all" in args or "everyone" in args
         # Create mentions dict
         self.mentions = defaultdict(Counter)
         return True
diff --git a/src/scanners/mentions_scanner.py b/src/scanners/mentions_scanner.py
index 8890390..50a0f5c 100644
--- a/src/scanners/mentions_scanner.py
+++ b/src/scanners/mentions_scanner.py
@@ -9,7 +9,7 @@ from logs import ChannelLogs, MessageLog
 from .scanner import Scanner
 from data_types import Counter
 from utils import (
-    COMMON_HELP_ARGS,
+    generate_help,
     plural,
     precise,
     mention,
@@ -22,16 +22,15 @@ from utils import (
 class MentionsScanner(Scanner):
     @staticmethod
     def help() -> str:
-        return (
-            "```\n"
-            + "%mentions: Rank mentions by their usage\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* <n> - top <n> mentions, default is 10\n"
-            + "* all - show role/channel/everyone/here mentions\n"
-            + "* everyone - include bots mentions\n"
-            + "Example: %mentions 10 #mychannel1 #mychannel2 @user\n"
-            + "```"
+        return generate_help(
+            "mentions",
+            "Rank mentions by their usage",
+            args=[
+                "<n> - top <n>, default is 10",
+                "all - show role/channel/everyone/here mentions",
+                "everyone - include bots mentions",
+            ],
+            example="10 #mychannel1 #mychannel2 @user",
         )
 
     def __init__(self):
diff --git a/src/scanners/messages_scanner.py b/src/scanners/messages_scanner.py
index f576057..a79735e 100644
--- a/src/scanners/messages_scanner.py
+++ b/src/scanners/messages_scanner.py
@@ -8,21 +8,17 @@ import discord
 from logs import ChannelLogs, MessageLog
 from .scanner import Scanner
 from data_types import Counter
-from utils import COMMON_HELP_ARGS, mention, channel_mention
+from utils import generate_help, mention, channel_mention
 
 
 class MessagesScanner(Scanner):
     @staticmethod
     def help() -> str:
-        return (
-            "```\n"
-            + "%msg: Rank users by their messages\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* <n> - top <n>, default is 10\n"
-            + "* all/everyone - include bots\n"
-            + "Example: %msg 10 #channel\n"
-            + "```"
+        return generate_help(
+            "msg",
+            "Rank users by their messages",
+            args=["<n> - top <n>, default is 10", "all/everyone - include bots"],
+            example="10 #channel",
         )
 
     def __init__(self):
diff --git a/src/scanners/presence_scanner.py b/src/scanners/presence_scanner.py
index b19e723..5e39931 100644
--- a/src/scanners/presence_scanner.py
+++ b/src/scanners/presence_scanner.py
@@ -7,21 +7,13 @@ import discord
 from .scanner import Scanner
 from data_types import Presence
 from logs import ChannelLogs, MessageLog
-from utils import COMMON_HELP_ARGS
+from utils import generate_help
 
 
 class PresenceScanner(Scanner):
     @staticmethod
     def help() -> str:
-        return (
-            "```\n"
-            + "%pres: Show presence statistics\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* all/everyone - include bots\n"
-            + "Example: %pres #mychannel1 @user\n"
-            + "```"
-        )
+        return generate_help("pres", "Show presence statistics")
 
     def __init__(self):
         super().__init__(
diff --git a/src/scanners/random_scanner.py b/src/scanners/random_scanner.py
index 9ef520b..f4fb7a9 100644
--- a/src/scanners/random_scanner.py
+++ b/src/scanners/random_scanner.py
@@ -3,14 +3,13 @@ from typing import List
 # Custom libs
 
 from .history_scanner import HistoryScanner
+from utils import generate_help
 
 
 class RandomScanner(HistoryScanner):
     @staticmethod
     def help() -> str:
-        return super(RandomScanner, RandomScanner).help(
-            cmd="rand", text="Read a random message"
-        )
+        return generate_help("rand", "Read a random message")
 
     def __init__(self):
         super().__init__(help=RandomScanner.help())
diff --git a/src/scanners/reactions_scanner.py b/src/scanners/reactions_scanner.py
index bb84387..3603a06 100644
--- a/src/scanners/reactions_scanner.py
+++ b/src/scanners/reactions_scanner.py
@@ -8,20 +8,17 @@ import discord
 from logs import ChannelLogs, MessageLog
 from .scanner import Scanner
 from data_types import Counter
-from utils import COMMON_HELP_ARGS, mention, channel_mention
+from utils import generate_help, mention, channel_mention
 
 
 class ReactionsScanner(Scanner):
     @staticmethod
     def help() -> str:
-        return (
-            "```\n"
-            + "%react: Rank users by their reactions\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* <n> - top <n>, default is 10\n"
-            + "Example: %react 10 #channel\n"
-            + "```"
+        return generate_help(
+            "react",
+            "Rank users by their reactions",
+            args=["<n> - top <n>, default is 10"],
+            example="10 #channel",
         )
 
     def __init__(self):
diff --git a/src/scanners/words_scanner.py b/src/scanners/words_scanner.py
index c31bf9b..f7f6dd7 100644
--- a/src/scanners/words_scanner.py
+++ b/src/scanners/words_scanner.py
@@ -9,7 +9,7 @@ from logs import ChannelLogs, MessageLog
 from .scanner import Scanner
 from data_types import Counter
 from utils import (
-    COMMON_HELP_ARGS,
+    generate_help,
     plural,
     precise,
 )
@@ -18,16 +18,15 @@ from utils import (
 class WordsScanner(Scanner):
     @staticmethod
     def help() -> str:
-        return (
-            "```\n"
-            + "%words: (BETA) Rank words by their usage\n"
-            + "arguments:\n"
-            + COMMON_HELP_ARGS
-            + "* <n> - words containings <n> or more letters, default is 3\n"
-            + "* <n2> - top <n2> words, default is 10\n"
-            + "* everyone - include bots\n"
-            + "Example: %words 5 10 #mychannel1 #mychannel2 @user\n"
-            + "```"
+        return generate_help(
+            "words",
+            "(BETA) Rank words by their usage",
+            args=[
+                "<n> - words containings <n> or more letters, default is 3",
+                "<n2> - top <n2> words, default is 10",
+                "all/everyone - include bots",
+            ],
+            example="5 10 #mychannel1 #mychannel2 @user",
         )
 
     def __init__(self):
diff --git a/src/utils/gdpr.py b/src/utils/gdpr.py
index 968582c..5ae85c6 100644
--- a/src/utils/gdpr.py
+++ b/src/utils/gdpr.py
@@ -3,45 +3,42 @@ import discord
 from logs import GuildLogs
 
 
-HELP = (
-    "```\n"
-    + "%gdpr: Displays GDPR information\n"
-    + "arguments:\n"
-    + "* agree - agree to GDPR\n"
-    + "* revoke - remove this server's data\n"
-    + "```"
-)
+HELP = """```
+%gdpr: Displays GDPR information
+arguments:
+* agree - agree to GDPR
+* revoke - remove this server's data
+```"""
 
-TEXT = (
-    ""
-    + "__**About Analyst-bot's data usage**__\n"
-    + "**TL;DR**\n"
-    + "Analyst-bot collects text message information. It does not share collected data with any third-party and data is retained 12 months or until the bot is leaving the guild/server.\n"
-    + "**Data collection**\n"
-    + "Analyst-bot collects a Discord guild/server's history when asked to.\n"
-    + "This includes:\n"
-    + "- Visible text channel names\n"
-    + "- Visible text messages: date and time of creation and edition,  author,  content,  reactions and other available metadata (pinned, tts, etc.)\n"
-    + "This does __not__ includes:\n"
-    + "- Voice channels and not visible channels\n"
-    + "- Not visible text messages\n"
-    + "- Visible text messages' embedded content, images and other attachments\n"
-    + "**Data processing**\n"
-    + "Any data collected is only processed in order to produce a one-time report sent to the user immediately. No temporary data are retained.\n"
-    + "**Data storage and retain policy**\n"
-    + "Analyst-bot stores the collected data in files that are accessible by the software and its administrator only.\n"
-    + "Any collected data are retained maximum 12 months until deletion or when the bot is leaving a guild/server.\n"
-    + "**Data sharing**\n"
-    + "Analyst-bot does not share the data collected with any third-party.\n"
-    + "**Right to retract**\n"
-    + "If you want to have your data removed, you can use the `%gdpr revoke` command or remove this bot from your guild/server.\n"
-    + "**Terms agreement**\n"
-    + "By agreeing to these terms, you ensure having the legal age if you are in a country that does have one and you also ensure having the consent of every member involved.\n"
-    + "\n"
-    + "*If you want more information, please contact the creator of this bot: <https://github.com/Klemek/discord-analyst>.*\n"
-    + "\n"
-    + "Type `%gdpr agree` to agree to these terms, `%gdpr revoke` to remove this guild/server's collected data or `%gdpr` to see this message again."
-)
+TEXT = """
+__**About Analyst-bot's data usage**__
+**TL;DR**
+Analyst-bot collects text message information. It does not share collected data with any third-party and data is retained 18 months or until the bot is leaving the guild/server.
+**Data collection**
+Analyst-bot collects a Discord guild/server's history when asked to.
+This includes:
+- Visible text channel names
+- Visible text messages: date and time of creation and edition,  author,  content,  reactions and other available metadata (pinned, tts, etc.)
+This does __not__ includes:
+- Voice channels and not visible channels
+- Not visible text messages
+- Visible text messages' embedded content, images and other attachments
+**Data processing**
+Any data collected is only processed in order to produce a one-time report sent to the user immediately. No temporary data are retained.
+**Data storage and retain policy**
+Analyst-bot stores the collected data in files that are accessible by the software and its administrator only.
+Any collected data are retained maximum 18 months until deletion or when the bot is leaving a guild/server.
+**Data sharing**
+Analyst-bot does not share the data collected with any third-party.
+**Right to retract**
+If you want to have your data removed, you can use the `%gdpr revoke` command or remove this bot from your guild/server.
+**Terms agreement**
+By agreeing to these terms, you ensure having the legal age if you are in a country that does have one and you also ensure having the consent of every member involved.
+
+*If you want more information, please contact the creator of this bot: <https://github.com/Klemek/discord-analyst>.*
+
+Type `%gdpr agree` to agree to these terms, `%gdpr revoke` to remove this guild/server's collected data or `%gdpr` to see this message again.
+"""
 
 AGREE_TEXT = "Thanks for agreeing for these terms, you can now run analysis on this guild/server."
 
diff --git a/src/utils/utils.py b/src/utils/utils.py
index 880d892..1447a40 100644
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@@ -7,13 +7,31 @@ from datetime import datetime
 
 # OTHER
 
-COMMON_HELP_ARGS = (
-    ""
-    + "* @member/me - filter for one or more member\n"
-    + "* #channel/here - filter for one or more channel\n"
-    + "* fast - only read cache\n"
-    + "* fresh - does not read cache (long)\n"
-)
+COMMON_HELP_ARGS = [
+    "@member/me - filter for one or more member",
+    "#channel/here - filter for one or more channel",
+    "fast - only read cache",
+    "fresh - does not read cache (long)",
+]
+
+
+def generate_help(
+    cmd: str,
+    info: str,
+    *,
+    args=("all/everyone - include bots",),
+    example="#mychannel1 @user",
+    replace_args=(),
+):
+    # replace_args overrides the first len(replace_args) entries of COMMON_HELP_ARGS.
+    arg_lines = [*replace_args, *COMMON_HELP_ARGS[len(replace_args) :], *args]
+    arg_list = "* " + "\n* ".join(arg_lines)
+    return f"""```
+%{cmd}: {info}
+arguments:
+{arg_list}
+Example: %{cmd} {example}
+```"""
 
 
 def delta(t0: datetime):

From 2062f08721373e1ed4a5fd60b9b75ffa78674cab Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 17:39:42 +0200
Subject: [PATCH 11/20] start and stop dates

---
 README.md                   |  5 ++
 requirements.txt            |  1 +
 src/data_types/frequency.py |  7 ++-
 src/logs/channel_logs.py    |  8 +++
 src/scanners/scanner.py     | 74 +++++++++++++++++++++++-----
 src/utils/utils.py          | 98 +++++++++++++++++++++++++++----------
 6 files changed, 156 insertions(+), 37 deletions(-)

diff --git a/README.md b/README.md
index 02b294d..0970c0a 100644
--- a/README.md
+++ b/README.md
@@ -53,9 +53,13 @@
 * Common arguments:
     * @member/me: filter for one or more member
     * #channel/here: filter for one or more channel
+    * <date1> - filter after <date1>
+    * <date2> - filter before <date2>
     * all/everyone - include bots messages
     * fast: only read cache
     * fresh: does not read cache
+
+(Dates are formatted 'yyyy-mm-dd' or 'yyyy-mm-ddThh:mm' (ISO 8601) or 'week/month/year')
 ```
 
 ## Running this bot
@@ -109,6 +113,7 @@ python3 src/main.py
   * improved scan `%words`
   * remove old and unused logs at start and guild leaving
   * GDPR disclaimer before scanning
+  * start and stop dates
 * **v1.12**
   * more scans: `%words`
   * concurrent `fast` analysis
diff --git a/requirements.txt b/requirements.txt
index 95a454b..7bc9d08 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 discord.py
 python-dotenv
+python-dateutil
 git+git://github.com/Klemek/miniscord.git
diff --git a/src/data_types/frequency.py b/src/data_types/frequency.py
index 14cf5dd..075084c 100644
--- a/src/data_types/frequency.py
+++ b/src/data_types/frequency.py
@@ -38,7 +38,10 @@ class Frequency:
         *,
         member_specific: bool,
     ) -> List[str]:
+        self.dates.sort()
         delta = self.dates[-1] - self.dates[0]
+        if delta.days == 0:
+            delta = timedelta(days=1)
         total_msg = len(self.dates)
         busiest_weekday = top_key(self.week)
         busiest_hour = top_key(self.day)
@@ -56,7 +59,9 @@ class Frequency:
             f"- **latest message**: {str_datetime(self.dates[-1])} ({from_now(self.dates[-1])})",
             f"- **messages/day**: {precise(total_msg/delta.days, precision=3)}",
             f"- **busiest day of week**: {calendar.day_name[busiest_weekday]} (~{precise(self.week[busiest_weekday]/n_weekdays, precision=3)} msg, {percent(self.week[busiest_weekday]/total_msg)})",
-            f"- **busiest day ever**: {str_date(self.busiest_day)} ({from_now(self.busiest_day)}, {self.busiest_day_count} msg)",
+            f"- **busiest day ever**: {str_date(self.busiest_day)} ({from_now(self.busiest_day)}, {self.busiest_day_count} msg)"
+            if self.busiest_day is not None
+            else "",
             f"- **messages/hour**: {precise(total_msg*3600/delta.total_seconds(), precision=3)}",
             f"- **busiest hour of day**: {busiest_hour:0>2}:00 (~{precise(self.day[busiest_hour]/n_hours, precision=3)} msg, {percent(self.day[busiest_hour]/total_msg)})",
             f"- **busiest hour ever**: {str_datetime(self.busiest_hour)} ({from_now(self.busiest_hour)}, {self.busiest_hour_count} msg)",
diff --git a/src/logs/channel_logs.py b/src/logs/channel_logs.py
index 86c7a28..83681e9 100644
--- a/src/logs/channel_logs.py
+++ b/src/logs/channel_logs.py
@@ -1,5 +1,6 @@
 from typing import Union, Tuple, Any
 import discord
+from discord import message
 
 from . import MessageLog
 from utils import FakeMessage
@@ -17,6 +18,7 @@ class ChannelLogs:
             self.last_message_id = None
             self.format = FORMAT
             self.messages = []
+            self.start_date = None
         elif isinstance(channel, dict):
             self.format = channel["format"] if "format" in channel else None
             if not self.is_format():
@@ -31,6 +33,9 @@ class ChannelLogs:
             self.messages = [
                 MessageLog(message, self) for message in channel["messages"]
             ]
+            self.start_date = (
+                self.messages[-1].created_at if len(self.messages) > 0 else None
+            )
 
     def is_format(self):
         return self.format == FORMAT
@@ -80,6 +85,9 @@ class ChannelLogs:
         except discord.errors.HTTPException:
             yield -1, True
             return  # When an exception occurs (like Forbidden)
+        self.start_date = (
+            self.messages[-1].created_at if len(self.messages) > 0 else None
+        )
         yield len(self.messages), True
 
     def dict(self) -> dict:
diff --git a/src/scanners/scanner.py b/src/scanners/scanner.py
index 772f63f..8931318 100644
--- a/src/scanners/scanner.py
+++ b/src/scanners/scanner.py
@@ -5,7 +5,16 @@ import logging
 import re
 import discord
 
-from utils import no_duplicate, get_intro, delta, gdpr
+
+from utils import (
+    no_duplicate,
+    get_intro,
+    delta,
+    gdpr,
+    ISO8601_REGEX,
+    parse_time,
+    RELATIVE_TIME,
+)
 from logs import (
     GuildLogs,
     ChannelLogs,
@@ -54,22 +63,42 @@ class Scanner(ABC):
                 str(channel.id) for channel in message.channel_mentions
             ]
             str_mentions = [str(member.id) for member in message.mentions]
+            dates = []
             for i, arg in enumerate(args[1:]):
+                skip_check = False
                 if re.match(r"^<@!?\d+>$", arg):
                     arg = arg[3:-1] if "!" in arg else arg[2:-1]
                 elif re.match(r"^<#!?\d+>$", arg):
                     arg = arg[3:-1] if "!" in arg else arg[2:-1]
+                elif re.match(ISO8601_REGEX, arg) or arg in RELATIVE_TIME:
+                    dates += [parse_time(arg)]
+                    skip_check = True
+                    if len(dates) > 2:
+                        await message.channel.send(
+                            f"Too many date arguments: `{arg}`", reference=message
+                        )
+                        return
                 if (
                     arg not in self.valid_args + ["me", "here", "fast", "fresh"]
                     and (not arg.isdigit() or not self.has_digit_args)
                     and arg not in str_channel_mentions
                     and arg not in str_mentions
+                    and not skip_check
                 ):
                     await message.channel.send(
                         f"Unrecognized argument: `{arg}`", reference=message
                     )
                     return
 
+            # utils compares message dates against utcnow(); use naive UTC here too.
+            now = datetime.utcnow()
+            self.start_datetime = min(dates) if dates else None
+            self.stop_datetime = max(dates) if len(dates) >= 2 else now
+
+            if self.start_datetime is not None and self.start_datetime > now:
+                await message.channel.send("Start date is after today", reference=message)
+                return
+
             # Get selected channels or all of them if no channel arguments
             self.channels = no_duplicate(message.channel_mentions)
 
@@ -103,6 +132,18 @@ class Scanner(ABC):
                 total_msg, total_chan = await logs.load(
                     progress, self.channels, fast="fast" in args, fresh="fresh" in args
                 )
+                if self.start_datetime is not None:
+                    # Clamp the requested start to the earliest start_date among
+                    # the selected channels' logs. The list is empty when none of
+                    # them has a log with messages; min([]) would raise ValueError.
+                    log_starts = [
+                        logs.channels[channel.id].start_date
+                        for channel in self.channels
+                        if channel.id in logs.channels
+                        and logs.channels[channel.id].start_date is not None
+                    ]
+                    if log_starts:
+                        self.start_datetime = max(self.start_datetime, min(log_starts))
                 if total_msg == CANCELLED:
                     await message.channel.send(
                         "Operation cancelled by user",
@@ -127,13 +168,21 @@ class Scanner(ABC):
                                 [
                                     self.compute_message(channel_logs, message_log)
                                     for message_log in channel_logs.messages
+                                    if (
+                                        self.start_datetime is None
+                                        or message_log.created_at >= self.start_datetime
+                                    )
+                                    and (
+                                        self.stop_datetime is None
+                                        or message_log.created_at <= self.stop_datetime
+                                    )
                                 ]
                             )
                             self.total_msg += len(channel_logs.messages)
                             self.msg_count += count
                             self.chan_count += 1 if count > 0 else 0
                     logging.info(f"scan {guild.id} > scanned in {delta(t0):,}ms")
-                    if self.total_msg == 0:
+                    if self.msg_count == 0:
                         await message.channel.send(
                             "There are no messages found matching the filters",
                             reference=message,
@@ -150,21 +199,24 @@ class Scanner(ABC):
                                 self.members,
                                 self.msg_count,
                                 self.chan_count,
+                                self.start_datetime,
+                                self.stop_datetime,
                             )
                         )
                         logging.info(f"scan {guild.id} > results in {delta(t0):,}ms")
                         response = ""
                         first = True
                         for r in results:
-                            if len(response + "\n" + r) > 2000:
-                                await message.channel.send(
-                                    response,
-                                    reference=message if first else None,
-                                    allowed_mentions=discord.AllowedMentions.none(),
-                                )
-                                first = False
-                                response = ""
-                            response += "\n" + r
+                            if r:
+                                if len(response + "\n" + r) > 2000:
+                                    await message.channel.send(
+                                        response,
+                                        reference=message if first else None,
+                                        allowed_mentions=discord.AllowedMentions.none(),
+                                    )
+                                    first = False
+                                    response = ""
+                                response += "\n" + r
                         if len(response) > 0:
                             await message.channel.send(
                                 response,
diff --git a/src/utils/utils.py b/src/utils/utils.py
index 1447a40..a8f2ad5 100644
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@@ -1,15 +1,21 @@
+from calendar import month
 from typing import List, Dict, Union, Optional, Any
 import os
 import logging
 import discord
 import math
-from datetime import datetime
+from datetime import datetime, timedelta
+import re
+import dateutil.parser
+from dateutil.relativedelta import relativedelta
 
 # OTHER
 
 COMMON_HELP_ARGS = [
     "@member/me - filter for one or more member",
     "#channel/here - filter for one or more channel",
+    "<date1> - filter after <date1>",
+    "<date2> - filter before <date2>",
     "fast - only read cache",
     "fresh - does not read cache (long)",
 ]
@@ -30,6 +36,7 @@ def generate_help(
 %{cmd}: {info}
 arguments:
 {arg_list}
+(Dates are formated 'yyyy-mm-dd' or 'yyyy-mm-ddThh:mm' (ISO 8601) or 'week/month/year')
 Example: %{cmd} {example}
 ```"""
 
@@ -153,6 +160,37 @@ def precise(p: float, *, precision: int = 2) -> str:
 
 # DATE FORMATTING
 
+ISO8601_REGEX = r"^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)?$"
+ISO8601_FULL = "0000-01-01T00:00:00"
+
+
+def parse_iso_datetime(str_date: str) -> datetime:
+    # Pad a truncated date ("2021", "2021-04", ...) with the tail of ISO8601_FULL;
+    # otherwise dateutil fills the missing fields from the current date.
+    if re.match(r"^\d{4}(-\d{2}(-\d{2}(T\d{2}(:\d{2}(:\d{2})?)?)?)?)?$", str_date):
+        str_date += ISO8601_FULL[len(str_date) :]
+    return dateutil.parser.parse(str_date)
+
+
+RELATIVE_TIME = {
+    "today": relativedelta(days=1),
+    "yesterday": relativedelta(days=2),
+    "week": relativedelta(weeks=1),
+    "month": relativedelta(months=1),
+    "year": relativedelta(years=1),
+}
+
+
+def parse_relative_time(src: str) -> datetime:
+    return datetime.utcnow() - RELATIVE_TIME[src]
+
+
+def parse_time(src: str) -> datetime:
+    if src in RELATIVE_TIME:
+        return parse_relative_time(src)
+    else:
+        return parse_iso_datetime(src)
+
 
 def str_date(date: datetime) -> str:
     return date.strftime("%d %b. %Y")  # 12 Jun. 2018
@@ -162,29 +200,37 @@ def str_datetime(date: datetime) -> str:
     return date.strftime("%H:%M, %d %b. %Y")  # 12:05, 12 Jun. 2018
 
 
-def from_now(src: Optional[datetime]) -> str:
-    if src is None:
-        return "never"
-    delay = datetime.utcnow() - src
+def str_delta(delay: timedelta) -> str:
     seconds = delay.seconds
     minutes = seconds // 60
     hours = minutes // 60
     if delay.days < 1:
         if hours < 1:
             if minutes == 0:
-                return "now"
+                return "no time"
             elif minutes == 1:
-                return "a minute ago"
+                return "a minute"
             else:
-                return f"{minutes} minutes ago"
+                return f"{minutes} minutes"
         elif hours == 1:
-            return "an hour ago"
+            return "an hour"
         else:
-            return f"{hours} hours ago"
+            return f"{hours} hours"
     elif delay.days == 1:
-        return "yesterday"
+        return "one day"
     else:
-        return f"{delay.days:,} days ago"
+        return f"{delay.days:,} days"
+
+
+def from_now(src: Optional[datetime]) -> str:
+    if src is None:
+        return "never"
+    output = str_delta(datetime.utcnow() - src)
+    if output == "no time":
+        return "now"
+    elif output == "one day":
+        return "yesterday"
+    return output + " ago"
 
 
 # APP SPECIFIC
@@ -197,46 +243,48 @@ def get_intro(
     members: List[discord.Member],
     nmm: int,  # number of messages impacted
     nc: int,  # number of impacted channels
+    start_datetime: datetime,
+    stop_datetime: datetime,
 ) -> str:
     """
     Get the introduction sentence of the response
     """
+    time_text = ""
+    if start_datetime is not None:
+        stop_datetime = datetime.now() if stop_datetime is None else stop_datetime
+        time_text = f" (in {str_delta(stop_datetime - start_datetime)})"
     # Show all data (members, channels) when it's less than 5 units
     if len(members) == 0:
         # Full scan of the server
         if full:
-            return f"{subject} in this server ({nc} channels, {nmm:,} messages):"
+            return f"{subject} in this server ({nc} channels, {nmm:,} messages){time_text}:"
         elif len(channels) < 5:
-            return f"{aggregate([c.mention for c in channels])} {subject.lower()} in {nmm:,} messages:"
+            return f"{aggregate([c.mention for c in channels])} {subject.lower()} in {nmm:,} messages{time_text}:"
         else:
-            return (
-                f"These {len(channels)} channels {subject.lower()} in {nmm:,} messages:"
-            )
+            return f"These {len(channels)} channels {subject.lower()} in {nmm:,} messages{time_text}:"
     elif len(members) < 5:
         if full:
-            return f"{aggregate([m.mention for m in members])} {subject.lower()} in {nmm:,} messages:"
+            return f"{aggregate([m.mention for m in members])} {subject.lower()} in {nmm:,} messages{time_text}:"
         elif len(channels) < 5:
             return (
                 f"{aggregate([m.mention for m in members])} on {aggregate([c.mention for c in channels])} "
-                f"{subject.lower()} in {nmm:,} messages:"
+                f"{subject.lower()} in {nmm:,} messages{time_text}:"
             )
         else:
             return (
                 f"{aggregate([m.mention for m in members])} on these {len(channels)} channels "
-                f"{subject.lower()} in {nmm:,} messages:"
+                f"{subject.lower()} in {nmm:,} messages{time_text}:"
             )
     else:
         if full:
-            return (
-                f"These {len(members)} members {subject.lower()} in {nmm:,} messages:"
-            )
+            return f"These {len(members)} members {subject.lower()} in {nmm:,} messages{time_text}:"
         elif len(channels) < 5:
             return (
                 f"These {len(members)} members on {aggregate([c.mention for c in channels])} "
-                f"{subject.lower()} in {nmm:,} messages:"
+                f"{subject.lower()} in {nmm:,} messages{time_text}:"
             )
         else:
             return (
                 f"These {len(members)} members on these {len(channels)} channels "
-                f"{subject.lower()} in {nmm:,} messages:"
+                f"{subject.lower()} in {nmm:,} messages{time_text}:"
             )

From 90a26bcc9ce47f50493e2585d1f821f8ea5f2d73 Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 18:04:36 +0200
Subject: [PATCH 12/20] flattened results in data_type

---
 src/data_types/composition.py |  84 +++++++++++------------
 src/data_types/frequency.py   |  11 +--
 src/data_types/presence.py    | 124 ++++++++++++++++------------------
 src/logs/channel_logs.py      |   1 +
 src/utils/utils.py            |  14 +++-
 5 files changed, 115 insertions(+), 119 deletions(-)

diff --git a/src/data_types/composition.py b/src/data_types/composition.py
index e2c0a1c..69364a1 100644
--- a/src/data_types/composition.py
+++ b/src/data_types/composition.py
@@ -23,49 +23,45 @@ class Composition:
         self.spoilers = 0
 
     def to_string(self, msg_count: int) -> List[str]:
-        ret = []
-        ret += [
-            f"- **avg. characters / message**: {self.total_characters/msg_count:.2f}"
-        ]
-        if self.plain_text > 0:
-            ret += [
-                f"- **plain text messages**: {self.plain_text:,} ({percent(self.plain_text/msg_count)})"
-            ]
-        if self.edited > 0:
-            ret += [
-                f"- **edited messages**: {self.edited:,} ({percent(self.edited/msg_count)})"
-            ]
-        if self.everyone > 0:
-            ret += [
-                f"- **@\u200beveryone**: {self.everyone:,} ({percent(self.everyone/msg_count)})"
-            ]
-        if self.mentions > 0:
-            ret += [
-                f"- **mentions**: {self.mentions:,} (in {percent(self.mention_msg/msg_count)} of msg, avg. {precise(self.mentions/msg_count)}/msg)",
-            ]
-        if self.answers > 0:
-            ret += [
-                f"- **answers**: {self.answers:,} ({percent(self.answers/msg_count)})"
-            ]
         total_emotes = val_sum(self.emotes)
-        if total_emotes > 0:
-            top_emote = top_key(self.emotes)
-            ret += [
-                f"- **emojis**: {total_emotes:,} (in {percent(self.emote_msg/msg_count)} of msg, avg. {precise(total_emotes/msg_count)}/msg)",
-                f"- **most used emoji**: {top_emote} ({plural(self.emotes[top_emote], 'time')}, {percent(self.emotes[top_emote]/total_emotes)})",
-            ]
-            if self.emote_only > 0:
-                ret += [
-                    f"- **emoji-only messages**: {self.emote_only:,} ({percent(self.emote_only/msg_count)})"
-                ]
-        if self.images > 0:
-            ret += [f"- **images**: {self.images:,} ({percent(self.images/msg_count)})"]
-        if self.links > 0:
-            ret += [f"- **links**: {self.links:,} ({percent(self.link_msg/msg_count)})"]
-        if self.spoilers > 0:
-            ret += [
-                f"- **spoilers**: {self.spoilers:,} ({percent(self.spoilers/msg_count)})"
-            ]
-        if self.tts > 0:
-            ret += [f"- **tts messages**: {self.tts:,} ({percent(self.tts/msg_count)})"]
+        top_emote = top_key(self.emotes)
+        ret = [
+            f"- **avg. characters / message**: {self.total_characters/msg_count:.2f}",
+            f"- **plain text messages**: {self.plain_text:,} ({percent(self.plain_text/msg_count)})"
+            if self.plain_text > 0
+            else "",
+            f"- **edited messages**: {self.edited:,} ({percent(self.edited/msg_count)})"
+            if self.edited > 0
+            else "",
+            f"- **@\u200beveryone**: {self.everyone:,} ({percent(self.everyone/msg_count)})"
+            if self.everyone > 0
+            else "",
+            f"- **mentions**: {self.mentions:,} (in {percent(self.mention_msg/msg_count)} of msg, avg. {precise(self.mentions/msg_count)}/msg)"
+            if self.mentions > 0
+            else "",
+            f"- **answers**: {self.answers:,} ({percent(self.answers/msg_count)})"
+            if self.answers > 0
+            else "",
+            f"- **emojis**: {total_emotes:,} (in {percent(self.emote_msg/msg_count)} of msg, avg. {precise(total_emotes/msg_count)}/msg)"
+            if total_emotes > 0
+            else "",
+            f"- **most used emoji**: {top_emote} ({plural(self.emotes[top_emote], 'time')}, {percent(self.emotes[top_emote]/total_emotes)})"
+            if total_emotes > 0
+            else "",
+            f"- **emoji-only messages**: {self.emote_only:,} ({percent(self.emote_only/msg_count)})"
+            if self.emote_only > 0
+            else "",
+            f"- **images**: {self.images:,} ({percent(self.images/msg_count)})"
+            if self.images > 0
+            else "",
+            f"- **links**: {self.links:,} ({percent(self.link_msg/msg_count)})"
+            if self.links > 0
+            else "",
+            f"- **spoilers**: {self.spoilers:,} ({percent(self.spoilers/msg_count)})"
+            if self.spoilers > 0
+            else "",
+            f"- **tts messages**: {self.tts:,} ({percent(self.tts/msg_count)})"
+            if self.tts > 0
+            else "",
+        ]
         return ret
diff --git a/src/data_types/frequency.py b/src/data_types/frequency.py
index 075084c..b0f00ec 100644
--- a/src/data_types/frequency.py
+++ b/src/data_types/frequency.py
@@ -67,13 +67,8 @@ class Frequency:
             f"- **busiest hour ever**: {str_datetime(self.busiest_hour)} ({from_now(self.busiest_hour)}, {self.busiest_hour_count} msg)",
             f"- **longest break**: {plural(round(self.longest_break.total_seconds()/3600), 'hour')} ({plural(self.longest_break.days,'day')}) from {str_datetime(self.longest_break_start)} ({from_now(self.longest_break_start)})",
             f"- **avg. streak**: {precise(sum(self.streaks)/len(self.streaks), precision=3)} msg",
+            f"- **longest streak**: {self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)} ({from_now(self.longest_streak_start)})"
+            if member_specific
+            else f"- **longest streak**: {mention(self.longest_streak_author)} ({self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)}, {from_now(self.longest_streak_start)})",
         ]
-        if member_specific:
-            ret += [
-                f"- **longest streak**: {self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)} ({from_now(self.longest_streak_start)})"
-            ]
-        else:
-            ret += [
-                f"- **longest streak**: {mention(self.longest_streak_author)} ({self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)}, {from_now(self.longest_streak_start)})"
-            ]
         return ret
diff --git a/src/data_types/presence.py b/src/data_types/presence.py
index 778881a..682774b 100644
--- a/src/data_types/presence.py
+++ b/src/data_types/presence.py
@@ -25,74 +25,70 @@ class Presence:
         show_top_channel: bool,
         member_specific: bool,
     ) -> List[str]:
-        ret = []
         if chan_count is None:
             type = "server's"
         elif chan_count == 1:
             type = "channel's"
         else:
             type = "channels'"
-        if member_specific:
-            ret += [
-                f"- **messages**: {msg_count:,} ({percent(msg_count/total_msg)} of {type})"
-            ]
-        else:
-            top_member = top_key(self.messages)
-            ret += [
-                f"- **top messages**:  {mention(top_member)} ({self.messages[top_member]:,} msg, {percent(self.messages[top_member]/val_sum(self.messages))})"
-            ]
-        if show_top_channel:
-            top_channel = top_key(self.channel_usage)
-            channel_sum = val_sum(self.channel_usage)
-            found_in = sorted(
-                self.channel_usage,
-                key=lambda k: self.channel_usage[k] / self.channel_total[k],
-            )[-1]
-            ret += [
-                f"- **most visited channel**: {channel_mention(top_channel)} ({self.channel_usage[top_channel]:,} msg, {percent(self.channel_usage[top_channel]/channel_sum)})",
-            ]
-            if member_specific:
-                ret += [
-                    f"- **most contributed channel**: {channel_mention(found_in)} ({self.channel_usage[found_in]:,} msg, {percent(self.channel_usage[found_in]/self.channel_total[found_in])} of {type})"
-                ]
-        if member_specific:
-            if len(self.mentions) > 0:
-                top_mention = top_key(self.mentions)
-                mention_sum = val_sum(self.mentions)
-                ret += [
-                    f"- **was mentioned**: {plural(mention_sum, 'time')} ({percent(mention_sum/val_sum(self.mention_count))} of {type})",
-                    f"- **mostly mentioned by**: {mention(top_mention)} ({plural(self.mentions[top_mention], 'time')}, {percent(self.mentions[top_mention]/mention_sum)})",
-                ]
-        if len(self.mention_others) > 0:
-            top_mention = top_key(self.mention_others)
-            mention_sum = val_sum(self.mention_others)
-            if member_specific:
-                ret += [
-                    f"- **mentioned others**: {plural(mention_sum, 'time')} ({percent(mention_sum/val_sum(self.mention_count))} of {type})",
-                    f"- **mostly mentioned**: {mention(top_mention)} ({plural(self.mention_others[top_mention], 'time')}, {percent(self.mention_others[top_mention]/mention_sum)})",
-                ]
-            else:
-                top_member = top_key(self.mention_count)
-                ret += [
-                    f"- **mentioned**: {plural(mention_sum, 'time')} ({mention(top_member)}, {percent(self.mention_count[top_member]/val_sum(self.mention_count))})",
-                    f"- **top mentions**: {mention(top_member)} ({plural(self.mention_count[top_member], 'time')}, {percent(self.mention_count[top_member]/val_sum(self.mention_count))})",
-                    f"- **most mentioned**: {mention(top_mention)} ({plural(self.mention_others[top_mention], 'time')}, {percent(self.mention_others[top_mention]/mention_sum)})",
-                ]
-        if len(self.reactions) > 0:
-            total_used = val_sum(self.reactions)
-            top_reaction = top_key(self.reactions)
-            ret += [
-                f"- **reactions**: {plural(total_used, 'time')}",
-                f"- **most used reaction**: {top_reaction} ({plural(self.reactions[top_reaction], 'time')}, {percent(self.reactions[top_reaction]/total_used)})",
-            ]
-            if member_specific:
-                ret[
-                    -2
-                ] += f" ({percent(total_used/val_sum(self.used_reaction))} of {type})"
-            else:
-                top_member = top_key(self.used_reaction)
-                ret.insert(
-                    -1,
-                    f"- **top reactions**: {mention(top_member)} ({plural(self.used_reaction[top_member], 'time')}, {percent(self.used_reaction[top_member]/val_sum(self.used_reaction))})",
-                )
+        top_member = top_key(self.messages)
+        top_channel = top_key(self.channel_usage)
+        channel_sum = val_sum(self.channel_usage)
+        found_in = top_key(
+            self.channel_usage,
+            key=lambda k: self.channel_usage[k] / self.channel_total[k],
+        )
+        top_mention = top_key(self.mentions)
+        mention_sum = val_sum(self.mentions)
+        top_mention_others = top_key(self.mention_others)
+        mention_others_sum = val_sum(self.mention_others)
+        top_member_mentioned = top_key(self.mention_count)
+        total_reaction_used = val_sum(self.reactions)
+        top_reaction = top_key(self.reactions)
+        top_reaction_member = top_key(self.used_reaction)
+
+        ret = [
+            f"- **messages**: {msg_count:,} ({percent(msg_count/total_msg)} of {type})"
+            if member_specific
+            else f"- **top messages**:  {mention(top_member)} ({self.messages[top_member]:,} msg, {percent(self.messages[top_member]/val_sum(self.messages))})",
+            f"- **most visited channel**: {channel_mention(top_channel)} ({self.channel_usage[top_channel]:,} msg, {percent(self.channel_usage[top_channel]/channel_sum)})"
+            if show_top_channel
+            else "",
+            f"- **most contributed channel**: {channel_mention(found_in)} ({self.channel_usage[found_in]:,} msg, {percent(self.channel_usage[found_in]/self.channel_total[found_in])} of {type})"
+            if show_top_channel and member_specific
+            else "",
+            f"- **was mentioned**: {plural(mention_sum, 'time')} ({percent(mention_sum/val_sum(self.mention_count))} of {type})"
+            if member_specific and len(self.mentions) > 0
+            else "",
+            f"- **mostly mentioned by**: {mention(top_mention)} ({plural(self.mentions[top_mention], 'time')}, {percent(self.mentions[top_mention]/mention_sum)})"
+            if member_specific and len(self.mentions) > 0
+            else "",
+            f"- **mentioned others**: {plural(mention_others_sum, 'time')} ({percent(mention_others_sum/val_sum(self.mention_count))} of {type})"
+            if len(self.mention_others) > 0 and member_specific
+            else "",
+            f"- **mostly mentioned**: {mention(top_mention_others)} ({plural(self.mention_others[top_mention_others], 'time')}, {percent(self.mention_others[top_mention_others]/mention_others_sum)})"
+            if len(self.mention_others) > 0 and member_specific
+            else "",
+            f"- **mentioned**: {plural(mention_others_sum, 'time')} ({mention(top_member_mentioned)}, {percent(self.mention_count[top_member_mentioned]/val_sum(self.mention_count))})"
+            if len(self.mention_others) > 0 and not member_specific
+            else "",
+            f"- **top mentions**: {mention(top_member_mentioned)} ({plural(self.mention_count[top_member_mentioned], 'time')}, {percent(self.mention_count[top_member_mentioned]/val_sum(self.mention_count))})"
+            if len(self.mention_others) > 0 and not member_specific
+            else "",
+            f"- **most mentioned**: {mention(top_mention_others)} ({plural(self.mention_others[top_mention_others], 'time')}, {percent(self.mention_others[top_mention_others]/mention_others_sum)})"
+            if len(self.mention_others) > 0 and not member_specific
+            else "",
+            f"- **reactions**: {plural(total_reaction_used, 'time')}"
+            if len(self.reactions) > 0 and not member_specific
+            else "",
+            f"- **reactions**: {plural(total_reaction_used, 'time')} ({percent(total_reaction_used/val_sum(self.used_reaction))} of {type})"
+            if len(self.reactions) > 0 and member_specific
+            else "",
+            f"- **top reactions**: {mention(top_reaction_member)} ({plural(self.used_reaction[top_reaction_member], 'time')}, {percent(self.used_reaction[top_reaction_member]/val_sum(self.used_reaction))})"
+            if len(self.reactions) > 0 and not member_specific
+            else "",
+            f"- **most used reaction**: {top_reaction} ({plural(self.reactions[top_reaction], 'time')}, {percent(self.reactions[top_reaction]/total_reaction_used)})"
+            if len(self.reactions) > 0
+            else "",
+        ]
         return ret
diff --git a/src/logs/channel_logs.py b/src/logs/channel_logs.py
index 83681e9..99fbfdb 100644
--- a/src/logs/channel_logs.py
+++ b/src/logs/channel_logs.py
@@ -94,5 +94,6 @@ class ChannelLogs:
         channel = dict(self.__dict__)
         channel.pop("channel", None)
         channel.pop("guild", None)
+        channel.pop("start_date", None)
         channel["messages"] = [message.dict() for message in self.messages]
         return channel
diff --git a/src/utils/utils.py b/src/utils/utils.py
index a8f2ad5..a823399 100644
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@@ -1,5 +1,5 @@
 from calendar import month
-from typing import List, Dict, Union, Optional, Any
+from typing import Callable, List, Dict, Union, Optional, Any
 import os
 import logging
 import discord
@@ -117,11 +117,19 @@ def no_duplicate(seq: list) -> list:
 # DICTS
 
 
-def top_key(d: Dict[Union[str, int], int]) -> Union[str, int]:
-    return sorted(d, key=lambda k: d[k])[-1]
+def top_key(
+    d: Dict[Union[str, int], int], key: Optional[Callable] = None
+) -> Union[str, int]:
+    if len(d) == 0:
+        return None
+    if key is None:
+        key = lambda k: d[k]
+    return sorted(d, key=key)[-1]
 
 
 def val_sum(d: Dict[Any, int]) -> int:
+    if len(d) == 0:
+        return 0
     return sum(d.values())
 
 

From 802e20809270c2ff8a19198b5ef03950a7d8ad72 Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 18:19:40 +0200
Subject: [PATCH 13/20] alternative syntax for relative time range

---
 README.md               |  2 +-
 src/scanners/scanner.py |  4 ++--
 src/utils/utils.py      | 36 ++++++++++++++++++++++++++----------
 3 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index 0970c0a..6f79ee0 100644
--- a/README.md
+++ b/README.md
@@ -59,7 +59,7 @@
     * fast: only read cache
     * fresh: does not read cache
 
-(Dates are formated 'yyyy-mm-dd' or 'yyyy-mm-ddThh:mm' (ISO 8601) or 'week/month/year')
+(Dates are formated 'yyyy-mm-dd' or 'yyyy-mm-ddThh:mm' (ISO 8601) or 'week' or '8days' or '1y')
 ```
 
 ## Running this bot
diff --git a/src/scanners/scanner.py b/src/scanners/scanner.py
index 8931318..f30cb11 100644
--- a/src/scanners/scanner.py
+++ b/src/scanners/scanner.py
@@ -12,8 +12,8 @@ from utils import (
     delta,
     gdpr,
     ISO8601_REGEX,
+    RELATIVE_REGEX,
     parse_time,
-    RELATIVE_TIME,
 )
 from logs import (
     GuildLogs,
@@ -70,7 +70,7 @@ class Scanner(ABC):
                     arg = arg[3:-1] if "!" in arg else arg[2:-1]
                 elif re.match(r"^<#!?\d+>$", arg):
                     arg = arg[3:-1] if "!" in arg else arg[2:-1]
-                elif re.match(ISO8601_REGEX, arg) or arg in RELATIVE_TIME:
+                elif re.match(ISO8601_REGEX, arg) or re.match(RELATIVE_REGEX, arg):
                     dates += [parse_time(arg)]
                     skip_check = True
                     if len(dates) > 2:
diff --git a/src/utils/utils.py b/src/utils/utils.py
index a823399..b18265a 100644
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@@ -36,7 +36,7 @@ def generate_help(
 %{cmd}: {info}
 arguments:
 {arg_list}
-(Dates are formated 'yyyy-mm-dd' or 'yyyy-mm-ddThh:mm' (ISO 8601) or 'week/month/year')
+(Dates are formated 'yyyy-mm-dd' or 'yyyy-mm-ddThh:mm' (ISO 8601) or 'week' or '8days' or '1y')
 Example: %{cmd} {example}
 ```"""
 
@@ -180,21 +180,37 @@ def parse_iso_datetime(str_date: str) -> datetime:
     return dateutil.parser.parse(str_date)
 
 
-RELATIVE_TIME = {
-    "today": relativedelta(days=1),
-    "yesterday": relativedelta(days=2),
-    "week": relativedelta(weeks=1),
-    "month": relativedelta(months=1),
-    "year": relativedelta(years=1),
-}
+RELATIVE_REGEX = (
+    r"(yesterday|today|\d*h(ours?)?|\d*d(ays?)?|\d*w(eeks?)?|\d*m(onths?)?|\d*y(ears?))"
+)
 
 
 def parse_relative_time(src: str) -> datetime:
-    return datetime.utcnow() - RELATIVE_TIME[src]
+    timezone_delta = datetime.utcnow() - datetime.now()
+    if src == "today":
+        return datetime.today() + timezone_delta
+    elif src == "yesterday":
+        return datetime.today() - relativedelta(days=1) + timezone_delta
+    else:
+        m = re.match("(\d*)(\w+)", src)
+        delta = None
+        value = int(m[1])
+        unit = m[2][0]
+        if unit == "h":
+            delta = relativedelta(hours=value)
+        elif unit == "d":
+            delta = relativedelta(days=value)
+        elif unit == "w":
+            delta = relativedelta(weeks=value)
+        elif unit == "m":
+            delta = relativedelta(months=value)
+        elif unit == "y":
+            delta = relativedelta(years=value)
+        return datetime.utcnow() - delta
 
 
 def parse_time(src: str) -> datetime:
-    if src in RELATIVE_TIME:
+    if re.match(RELATIVE_REGEX, src):
         return parse_relative_time(src)
     else:
         return parse_iso_datetime(src)

From 8c0605797a4892a5914edc74ab7897f7c3613e80 Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 18:23:46 +0200
Subject: [PATCH 14/20] clarified dates syntax

---
 README.md          | 2 +-
 src/utils/utils.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 6f79ee0..b3cae58 100644
--- a/README.md
+++ b/README.md
@@ -59,7 +59,7 @@
     * fast: only read cache
     * fresh: does not read cache
 
-(Dates are formated 'yyyy-mm-dd' or 'yyyy-mm-ddThh:mm' (ISO 8601) or 'week' or '8days' or '1y')
+(Sample dates: 2020 / 2021-11 / 2021-06-28 / 2020-06-28T23:00 / today / week / 8days / 1y)
 ```
 
 ## Running this bot
diff --git a/src/utils/utils.py b/src/utils/utils.py
index b18265a..e163f04 100644
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@@ -36,7 +36,7 @@ def generate_help(
 %{cmd}: {info}
 arguments:
 {arg_list}
-(Dates are formated 'yyyy-mm-dd' or 'yyyy-mm-ddThh:mm' (ISO 8601) or 'week' or '8days' or '1y')
+(Sample dates: 2020 / 2021-11 / 2021-06-28 / 2020-06-28T23:00 / today / week / 8days / 1y)
 Example: %{cmd} {example}
 ```"""
 

From 5c570ee09b9996b8a6bf03ef70a302252ffb6ca3 Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 18:25:51 +0200
Subject: [PATCH 15/20] fix no value in relative time

---
 src/utils/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utils/utils.py b/src/utils/utils.py
index e163f04..729d633 100644
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@@ -194,7 +194,7 @@ def parse_relative_time(src: str) -> datetime:
     else:
         m = re.match("(\d*)(\w+)", src)
         delta = None
-        value = int(m[1])
+        value = int(m[1]) if m[1] else 1
         unit = m[2][0]
         if unit == "h":
             delta = relativedelta(hours=value)

From 09161850c535f90c8434c82a39aa177877b8eb57 Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 18:29:27 +0200
Subject: [PATCH 16/20] clarified not serialized attributes

---
 README.md                | 1 +
 src/logs/channel_logs.py | 7 ++++---
 src/logs/message_log.py  | 6 +++++-
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index b3cae58..ad239ac 100644
--- a/README.md
+++ b/README.md
@@ -114,6 +114,7 @@ python3 src/main.py
   * remove old and unused logs at start and guild leaving
   * GDPR disclaimer before scanning
   * start and stop dates
+  * bug fix and improvements
 * **v1.12**
   * more scans: `%words`
   * concurrent `fast` analysis
diff --git a/src/logs/channel_logs.py b/src/logs/channel_logs.py
index 99fbfdb..39d854b 100644
--- a/src/logs/channel_logs.py
+++ b/src/logs/channel_logs.py
@@ -8,6 +8,8 @@ from utils import FakeMessage
 CHUNK_SIZE = 2000
 FORMAT = 3
 
+NOT_SERIALIZED = ["channel", "guild", "start_date"]
+
 
 class ChannelLogs:
     def __init__(self, channel: Union[discord.TextChannel, dict], guild: Any):
@@ -92,8 +94,7 @@ class ChannelLogs:
 
     def dict(self) -> dict:
         channel = dict(self.__dict__)
-        channel.pop("channel", None)
-        channel.pop("guild", None)
-        channel.pop("start_date", None)
+        for key in NOT_SERIALIZED:
+            channel.pop(key, None)
         channel["messages"] = [message.dict() for message in self.messages]
         return channel
diff --git a/src/logs/message_log.py b/src/logs/message_log.py
index f534155..263c245 100644
--- a/src/logs/message_log.py
+++ b/src/logs/message_log.py
@@ -8,6 +8,9 @@ IMAGE_FORMAT = [".gif", ".gifv", ".png", ".jpg", ".jpeg", ".bmp"]
 EMBED_IMAGES = ["image", "gifv"]
 
 
+NOT_SERIALIZED = ["channel"]
+
+
 class MessageLog:
     def __init__(self, message: Union[discord.Message, dict], channel: Any):
         self.channel = channel
@@ -79,7 +82,8 @@ class MessageLog:
 
     def dict(self) -> dict:
         message = dict(self.__dict__)
-        message.pop("channel", None)
+        for key in NOT_SERIALIZED:
+            message.pop(key, None)
         message["created_at"] = self.created_at.isoformat()
         message["edited_at"] = (
             self.edited_at.isoformat() if self.edited_at is not None else None

From 0e4ed0eb6b30c0bdd3f350f7a023d1f6fb93a676 Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 19:07:43 +0200
Subject: [PATCH 17/20] only fetch history of given time

---
 src/logs/channel_logs.py | 84 ++++++++++++++++++++++++++++------------
 src/logs/guild_logs.py   | 26 ++++++++++---
 src/scanners/scanner.py  | 52 ++++++++++++++-----------
 src/utils/utils.py       |  4 +-
 4 files changed, 111 insertions(+), 55 deletions(-)

diff --git a/src/logs/channel_logs.py b/src/logs/channel_logs.py
index 39d854b..9e83b2b 100644
--- a/src/logs/channel_logs.py
+++ b/src/logs/channel_logs.py
@@ -1,6 +1,7 @@
 from typing import Union, Tuple, Any
 import discord
 from discord import message
+from datetime import datetime
 
 from . import MessageLog
 from utils import FakeMessage
@@ -18,6 +19,7 @@ class ChannelLogs:
             self.id = channel.id
             self.name = channel.name
             self.last_message_id = None
+            self.first_message_id = None
             self.format = FORMAT
             self.messages = []
             self.start_date = None
@@ -32,6 +34,12 @@ class ChannelLogs:
                 if channel["last_message_id"] is not None
                 else None
             )
+            self.first_message_id = (
+                int(channel["first_message_id"])
+                if "first_message_id" in channel
+                and channel["first_message_id"] is not None
+                else None
+            )
             self.messages = [
                 MessageLog(message, self) for message in channel["messages"]
             ]
@@ -42,48 +50,74 @@ class ChannelLogs:
     def is_format(self):
         return self.format == FORMAT
 
-    async def load(self, channel: discord.TextChannel) -> Tuple[int, int]:
+    async def load(
+        self, channel: discord.TextChannel, start_date: datetime, stop_date: datetime
+    ) -> Tuple[int, int]:
         self.name = channel.name
         self.channel = channel
+        is_empty = self.last_message_id is None
         try:
-            if self.last_message_id is not None:  # append
+            if is_empty:
+                sanity_check = len(await channel.history(limit=1).flatten())
+                if sanity_check != 1:
+                    yield len(self.messages), True
+                    return
+            # load backward
+            if is_empty or (
+                start_date is not None
+                and self.start_date > start_date
+                and self.first_message_id is not None
+            ):
+                first_message_id = self.first_message_id
+                first_message_date = None
+                tmp_message_id = 0
+                done = 0
+                while (
+                    done >= CHUNK_SIZE
+                    or first_message_id is None
+                    or (first_message_date is None or first_message_date >= start_date)
+                    and start_date is not None
+                ) and tmp_message_id != first_message_id:
+                    tmp_message_id = first_message_id
+                    done = 0
+                    async for message in channel.history(
+                        limit=CHUNK_SIZE,
+                        before=FakeMessage(first_message_id)
+                        if first_message_id is not None
+                        else None,
+                        oldest_first=False,
+                    ):
+                        done += 1
+                        first_message_id = message.id
+                        first_message_date = message.created_at
+                        m = MessageLog(message, self)
+                        await m.load(message)
+                        self.messages += [m]
+                    yield len(self.messages), False
+                if done >= CHUNK_SIZE and first_message_date < start_date:
+                    # date was limiting here, store first message id
+                    self.first_message_id = first_message_id
+                self.last_message_id = channel.last_message_id
+            # load forward
+            if not is_empty:
                 tmp_message_id = None
+                last_message_date = self.messages[0].created_at
                 while (
                     self.last_message_id != channel.last_message_id
-                    and self.last_message_id != tmp_message_id
-                ):
+                    or (stop_date is not None and last_message_date <= stop_date)
+                ) and self.last_message_id != tmp_message_id:
                     tmp_message_id = self.last_message_id
                     async for message in channel.history(
                         limit=CHUNK_SIZE,
                         after=FakeMessage(self.last_message_id),
                         oldest_first=True,
                     ):
+                        last_message_date = message.created_at
                         self.last_message_id = message.id
                         m = MessageLog(message, self)
                         await m.load(message)
                         self.messages.insert(0, m)
                     yield len(self.messages), False
-            else:  # first load
-                last_message_id = None
-                done = 0
-                sanity_check = len(await channel.history(limit=1).flatten())
-                if sanity_check == 1:
-                    while done >= CHUNK_SIZE or last_message_id is None:
-                        done = 0
-                        async for message in channel.history(
-                            limit=CHUNK_SIZE,
-                            before=FakeMessage(last_message_id)
-                            if last_message_id is not None
-                            else None,
-                            oldest_first=False,
-                        ):
-                            done += 1
-                            last_message_id = message.id
-                            m = MessageLog(message, self)
-                            await m.load(message)
-                            self.messages += [m]
-                        yield len(self.messages), False
-                    self.last_message_id = channel.last_message_id
         except discord.errors.HTTPException:
             yield -1, True
             return  # When an exception occurs (like Forbidden)
diff --git a/src/logs/guild_logs.py b/src/logs/guild_logs.py
index 9aa0d1a..6ba20cf 100644
--- a/src/logs/guild_logs.py
+++ b/src/logs/guild_logs.py
@@ -32,7 +32,13 @@ MAX_MODIFICATION_TIME = 365 * 24 * 60 * 60
 
 
 class Worker:
-    def __init__(self, channel_log: ChannelLogs, channel: discord.TextChannel):
+    def __init__(
+        self,
+        channel_log: ChannelLogs,
+        channel: discord.TextChannel,
+        start_date: datetime,
+        stop_date: datetime,
+    ):
         self.channel_log = channel_log
         self.channel = channel
         self.start_msg = len(channel_log.messages)
@@ -41,12 +47,16 @@ class Worker:
         self.done = False
         self.cancelled = False
         self.loop = asyncio.get_event_loop()
+        self.start_date = start_date
+        self.stop_date = stop_date
 
     def start(self):
         asyncio.run_coroutine_threadsafe(self.process(), self.loop)
 
     async def process(self):
-        async for count, done in self.channel_log.load(self.channel):
+        async for count, done in self.channel_log.load(
+            self.channel, self.start_date, self.stop_date
+        ):
             if count > 0:
                 self.queried_msg = count - self.start_msg
                 self.total_msg = count
@@ -98,7 +108,9 @@ class GuildLogs:
     async def load(
         self,
         progress: discord.Message,
-        target_channels: List[discord.TextChannel] = [],
+        target_channels: List[discord.TextChannel],
+        start_date: datetime,
+        stop_date: datetime,
         *,
         fast: bool,
         fresh: bool,
@@ -173,6 +185,8 @@ class GuildLogs:
         if (
             not fast
             and not fresh
+            and start_date is None
+            and stop_date is None
             and last_time is not None
             and (time.time() - last_time) < MIN_MODIFICATION_TIME
         ):
@@ -214,7 +228,9 @@ class GuildLogs:
                 if channel.id not in self.channels or fresh:
                     loading_new += 1
                     self.channels[channel.id] = ChannelLogs(channel, self)
-                workers += [Worker(self.channels[channel.id], channel)]
+                workers += [
+                    Worker(self.channels[channel.id], channel, start_date, stop_date)
+                ]
             warning_msg = "(this might take a while)"
             if len(target_channels) > 5 and loading_new > 5:
                 warning_msg = "(most channels are new, this will take a long while)"
@@ -255,7 +271,7 @@ class GuildLogs:
                     f"Reading new history...\n{total_msg:,} messages in {total_chan:,}/{max_chan:,} channels ({round(queried_msg/deltas(t0)):,}m/s)\n{warning_msg}{remaining_msg}",
                 )
             logging.info(
-                f"log {self.guild.id} > queried in {delta(t0):,}ms -> {queried_msg / deltas(t0):,.3f} m/s"
+                f"log {self.guild.id} > queried {queried_msg} in {delta(t0):,}ms -> {queried_msg / deltas(t0):,.3f} m/s"
             )
             # write logs
             real_total_msg = sum(
diff --git a/src/scanners/scanner.py b/src/scanners/scanner.py
index f30cb11..62ba168 100644
--- a/src/scanners/scanner.py
+++ b/src/scanners/scanner.py
@@ -90,10 +90,10 @@ class Scanner(ABC):
                     )
                     return
 
-            self.start_datetime = None if len(dates) < 1 else min(dates)
-            self.stop_datetime = datetime.now() if len(dates) < 2 else max(dates)
+            self.start_date = None if len(dates) < 1 else min(dates)
+            self.stop_date = None if len(dates) < 2 else max(dates)
 
-            if self.start_datetime is not None and self.start_datetime > datetime.now():
+            if self.start_date is not None and self.start_date > datetime.now():
                 await message.channel.send(
                     f"Start date is after today", reference=message
                 )
@@ -130,20 +130,13 @@ class Scanner(ABC):
                     allowed_mentions=discord.AllowedMentions.none(),
                 )
                 total_msg, total_chan = await logs.load(
-                    progress, self.channels, fast="fast" in args, fresh="fresh" in args
+                    progress,
+                    self.channels,
+                    self.start_date,
+                    self.stop_date,
+                    fast="fast" in args,
+                    fresh="fresh" in args,
                 )
-                if self.start_datetime is not None:
-                    self.start_datetime = max(
-                        self.start_datetime,
-                        min(
-                            [
-                                logs.channels[channel.id].start_date
-                                for channel in self.channels
-                                if channel.id in logs.channels
-                                and logs.channels[channel.id].start_date is not None
-                            ]
-                        ),
-                    )
                 if total_msg == CANCELLED:
                     await message.channel.send(
                         "Operation cancelled by user",
@@ -157,6 +150,21 @@ class Scanner(ABC):
                 elif total_msg == NO_FILE:
                     await message.channel.send(gdpr.TEXT)
                 else:
+                    if self.start_date is not None:
+                        self.start_date = max(
+                            self.start_date,
+                            min(
+                                [
+                                    logs.channels[channel.id].start_date
+                                    for channel in self.channels
+                                    if channel.id in logs.channels
+                                    and logs.channels[channel.id].start_date is not None
+                                ]
+                            ),
+                        )
+                        if self.stop_date is None:
+                            self.stop_date = datetime.utcnow()
+
                     self.msg_count = 0
                     self.total_msg = 0
                     self.chan_count = 0
@@ -169,12 +177,12 @@ class Scanner(ABC):
                                     self.compute_message(channel_logs, message_log)
                                     for message_log in channel_logs.messages
                                     if (
-                                        self.start_datetime is None
-                                        or message_log.created_at >= self.start_datetime
+                                        self.start_date is None
+                                        or message_log.created_at >= self.start_date
                                     )
                                     and (
-                                        self.stop_datetime is None
-                                        or message_log.created_at <= self.stop_datetime
+                                        self.stop_date is None
+                                        or message_log.created_at <= self.stop_date
                                     )
                                 ]
                             )
@@ -199,8 +207,8 @@ class Scanner(ABC):
                                 self.members,
                                 self.msg_count,
                                 self.chan_count,
-                                self.start_datetime,
-                                self.stop_datetime,
+                                self.start_date,
+                                self.stop_date,
                             )
                         )
                         logging.info(f"scan {guild.id} > results in {delta(t0):,}ms")
diff --git a/src/utils/utils.py b/src/utils/utils.py
index 729d633..a439ffd 100644
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@@ -180,9 +180,7 @@ def parse_iso_datetime(str_date: str) -> datetime:
     return dateutil.parser.parse(str_date)
 
 
-RELATIVE_REGEX = (
-    r"(yesterday|today|\d*h(ours?)?|\d*d(ays?)?|\d*w(eeks?)?|\d*m(onths?)?|\d*y(ears?))"
-)
+RELATIVE_REGEX = r"(yesterday|today|\d*h(ours?)?|\d*d(ays?)?|\d*w(eeks?)?|\d*m(onths?)?|\d*y(ears?)?)"
 
 
 def parse_relative_time(src: str) -> datetime:

From 715a5985130ef774e74ff8464bd4211ac9107416 Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 19:11:30 +0200
Subject: [PATCH 18/20] fix cancelled bug

---
 src/logs/guild_logs.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/logs/guild_logs.py b/src/logs/guild_logs.py
index 6ba20cf..3e5f748 100644
--- a/src/logs/guild_logs.py
+++ b/src/logs/guild_logs.py
@@ -89,21 +89,22 @@ class GuildLogs:
         return self.locked and self.log_file not in current_analysis
 
     def lock(self) -> bool:
-        self.locked = True
         current_analysis_lock.acquire()
         if self.log_file in current_analysis:
             current_analysis_lock.release()
             return False
+        self.locked = True
         current_analysis.append(self.log_file)
         current_analysis_lock.release()
         return True
 
     def unlock(self):
-        self.locked = False
-        current_analysis_lock.acquire()
-        if self.log_file in current_analysis:
-            current_analysis.remove(self.log_file)
-        current_analysis_lock.release()
+        if self.locked:
+            self.locked = False
+            current_analysis_lock.acquire()
+            if self.log_file in current_analysis:
+                current_analysis.remove(self.log_file)
+            current_analysis_lock.release()
 
     async def load(
         self,

From cf6fa7ccf2aed65908054f0868dca5725902bac3 Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 19:49:34 +0200
Subject: [PATCH 19/20] smol fix

---
 src/data_types/frequency.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/data_types/frequency.py b/src/data_types/frequency.py
index b0f00ec..aab30cf 100644
--- a/src/data_types/frequency.py
+++ b/src/data_types/frequency.py
@@ -49,7 +49,7 @@ class Frequency:
         if (
             self.dates[0].weekday() <= busiest_weekday
             and self.dates[-1].weekday() >= busiest_weekday
-        ):
+        ) or n_weekdays == 0:
             n_weekdays += 1
         n_hours = delta.days
         if self.dates[0].hour <= busiest_hour and self.dates[-1].hour >= busiest_hour:

From 76af4661ed8ea80b0d0662b3bb34da5f7bfb780e Mon Sep 17 00:00:00 2001
From: Klemek <klemek@outlook.fr>
Date: Fri, 9 Apr 2021 19:50:12 +0200
Subject: [PATCH 20/20] fixed time range loading

---
 src/logs/channel_logs.py | 39 ++++++++++++++++++++-------------------
 src/logs/guild_logs.py   |  2 ++
 src/scanners/scanner.py  |  2 +-
 3 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/src/logs/channel_logs.py b/src/logs/channel_logs.py
index 9e83b2b..a5e3857 100644
--- a/src/logs/channel_logs.py
+++ b/src/logs/channel_logs.py
@@ -64,47 +64,48 @@ class ChannelLogs:
                     return
             # load backward
             if is_empty or (
-                start_date is not None
-                and self.start_date > start_date
-                and self.first_message_id is not None
+                self.first_message_id is not None
+                and (
+                    start_date is None
+                    or (self.start_date is not None and self.start_date > start_date)
+                )
             ):
-                first_message_id = self.first_message_id
                 first_message_date = None
                 tmp_message_id = 0
                 done = 0
                 while (
-                    done >= CHUNK_SIZE
-                    or first_message_id is None
-                    or (first_message_date is None or first_message_date >= start_date)
-                    and start_date is not None
-                ) and tmp_message_id != first_message_id:
-                    tmp_message_id = first_message_id
+                    first_message_date is None
+                    or (
+                        done >= CHUNK_SIZE
+                        and (start_date is None or first_message_date > start_date)
+                    )
+                ) and tmp_message_id != self.first_message_id:
+                    tmp_message_id = self.first_message_id
                     done = 0
                     async for message in channel.history(
                         limit=CHUNK_SIZE,
-                        before=FakeMessage(first_message_id)
-                        if first_message_id is not None
+                        before=FakeMessage(self.first_message_id)
+                        if self.first_message_id is not None
                         else None,
                         oldest_first=False,
                     ):
                         done += 1
-                        first_message_id = message.id
+                        self.first_message_id = message.id
                         first_message_date = message.created_at
                         m = MessageLog(message, self)
                         await m.load(message)
                         self.messages += [m]
                     yield len(self.messages), False
-                if done >= CHUNK_SIZE and first_message_date < start_date:
-                    # date was limiting here, store first message id
-                    self.first_message_id = first_message_id
+                if done < CHUNK_SIZE:  # reached bottom
+                    self.first_message_id = None
                 self.last_message_id = channel.last_message_id
             # load forward
-            if not is_empty:
+            last_message_date = self.messages[0].created_at
+            if not is_empty and (stop_date is None or last_message_date < stop_date):
                 tmp_message_id = None
-                last_message_date = self.messages[0].created_at
                 while (
                     self.last_message_id != channel.last_message_id
-                    or (stop_date is not None and last_message_date <= stop_date)
+                    and (stop_date is None or last_message_date < stop_date)
                 ) and self.last_message_id != tmp_message_id:
                     tmp_message_id = self.last_message_id
                     async for message in channel.history(
diff --git a/src/logs/guild_logs.py b/src/logs/guild_logs.py
index 3e5f748..7600077 100644
--- a/src/logs/guild_logs.py
+++ b/src/logs/guild_logs.py
@@ -195,8 +195,10 @@ class GuildLogs:
                 channel
                 for channel in target_channels
                 if channel.id not in self.channels
+                or self.channels[channel.id].first_message_id is not None
             ]
             if len(invalid_target_channels) == 0:
+                logging.info(f"log {self.guild.id} > assumed fast")
                 fast = True
                 if self.locked:
                     self.unlock()
diff --git a/src/scanners/scanner.py b/src/scanners/scanner.py
index 62ba168..b4a96f1 100644
--- a/src/scanners/scanner.py
+++ b/src/scanners/scanner.py
@@ -150,7 +150,7 @@ class Scanner(ABC):
                 elif total_msg == NO_FILE:
                     await message.channel.send(gdpr.TEXT)
                 else:
-                    if self.start_date is not None:
+                    if self.start_date is not None and len(logs.channels) > 0:
                         self.start_date = max(
                             self.start_date,
                             min(