From 6a70663201a95471365e287bd2638de7d4df8877 Mon Sep 17 00:00:00 2001 From: Klemek Date: Fri, 9 Apr 2021 14:57:55 +0200 Subject: [PATCH] gdpr agreements --- src/logs/__init__.py | 2 +- src/logs/guild_logs.py | 117 +++++++++++++++++++++++----------------- src/main.py | 10 +++- src/scanners/scanner.py | 13 ++++- src/utils/gdpr.py | 68 +++++++++++++++++++++++ 5 files changed, 157 insertions(+), 53 deletions(-) create mode 100644 src/utils/gdpr.py diff --git a/src/logs/__init__.py b/src/logs/__init__.py index 358e9af..d62ab1d 100644 --- a/src/logs/__init__.py +++ b/src/logs/__init__.py @@ -1,3 +1,3 @@ from .message_log import MessageLog from .channel_logs import ChannelLogs -from .guild_logs import GuildLogs, ALREADY_RUNNING, CANCELLED +from .guild_logs import GuildLogs, ALREADY_RUNNING, CANCELLED, NO_FILE diff --git a/src/logs/guild_logs.py b/src/logs/guild_logs.py index 3c4af39..9aa0d1a 100644 --- a/src/logs/guild_logs.py +++ b/src/logs/guild_logs.py @@ -23,11 +23,12 @@ current_analysis_lock = threading.Lock() ALREADY_RUNNING = -100 CANCELLED = -200 +NO_FILE = -300 # 5 minutes, assume 'fast' arg MIN_MODIFICATION_TIME = 5 * 60 -# ~6 months, remove log file -MAX_MODIFICATION_TIME = 6 * 30.5 * 24 * 60 * 60 +# ~1 year, remove log file +MAX_MODIFICATION_TIME = 365 * 24 * 60 * 60 class Worker: @@ -110,52 +111,49 @@ class GuildLogs: if not os.path.exists(LOG_DIR): os.mkdir(LOG_DIR) last_time = None - if os.path.exists(self.log_file): - channels = {} - try: - last_time = os.path.getmtime(self.log_file) - gziped_data = None - await code_message(progress, "Reading saved history (1/4)...") - t0 = datetime.now() - with open(self.log_file, mode="rb") as f: - gziped_data = f.read() - logging.info(f"log {self.guild.id} > read in {delta(t0):,}ms") - if self.check_cancelled(): - return CANCELLED, 0 - await code_message(progress, "Reading saved history (2/4)...") - t0 = datetime.now() - json_data = gzip.decompress(gziped_data) - del gziped_data - logging.info( - f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms" - ) - if self.check_cancelled(): - return CANCELLED, 0 - await code_message(progress, "Reading saved history (3/4)...") - t0 = datetime.now() - channels = json.loads(json_data) - del json_data - logging.info(f"log {self.guild.id} > json parse in {delta(t0):,}ms") - if self.check_cancelled(): - return CANCELLED, 0 - await code_message(progress, "Reading saved history (4/4)...") - t0 = datetime.now() - self.channels = { - int(id): ChannelLogs(channels[id], self) for id in channels - } - # remove invalid format - self.channels = { - id: self.channels[id] - for id in self.channels - if self.channels[id].is_format() - } - logging.info(f"log {self.guild.id} > loaded in {delta(t0):,}ms") - except json.decoder.JSONDecodeError: - logging.error(f"log {self.guild.id} > invalid JSON") - except IOError: - logging.error(f"log {self.guild.id} > cannot read") - else: - fast = False + if not os.path.exists(self.log_file): + return NO_FILE, 0 + channels = {} + try: + last_time = os.path.getmtime(self.log_file) + gziped_data = None + await code_message(progress, "Reading saved history (1/4)...") + t0 = datetime.now() + with open(self.log_file, mode="rb") as f: + gziped_data = f.read() + logging.info(f"log {self.guild.id} > read in {delta(t0):,}ms") + if self.check_cancelled(): + return CANCELLED, 0 + await code_message(progress, "Reading saved history (2/4)...") + t0 = datetime.now() + json_data = gzip.decompress(gziped_data) + del gziped_data + logging.info(f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms") + if self.check_cancelled(): + return CANCELLED, 0 + await code_message(progress, "Reading saved history (3/4)...") + t0 = datetime.now() + channels = json.loads(json_data) + del json_data + logging.info(f"log {self.guild.id} > json parse in {delta(t0):,}ms") + if self.check_cancelled(): + return CANCELLED, 0 + await code_message(progress, "Reading saved history (4/4)...") + t0 = datetime.now() + self.channels = { + int(id): ChannelLogs(channels[id], self) for id in channels + } + # remove invalid format + self.channels = { + id: self.channels[id] + for id in self.channels + if self.channels[id].is_format() + } + logging.info(f"log {self.guild.id} > loaded in {delta(t0):,}ms") + except json.decoder.JSONDecodeError: + logging.error(f"log {self.guild.id} > invalid JSON") + except IOError: + logging.error(f"log {self.guild.id} > cannot read") if len(target_channels) == 0: target_channels = ( @@ -327,6 +325,29 @@ class GuildLogs: reference=message, ) + @staticmethod + def init_log(guild: List[discord.Guild]): + if not os.path.exists(LOG_DIR): + os.mkdir(LOG_DIR) + filename = os.path.join(LOG_DIR, f"{guild.id}{LOG_EXT}") + if not os.path.exists(filename): + with open(filename, mode="wb") as f: + f.write(gzip.compress(bytes("{}", "utf-8"))) + logging.info(f"log {guild.id} > created") + else: + logging.info(f"log {guild.id} > already exists") + + @staticmethod + def remove_log(guild: List[discord.Guild]): + if not os.path.exists(LOG_DIR): + os.mkdir(LOG_DIR) + filename = os.path.join(LOG_DIR, f"{guild.id}{LOG_EXT}") + if os.path.exists(filename): + os.unlink(filename) + logging.info(f"log {guild.id} > removed") + else: + logging.info(f"log {guild.id} > does not exists") + @staticmethod def check_logs(guilds: List[discord.Guild]): logging.info(f"checking logs...") diff --git a/src/main.py b/src/main.py index 69f6b0a..9bd111c 100644 --- a/src/main.py +++ b/src/main.py @@ -6,7 +6,7 @@ if sys.version_info < (3, 7): print("Please upgrade your Python version to 3.7.0 or higher") sys.exit(1) -from utils import emojis +from utils import emojis, gdpr from scanners import ( EmotesScanner, FullScanner, @@ -57,7 +57,13 @@ bot.register_command( "(cancel|stop)", GuildLogs.cancel, "cancel: stop current analysis (not launched with fast)", - "```\n" + "%cancel: Stop current analysis (not launched with fast)\n" + "```", + "```\n%cancel: Stop current analysis (not launched with fast)\n```", +) +bot.register_command( + "gdpr", + gdpr.process, + "gdpr: displays GDPR information", + gdpr.HELP, ) bot.register_command( "words", diff --git a/src/scanners/scanner.py b/src/scanners/scanner.py index 9a7e712..772f63f 100644 --- a/src/scanners/scanner.py +++ b/src/scanners/scanner.py @@ -5,8 +5,15 @@ import logging import re import discord -from utils import no_duplicate, get_intro, delta -from logs import GuildLogs, ChannelLogs, MessageLog, ALREADY_RUNNING, CANCELLED +from utils import no_duplicate, get_intro, delta, gdpr +from logs import ( + GuildLogs, + ChannelLogs, + MessageLog, + ALREADY_RUNNING, + CANCELLED, + NO_FILE, +) class Scanner(ABC): @@ -106,6 +113,8 @@ class Scanner(ABC): "An analysis is already running on this server, please be patient.", reference=message, ) + elif total_msg == NO_FILE: + await message.channel.send(gdpr.TEXT) else: self.msg_count = 0 self.total_msg = 0 diff --git a/src/utils/gdpr.py b/src/utils/gdpr.py new file mode 100644 index 0000000..e19aa98 --- /dev/null +++ b/src/utils/gdpr.py @@ -0,0 +1,68 @@ +import discord + +from logs import GuildLogs + + +HELP = ( + "```\n" + + "%gdpr: Displays GDPR information\n" + + "arguments:\n" + + "* agree - agree to GDPR\n" + + "* revoke - remove this server's data\n" + + "```" +) + +TEXT = ( + "" + + "__**About Analyst-bot's data usage**__\n" + + "**TL;DR**\n" + + "Analyst-bot collects text message information. It does not share collected data with any third-party and data is retained 12 months or until the bot is leaving the guild/server.\n" + + "**Data collection**\n" + + "Analyst-bot collects a Discord guild/server's history when asked to.\n" + + "This includes:\n" + + "- Visible text channel names\n" + + "- Visible text messages: date and time of creation and edition, author, content, reactions and other available metadata (pinned, tts, etc.)\n" + + "This does __not__ includes:\n" + + "- Voice channels and not visible channels\n" + + "- Not visible text messages\n" + + "- Visible text messages' embedded content, images and other attachments\n" + + "**Data processing**\n" + + "Any data collected is only processed in order to produce a one-time report sent to the user immediately. No temporary data are retained.\n" + + "**Data storage and retain policy**\n" + + "Analyst-bot stores the collected data in files that are accessible by the software and its administrator only.\n" + + "Any collected data are retained maximum 12 months until deletion or when the bot is leaving a guild/server.\n" + + "**Data sharing**\n" + + "Analyst-bot does not share the data collected with any third-party.\n" + + "**Right to retract**\n" + + "If you want to have your data removed, you can use the `%gdpr revoke` command or remove this bot from your guild/server.\n" + + "**Terms agreement**\n" + + "By agreeing to these terms, you ensure having the legal age if you are in a country that does have one and you also ensure having the consent of every member involved.\n" + + "\n" + + "*If you want more information, please contact the creator of this bot: .*\n" + + "\n" + + "Type `%gdpr agree` to agree to these terms, `%gdpr revoke` to remove this guild/server's collected data or `%gdpr` to see this message again." +) + +AGREE_TEXT = "Thanks for agreeing for these terms, you can now run analysis on this guild/server." + +REVOKE_TEXT = "This guild/server's data has been deleted. To run new analysis you must agree to the terms again." + + +async def process(client: discord.client, message: discord.Message, *args: str): + args = list(args) + if len(args) == 1: + await message.channel.send(TEXT) + elif len(args) > 2: + await message.channel.send(f"Too many arguments", reference=message) + elif args[1] == "help": + await message.channel.send(HELP, reference=message) + elif args[1] in ["agree", "accept"]: + GuildLogs.init_log(message.channel.guild) + await message.channel.send(AGREE_TEXT, reference=message) + elif args[1] in ["revoke", "cancel"]: + GuildLogs.remove_log(message.channel.guild) + await message.channel.send(REVOKE_TEXT, reference=message) + else: + await message.channel.send( + f"Unrecognized argument: `{args[1]}`", reference=message + )