26 Commits

Author SHA1 Message Date
Klemek 78ad50be22 fix counter 2023-10-16 11:16:22 +02:00
Klemek 87217ac31c fix fix 2023-10-16 11:13:42 +02:00
Klemek c503673cbc fix utc_today 2023-10-16 11:10:02 +02:00
Klemek fffaae130c fix dates 2023-10-16 11:03:46 +02:00
Klemek eb6607de60 bump version 2023-10-16 10:18:01 +02:00
Klemek 18fe35e10f 30 to 90 days 2023-10-16 10:17:49 +02:00
Klemek cb37b052c9 fix sanity check 2022-09-02 10:22:12 +02:00
Klemek d8e9e48a13 update requirements.txt 2022-09-02 09:52:33 +02:00
Klemek 6dd45af14f discord v2 2022-09-02 09:41:52 +02:00
Klemek 3cca5c38f5 fix requirements.txt 2022-08-23 09:33:00 +02:00
Klemek 667fb0d414 Merge branch 'master' of github.com:klemek/discord-analyst 2022-08-22 09:22:26 +02:00
Klemek 5089d7d10b force discord.py version 2022-08-22 09:22:23 +02:00
Klemek afb3d7d663 remove invalid stats 2022-05-29 00:09:33 +02:00
Klemek cf2fa3208e Update gdpr.py 2022-03-07 23:08:34 +01:00
Klemek 7b4d952f9e Update GDPR.md 2022-03-07 23:08:11 +01:00
Klemek 2850c7e630 Update gdpr.py 2022-03-07 22:29:37 +01:00
Klemek c252f4cc67 Update GDPR.md 2022-03-07 22:29:09 +01:00
Klemek 96a335bea6 update requirements.txt 2022-03-07 14:12:56 +01:00
Klemek fb48a256ce v1.17 2022-03-07 13:59:42 +01:00
Klemek 0ceffca196 merge with master 2022-03-07 13:28:29 +01:00
Klemek 51911604a9 Update main.py 2022-03-07 13:07:02 +01:00
Klemek 660341127d Update guild_logs.py 2022-03-07 13:06:28 +01:00
Klemek d419a7f2d2 Rename TOS.md to GDPR.md 2022-03-07 12:53:49 +01:00
Klemek b4b818a5c3 Create TOS.md 2022-03-07 12:53:29 +01:00
Klemek b39bc5c16b Update presence_scanner.py 2021-11-05 13:05:17 +01:00
Klemek 130cc5370c Merge pull request #56 from Klemek/dev
v1.16.1
2021-07-13 18:47:12 +02:00
16 changed files with 169 additions and 54 deletions
+4
View File
@@ -0,0 +1,4 @@
DISCORD_TOKEN=
PYTHONPATH=./src
CRYPT_KEY=
LOG_DIR=logs
+46
View File
@@ -0,0 +1,46 @@
# About Analyst-bot's data usage
## TL;DR
Analyst-bot collects text message information. It does not share collected data with any third party, and data is retained for 90 days or until the bot leaves the guild/server.
## Data collection
Analyst-bot collects a Discord guild/server's history when asked to.
This includes:
- Visible text channel names
- Visible text messages: date and time of creation and edition, author, content, reactions and other available metadata (pinned, tts, etc.)
This does __not__ include:
- Voice channels and not visible channels
- Not visible text messages
- Visible text messages' embedded content, images and other attachments
## Data processing
Any data collected is only processed in order to produce a one-time report sent to the user immediately. No temporary data are retained.
## Data storage and retain policy
Analyst-bot stores the collected data in files that are accessible by the software and its administrator only.
Any collected data is retained for a maximum of 90 days, or deleted earlier when the bot leaves a guild/server.
## Data sharing
Analyst-bot does not share the data collected with any third-party.
## Right to retract
If you want to have your data removed, you can use the `%gdpr revoke` command or remove this bot from your guild/server.
## Terms agreement
By agreeing to these terms, you confirm that you are of legal age, if your country defines one, and that you have the consent of every member involved.
*If you want more information, please contact the creator of this bot: <https://github.com/Klemek/discord-analyst>.*
Type `%gdpr agree` to agree to these terms, `%gdpr revoke` to remove this guild/server's collected data or `%gdpr` to see this message again.
+4 -1
View File
@@ -125,11 +125,14 @@ python3 src/main.py
## Changelog ## Changelog
* **v1.17**
* compliance with a 30-day data retention policy, and data encryption
* improvements and bug fix
* **v1.16** * **v1.16**
* `%freq graph` graph hours frequency along the week * `%freq graph` graph hours frequency along the week
* uses discord new time format * uses discord new time format
* `%freq` now shows quietest day of week and hour of day * `%freq` now shows quietest day of week and hour of day
* improvments and bug fix * improvements and bug fix
* **v1.15** * **v1.15**
* `nsfw:allow/only` filter nsfw channels * `nsfw:allow/only` filter nsfw channels
* `%find` can use regexes * `%find` can use regexes
+6 -6
View File
@@ -1,6 +1,6 @@
discord.py==1.7.0 discord.py>=2.0.0
python-dotenv==0.15.0 python-dotenv>=0.15.0
python-dateutil==2.8.1 python-dateutil>=2.8.1
git+git://github.com/Klemek/miniscord.git matplotlib>=3.4.2
numpy cryptography>=2.8
matplotlib git+https://github.com/Klemek/miniscord.git
+2 -2
View File
@@ -4,7 +4,7 @@ from collections import defaultdict
# Custom libs # Custom libs
from utils import plural, from_now, percent, val_sum, top_key from utils import plural, from_now, percent, val_sum, top_key, utc_today
class Counter: class Counter:
@@ -25,7 +25,7 @@ class Counter:
if self.last_used is None: if self.last_used is None:
return 0 return 0
return self.all_usages() + 1 / ( return self.all_usages() + 1 / (
100000 * ((datetime.today() - self.last_used).days + 1) 100000 * (abs((utc_today() - self.last_used).days) + 1)
) )
def all_usages(self) -> int: def all_usages(self) -> int:
+3 -3
View File
@@ -5,7 +5,7 @@ import discord
# Custom libs # Custom libs
from utils import mention, plural, from_now, top_key, percent from utils import mention, plural, from_now, top_key, percent, utc_today
class Emoji: class Emoji:
@@ -44,14 +44,14 @@ class Emoji:
) )
def life_days(self) -> int: def life_days(self) -> int:
return (datetime.today() - self.emoji.created_at).days return (utc_today() - self.emoji.created_at).days
def use_days(self) -> int: def use_days(self) -> int:
# If never used, use creation date instead # If never used, use creation date instead
if self.last_used is None: if self.last_used is None:
return self.life_days() return self.life_days()
else: else:
return (datetime.today() - self.last_used).days return (utc_today() - self.last_used).days
def get_top_member(self) -> int: def get_top_member(self) -> int:
return top_key(self.members) return top_key(self.members)
-1
View File
@@ -2,7 +2,6 @@ from typing import List
from datetime import timedelta from datetime import timedelta
import calendar import calendar
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np
from io import BytesIO from io import BytesIO
import discord import discord
import time import time
+4 -4
View File
@@ -54,16 +54,16 @@ class Presence:
f"- **most visited channel**: {channel_mention(top_channel)} ({self.channel_usage[top_channel]:,} msg, {percent(self.channel_usage[top_channel]/channel_sum)})" f"- **most visited channel**: {channel_mention(top_channel)} ({self.channel_usage[top_channel]:,} msg, {percent(self.channel_usage[top_channel]/channel_sum)})"
if show_top_channel if show_top_channel
else "", else "",
f"- **most contributed channel**: {channel_mention(found_in)} ({self.channel_usage[found_in]:,} msg, {percent(self.channel_usage[found_in]/self.channel_total[found_in])} of {type})" f"- **most contributed channel**: {channel_mention(found_in)} ({self.channel_usage[found_in]:,} msg,)"
if show_top_channel and member_specific if show_top_channel and member_specific
else "", else "",
f"- **was mentioned**: {plural(mention_sum, 'time')} ({percent(mention_sum/val_sum(self.mention_count))} of {type})" f"- **was mentioned**: {plural(mention_sum, 'time')}"
if member_specific and len(self.mentions) > 0 if member_specific and len(self.mentions) > 0
else "", else "",
f"- **mostly mentioned by**: {mention(top_mention)} ({plural(self.mentions[top_mention], 'time')}, {percent(self.mentions[top_mention]/mention_sum)})" f"- **mostly mentioned by**: {mention(top_mention)} ({plural(self.mentions[top_mention], 'time')}, {percent(self.mentions[top_mention]/mention_sum)})"
if member_specific and len(self.mentions) > 0 if member_specific and len(self.mentions) > 0
else "", else "",
f"- **mentioned others**: {plural(mention_others_sum, 'time')} ({percent(mention_others_sum/val_sum(self.mention_count))} of {type})" f"- **mentioned others**: {plural(mention_others_sum, 'time')}"
if len(self.mention_others) > 0 and member_specific if len(self.mention_others) > 0 and member_specific
else "", else "",
f"- **mostly mentioned**: {mention(top_mention_others)} ({plural(self.mention_others[top_mention_others], 'time')}, {percent(self.mention_others[top_mention_others]/mention_others_sum)})" f"- **mostly mentioned**: {mention(top_mention_others)} ({plural(self.mention_others[top_mention_others], 'time')}, {percent(self.mention_others[top_mention_others]/mention_others_sum)})"
@@ -81,7 +81,7 @@ class Presence:
f"- **reactions**: {plural(total_reaction_used, 'time')}" f"- **reactions**: {plural(total_reaction_used, 'time')}"
if len(self.reactions) > 0 and not member_specific if len(self.reactions) > 0 and not member_specific
else "", else "",
f"- **reactions**: {plural(total_reaction_used, 'time')} ({percent(total_reaction_used/val_sum(self.used_reaction))} of {type})" f"- **reactions**: {plural(total_reaction_used, 'time')}"
if len(self.reactions) > 0 and member_specific if len(self.reactions) > 0 and member_specific
else "", else "",
f"- **top reactions**: {mention(top_reaction_member)} ({plural(self.used_reaction[top_reaction_member], 'time')}, {percent(self.used_reaction[top_reaction_member]/val_sum(self.used_reaction))})" f"- **top reactions**: {mention(top_reaction_member)} ({plural(self.used_reaction[top_reaction_member], 'time')}, {percent(self.used_reaction[top_reaction_member]/val_sum(self.used_reaction))})"
+2 -2
View File
@@ -66,8 +66,8 @@ class ChannelLogs:
is_empty = self.last_message_id is None is_empty = self.last_message_id is None
try: try:
if is_empty: if is_empty:
sanity_check = len(await channel.history(limit=1).flatten()) sanity_check = len([message async for message in channel.history(limit=1)])
if sanity_check != 1: if sanity_check < 1:
yield len(self.messages), True yield len(self.messages), True
return return
# load backward # load backward
+50 -20
View File
@@ -8,28 +8,30 @@ import time
import logging import logging
import asyncio import asyncio
import threading import threading
from dotenv import load_dotenv
from cryptography.fernet import Fernet
from . import ChannelLogs from . import ChannelLogs
from utils import code_message, delta, deltas from utils import code_message, delta, deltas
LOG_DIR = "logs"
LOG_EXT = ".logz"
current_analysis = [] current_analysis = []
current_analysis_lock = threading.Lock() current_analysis_lock = threading.Lock()
ALREADY_RUNNING = -100 ALREADY_RUNNING = -100
CANCELLED = -200 CANCELLED = -200
NO_FILE = -300 NO_FILE = -300
# 5 minutes, assume 'fast' arg load_dotenv()
MIN_MODIFICATION_TIME = 5 * 60
# ~1 year, remove log file
MAX_MODIFICATION_TIME = 365 * 24 * 60 * 60
# Storage settings, read from the environment with built-in defaults.
LOG_DIR = os.getenv("LOG_DIR", "logs")
LOG_EXT = os.getenv("LOG_EXT", ".logz")
# Empty string means no encryption key was configured.
CRYPT_KEY = os.getenv("CRYPT_KEY", "")

# 5 minutes, assume 'fast' arg
# (reads its own MIN_MODIFICATION_TIME variable, so overriding the retention
# period below no longer changes this threshold as a side effect)
MIN_MODIFICATION_TIME = int(os.getenv("MIN_MODIFICATION_TIME", 5 * 60))
# 90 days, remove log file
MAX_MODIFICATION_TIME = int(os.getenv("MAX_MODIFICATION_TIME", 90 * 24 * 60 * 60))
class Worker: class Worker:
def __init__( def __init__(
@@ -129,29 +131,41 @@ class GuildLogs:
channels = {} channels = {}
try: try:
last_time = os.path.getmtime(self.log_file) last_time = os.path.getmtime(self.log_file)
gziped_data = None encrypted_data = None
await code_message(progress, "Reading saved history (1/4)...") await code_message(progress, "Reading saved history (1/5)...")
t0 = datetime.now() t0 = datetime.now()
with open(self.log_file, mode="rb") as f: with open(self.log_file, mode="rb") as f:
gziped_data = f.read() encrypted_data = f.read()
logging.info(f"log {self.guild.id} > read in {delta(t0):,}ms") logging.info(f"log {self.guild.id} > read in {delta(t0):,}ms")
if self.check_cancelled(): if self.check_cancelled():
return CANCELLED, 0 return CANCELLED, 0
await code_message(progress, "Reading saved history (2/4)...") await code_message(progress, "Reading saved history (2/5)...")
if CRYPT_KEY == "" or CRYPT_KEY is None:
gziped_data = encrypted_data
try:
t0 = datetime.now()
fernet = Fernet(CRYPT_KEY)
gziped_data = fernet.decrypt(encrypted_data)
logging.info(f"log {self.guild.id} > decrypted in {delta(t0):,}ms")
except:
gziped_data = encrypted_data
if self.check_cancelled():
return CANCELLED, 0
await code_message(progress, "Reading saved history (3/5)...")
t0 = datetime.now() t0 = datetime.now()
json_data = gzip.decompress(gziped_data) json_data = gzip.decompress(gziped_data)
del gziped_data del gziped_data
logging.info(f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms") logging.info(f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms")
if self.check_cancelled(): if self.check_cancelled():
return CANCELLED, 0 return CANCELLED, 0
await code_message(progress, "Reading saved history (3/4)...") await code_message(progress, "Reading saved history (4/5)...")
t0 = datetime.now() t0 = datetime.now()
channels = json.loads(json_data) channels = json.loads(json_data)
del json_data del json_data
logging.info(f"log {self.guild.id} > json parse in {delta(t0):,}ms") logging.info(f"log {self.guild.id} > json parse in {delta(t0):,}ms")
if self.check_cancelled(): if self.check_cancelled():
return CANCELLED, 0 return CANCELLED, 0
await code_message(progress, "Reading saved history (4/4)...") await code_message(progress, "Reading saved history (5/5)...")
t0 = datetime.now() t0 = datetime.now()
self.channels = { self.channels = {
int(id): ChannelLogs(channels[id], self) for id in channels int(id): ChannelLogs(channels[id], self) for id in channels
@@ -288,7 +302,7 @@ class GuildLogs:
return CANCELLED, 0 return CANCELLED, 0
await code_message( await code_message(
progress, progress,
f"Saving history (1/3)...\n{real_total_msg:,} messages in {real_total_chan:,} channels", f"Saving history (1/4)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
) )
t0 = datetime.now() t0 = datetime.now()
json_data = bytes(json.dumps(self.dict()), "utf-8") json_data = bytes(json.dumps(self.dict()), "utf-8")
@@ -299,7 +313,7 @@ class GuildLogs:
return CANCELLED, 0 return CANCELLED, 0
await code_message( await code_message(
progress, progress,
f"Saving history (2/3)...\n{real_total_msg:,} messages in {real_total_chan:,} channels", f"Saving history (2/4)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
) )
t0 = datetime.now() t0 = datetime.now()
gziped_data = gzip.compress(json_data) gziped_data = gzip.compress(json_data)
@@ -311,12 +325,28 @@ class GuildLogs:
return CANCELLED, 0 return CANCELLED, 0
await code_message( await code_message(
progress, progress,
f"Saving history (3/3)...\n{real_total_msg:,} messages in {real_total_chan:,} channels", f"Saving history (3/4)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
)
if CRYPT_KEY == "" or CRYPT_KEY is None:
encrypted_data = gziped_data
try:
t0 = datetime.now()
fernet = Fernet(CRYPT_KEY)
encrypted_data = fernet.encrypt(gziped_data)
logging.info(f"log {self.guild.id} > encrypted in {delta(t0):,}ms -> {len(gziped_data) / deltas(t0):,.3f} b/s")
except:
encrypted_data = gziped_data
if self.check_cancelled():
return CANCELLED, 0
await code_message(
progress,
f"Saving history (4/4)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
) )
t0 = datetime.now() t0 = datetime.now()
with open(self.log_file, mode="wb") as f: with open(self.log_file, mode="wb") as f:
f.write(gziped_data) f.write(encrypted_data)
del gziped_data del gziped_data
del encrypted_data
logging.info( logging.info(
f"log {self.guild.id} > saved in {delta(t0):,}ms -> {real_total_msg / deltas(t0):,.3f} m/s" f"log {self.guild.id} > saved in {delta(t0):,}ms -> {real_total_msg / deltas(t0):,.3f} m/s"
) )
+1 -1
View File
@@ -18,7 +18,7 @@ emojis.load_emojis()
bot = Bot( bot = Bot(
"Discord Analyst", "Discord Analyst",
"1.16.1", "1.17.5",
alias="%", alias="%",
) )
+2 -2
View File
@@ -69,8 +69,8 @@ class PresenceScanner(Scanner):
for mention in message.mentions: for mention in message.mentions:
pres.mention_others[mention] += 1 pres.mention_others[mention] += 1
pres.messages[message.author] += 1 pres.messages[message.author] += 1
pres.channel_total[channel.id] += 1 pres.channel_total[channel.id] += 1
pres.mention_count[message.author] += len(message.mentions) pres.mention_count[message.author] += len(message.mentions)
if len(raw_members) > 0: if len(raw_members) > 0:
for mention in message.mentions: for mention in message.mentions:
if mention in raw_members: if mention in raw_members:
+3 -2
View File
@@ -18,6 +18,7 @@ from utils import (
command_cache, command_cache,
FilterLevel, FilterLevel,
SPLIT_TOKEN, SPLIT_TOKEN,
utc_now
) )
from logs import ( from logs import (
GuildLogs, GuildLogs,
@@ -131,7 +132,7 @@ class Scanner(ABC):
self.start_date = None if len(dates) < 1 else min(dates) self.start_date = None if len(dates) < 1 else min(dates)
self.stop_date = None if len(dates) < 2 else max(dates) self.stop_date = None if len(dates) < 2 else max(dates)
if self.start_date is not None and self.start_date > datetime.now(): if self.start_date is not None and self.start_date > utc_now():
await message.channel.send( await message.channel.send(
f"Start date is after today", reference=message f"Start date is after today", reference=message
) )
@@ -228,7 +229,7 @@ class Scanner(ABC):
), ),
) )
if self.stop_date is None: if self.stop_date is None:
self.stop_date = datetime.utcnow() self.stop_date = utc_now()
self.msg_count = 0 self.msg_count = 0
self.total_msg = 0 self.total_msg = 0
+26
View File
@@ -0,0 +1,26 @@
"""One-off migration: encrypt every stored log file that is not yet encrypted.

Reads LOG_DIR, LOG_EXT and CRYPT_KEY from the environment (or a .env file),
then rewrites in place each matching file that cannot be decrypted with the
configured key.
"""
import os
import os.path
from dotenv import load_dotenv
from cryptography.fernet import Fernet, InvalidToken

load_dotenv()

LOG_DIR = os.getenv("LOG_DIR", "logs")
# Reads LOG_EXT (not LOG_DIR): otherwise a custom LOG_DIR would be used as the
# file extension and no log file would ever match.
LOG_EXT = os.getenv("LOG_EXT", ".logz")
CRYPT_KEY = os.getenv("CRYPT_KEY", "")

if not CRYPT_KEY:
    # Fail with a readable message instead of an error raised inside Fernet().
    raise SystemExit("CRYPT_KEY is not set: cannot encrypt log files")

fernet = Fernet(CRYPT_KEY)

for item in os.listdir(LOG_DIR):
    if not item.endswith(LOG_EXT):
        continue
    path = os.path.join(LOG_DIR, item)
    with open(path, mode="rb") as f:
        data = f.read()
    try:
        # A successful decrypt means the file is already a token for this key.
        fernet.decrypt(data)
    except InvalidToken:
        # Not a valid token for this key: treat the content as plain data.
        with open(path, mode="wb") as f:
            f.write(fernet.encrypt(data))
        print(f"{item} was encrypted")
    else:
        print(f"{item} already encrypted")
+2 -2
View File
@@ -13,7 +13,7 @@ arguments:
TEXT = """ TEXT = """
__**About Analyst-bot's data usage**__ __**About Analyst-bot's data usage**__
**TL;DR** **TL;DR**
Analyst-bot collects text message information. It does not share collected data with any third-party and data is retained 18 months or until the bot is leaving the guild/server. Analyst-bot collects text message information. It does not share collected data with any third-party and data is retained 90 days or until the bot is leaving the guild/server.
**Data collection** **Data collection**
Analyst-bot collects a Discord guild/server's history when asked to. Analyst-bot collects a Discord guild/server's history when asked to.
This includes: This includes:
@@ -27,7 +27,7 @@ This does __not__ includes:
Any data collected is only processed in order to produce a one-time report sent to the user immediately. No temporary data are retained. Any data collected is only processed in order to produce a one-time report sent to the user immediately. No temporary data are retained.
**Data storage and retain policy** **Data storage and retain policy**
Analyst-bot stores the collected data in files that are accessible by the software and its administrator only. Analyst-bot stores the collected data in files that are accessible by the software and its administrator only.
Any collected data are retained maximum 18 months until deletion or when the bot is leaving a guild/server. Any collected data are retained maximum 90 days until deletion or when the bot is leaving a guild/server.
**Data sharing** **Data sharing**
Analyst-bot does not share the data collected with any third-party. Analyst-bot does not share the data collected with any third-party.
**Right to retract** **Right to retract**
+14 -8
View File
@@ -4,7 +4,7 @@ import os
import logging import logging
import discord import discord
import math import math
from datetime import datetime, timedelta from datetime import datetime, timedelta, timezone
import re import re
import time import time
import dateutil.parser import dateutil.parser
@@ -116,7 +116,7 @@ def escape_text(text: str) -> str:
return discord.utils.escape_markdown(discord.utils.escape_mentions(text)) return discord.utils.escape_markdown(discord.utils.escape_mentions(text))
class FakeMessage: class FakeMessage(discord.abc.Snowflake):
def __init__(self, id: int): def __init__(self, id: int):
self.id = id self.id = id
@@ -258,13 +258,19 @@ def parse_iso_datetime(str_date: str) -> datetime:
RELATIVE_REGEX = r"(yesterday|today|\d*hours?|\d+h(ours?)?|\d*days?|\d+d(ays?)?|\d*weeks?|\d+w(eeks?)?|\d*months?|\d+m(onths?)?|\d*years?|\d+y(ears?)?)" RELATIVE_REGEX = r"(yesterday|today|\d*hours?|\d+h(ours?)?|\d*days?|\d+d(ays?)?|\d*weeks?|\d+w(eeks?)?|\d*months?|\d+m(onths?)?|\d*years?|\d+y(ears?)?)"
def utc_now() -> datetime:
    """Return the current moment as a timezone-aware datetime in UTC."""
    return datetime.now(timezone.utc)


def utc_today() -> datetime:
    """Return midnight (00:00:00) of the current UTC day, timezone-aware."""
    # Zeroing the time fields keeps the UTC date and tzinfo of utc_now().
    return utc_now().replace(hour=0, minute=0, second=0, microsecond=0)
def parse_relative_time(src: str) -> datetime: def parse_relative_time(src: str) -> datetime:
today = datetime.utcnow().date()
today = datetime(today.year, today.month, today.day)
if src == "today": if src == "today":
return today return utc_today()
elif src == "yesterday": elif src == "yesterday":
return today - relativedelta(days=1) return utc_today() - relativedelta(days=1)
else: else:
m = re.match("(\d*)(\w+)", src) m = re.match("(\d*)(\w+)", src)
delta = None delta = None
@@ -280,7 +286,7 @@ def parse_relative_time(src: str) -> datetime:
delta = relativedelta(months=value) delta = relativedelta(months=value)
elif unit == "y": elif unit == "y":
delta = relativedelta(years=value) delta = relativedelta(years=value)
return datetime.utcnow() - delta return utc_now() - delta
def parse_time(src: str) -> datetime: def parse_time(src: str) -> datetime:
@@ -344,7 +350,7 @@ def get_intro(
""" """
time_text = "" time_text = ""
if start_datetime is not None: if start_datetime is not None:
stop_datetime = datetime.now() if stop_datetime is None else stop_datetime stop_datetime = utc_now() if stop_datetime is None else stop_datetime
time_text = f" (in {str_delta(stop_datetime - start_datetime)})" time_text = f" (in {str_delta(stop_datetime - start_datetime)})"
# Show all data (members, channels) when it's less than 5 units # Show all data (members, channels) when it's less than 5 units
if len(members) == 0: if len(members) == 0: