Compare commits
26 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 78ad50be22 | |||
| 87217ac31c | |||
| c503673cbc | |||
| fffaae130c | |||
| eb6607de60 | |||
| 18fe35e10f | |||
| cb37b052c9 | |||
| d8e9e48a13 | |||
| 6dd45af14f | |||
| 3cca5c38f5 | |||
| 667fb0d414 | |||
| 5089d7d10b | |||
| afb3d7d663 | |||
| cf2fa3208e | |||
| 7b4d952f9e | |||
| 2850c7e630 | |||
| c252f4cc67 | |||
| 96a335bea6 | |||
| fb48a256ce | |||
| 0ceffca196 | |||
| 51911604a9 | |||
| 660341127d | |||
| d419a7f2d2 | |||
| b4b818a5c3 | |||
| b39bc5c16b | |||
| 130cc5370c |
@@ -0,0 +1,4 @@
|
||||
DISCORD_TOKEN=
|
||||
PYTHONPATH=./src
|
||||
CRYPT_KEY=
|
||||
LOG_DIR=logs
|
||||
@@ -0,0 +1,46 @@
|
||||
# About Analyst-bot's data usage
|
||||
|
||||
## TL;DR
|
||||
|
||||
Analyst-bot collects text message information. It does not share collected data with any third party, and data is retained for 90 days or until the bot leaves the guild/server.
|
||||
|
||||
## Data collection
|
||||
|
||||
Analyst-bot collects a Discord guild/server's history when asked to.
|
||||
|
||||
This includes:
|
||||
|
||||
- Visible text channel names
|
||||
- Visible text messages: date and time of creation and last edit, author, content, reactions and other available metadata (pinned, tts, etc.)
|
||||
|
||||
This does __not__ include:
|
||||
|
||||
- Voice channels and not visible channels
|
||||
- Not visible text messages
|
||||
- Visible text messages' embedded content, images and other attachments
|
||||
|
||||
## Data processing
|
||||
|
||||
Any data collected is only processed in order to produce a one-time report sent to the user immediately. No temporary data are retained.
|
||||
|
||||
## Data storage and retention policy
|
||||
|
||||
Analyst-bot stores the collected data in files that are accessible by the software and its administrator only.
|
||||
|
||||
Collected data are retained for a maximum of 90 days before deletion, or until the bot leaves the guild/server.
|
||||
|
||||
## Data sharing
|
||||
|
||||
Analyst-bot does not share the data collected with any third-party.
|
||||
|
||||
## Right to retract
|
||||
|
||||
If you want to have your data removed, you can use the `%gdpr revoke` command or remove this bot from your guild/server.
|
||||
|
||||
## Terms agreement
|
||||
|
||||
By agreeing to these terms, you confirm that you are of legal age if your country defines one, and that you have the consent of every member involved.
|
||||
|
||||
*If you want more information, please contact the creator of this bot: <https://github.com/Klemek/discord-analyst>.*
|
||||
|
||||
Type `%gdpr agree` to agree to these terms, `%gdpr revoke` to remove this guild/server's collected data or `%gdpr` to see this message again.
|
||||
@@ -125,11 +125,14 @@ python3 src/main.py
|
||||
|
||||
## Changelog
|
||||
|
||||
* **v1.17**
|
||||
* compliance with 30 days data keeping policy and data encryption
|
||||
* improvements and bug fix
|
||||
* **v1.16**
|
||||
* `%freq graph` graph hours frequency along the week
|
||||
* uses discord new time format
|
||||
* `%freq` now shows quietest day of week and hour of day
|
||||
* improvments and bug fix
|
||||
* improvements and bug fix
|
||||
* **v1.15**
|
||||
* `nsfw:allow/only` filter nsfw channels
|
||||
* `%find` can use regexes
|
||||
|
||||
+6
-6
@@ -1,6 +1,6 @@
|
||||
discord.py==1.7.0
|
||||
python-dotenv==0.15.0
|
||||
python-dateutil==2.8.1
|
||||
git+git://github.com/Klemek/miniscord.git
|
||||
numpy
|
||||
matplotlib
|
||||
discord.py>=2.0.0
|
||||
python-dotenv>=0.15.0
|
||||
python-dateutil>=2.8.1
|
||||
matplotlib>=3.4.2
|
||||
cryptography>=2.8
|
||||
git+https://github.com/Klemek/miniscord.git
|
||||
@@ -4,7 +4,7 @@ from collections import defaultdict
|
||||
|
||||
# Custom libs
|
||||
|
||||
from utils import plural, from_now, percent, val_sum, top_key
|
||||
from utils import plural, from_now, percent, val_sum, top_key, utc_today
|
||||
|
||||
|
||||
class Counter:
|
||||
@@ -25,7 +25,7 @@ class Counter:
|
||||
if self.last_used is None:
|
||||
return 0
|
||||
return self.all_usages() + 1 / (
|
||||
100000 * ((datetime.today() - self.last_used).days + 1)
|
||||
100000 * (abs((utc_today() - self.last_used).days) + 1)
|
||||
)
|
||||
|
||||
def all_usages(self) -> int:
|
||||
|
||||
@@ -5,7 +5,7 @@ import discord
|
||||
|
||||
# Custom libs
|
||||
|
||||
from utils import mention, plural, from_now, top_key, percent
|
||||
from utils import mention, plural, from_now, top_key, percent, utc_today
|
||||
|
||||
|
||||
class Emoji:
|
||||
@@ -44,14 +44,14 @@ class Emoji:
|
||||
)
|
||||
|
||||
def life_days(self) -> int:
|
||||
return (datetime.today() - self.emoji.created_at).days
|
||||
return (utc_today() - self.emoji.created_at).days
|
||||
|
||||
def use_days(self) -> int:
|
||||
# If never used, use creation date instead
|
||||
if self.last_used is None:
|
||||
return self.life_days()
|
||||
else:
|
||||
return (datetime.today() - self.last_used).days
|
||||
return (utc_today() - self.last_used).days
|
||||
|
||||
def get_top_member(self) -> int:
|
||||
return top_key(self.members)
|
||||
|
||||
@@ -2,7 +2,6 @@ from typing import List
|
||||
from datetime import timedelta
|
||||
import calendar
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from io import BytesIO
|
||||
import discord
|
||||
import time
|
||||
|
||||
@@ -54,16 +54,16 @@ class Presence:
|
||||
f"- **most visited channel**: {channel_mention(top_channel)} ({self.channel_usage[top_channel]:,} msg, {percent(self.channel_usage[top_channel]/channel_sum)})"
|
||||
if show_top_channel
|
||||
else "",
|
||||
f"- **most contributed channel**: {channel_mention(found_in)} ({self.channel_usage[found_in]:,} msg, {percent(self.channel_usage[found_in]/self.channel_total[found_in])} of {type})"
|
||||
f"- **most contributed channel**: {channel_mention(found_in)} ({self.channel_usage[found_in]:,} msg,)"
|
||||
if show_top_channel and member_specific
|
||||
else "",
|
||||
f"- **was mentioned**: {plural(mention_sum, 'time')} ({percent(mention_sum/val_sum(self.mention_count))} of {type})"
|
||||
f"- **was mentioned**: {plural(mention_sum, 'time')}"
|
||||
if member_specific and len(self.mentions) > 0
|
||||
else "",
|
||||
f"- **mostly mentioned by**: {mention(top_mention)} ({plural(self.mentions[top_mention], 'time')}, {percent(self.mentions[top_mention]/mention_sum)})"
|
||||
if member_specific and len(self.mentions) > 0
|
||||
else "",
|
||||
f"- **mentioned others**: {plural(mention_others_sum, 'time')} ({percent(mention_others_sum/val_sum(self.mention_count))} of {type})"
|
||||
f"- **mentioned others**: {plural(mention_others_sum, 'time')}"
|
||||
if len(self.mention_others) > 0 and member_specific
|
||||
else "",
|
||||
f"- **mostly mentioned**: {mention(top_mention_others)} ({plural(self.mention_others[top_mention_others], 'time')}, {percent(self.mention_others[top_mention_others]/mention_others_sum)})"
|
||||
@@ -81,7 +81,7 @@ class Presence:
|
||||
f"- **reactions**: {plural(total_reaction_used, 'time')}"
|
||||
if len(self.reactions) > 0 and not member_specific
|
||||
else "",
|
||||
f"- **reactions**: {plural(total_reaction_used, 'time')} ({percent(total_reaction_used/val_sum(self.used_reaction))} of {type})"
|
||||
f"- **reactions**: {plural(total_reaction_used, 'time')}"
|
||||
if len(self.reactions) > 0 and member_specific
|
||||
else "",
|
||||
f"- **top reactions**: {mention(top_reaction_member)} ({plural(self.used_reaction[top_reaction_member], 'time')}, {percent(self.used_reaction[top_reaction_member]/val_sum(self.used_reaction))})"
|
||||
|
||||
@@ -66,8 +66,8 @@ class ChannelLogs:
|
||||
is_empty = self.last_message_id is None
|
||||
try:
|
||||
if is_empty:
|
||||
sanity_check = len(await channel.history(limit=1).flatten())
|
||||
if sanity_check != 1:
|
||||
sanity_check = len([message async for message in channel.history(limit=1)])
|
||||
if sanity_check < 1:
|
||||
yield len(self.messages), True
|
||||
return
|
||||
# load backward
|
||||
|
||||
+50
-20
@@ -8,28 +8,30 @@ import time
|
||||
import logging
|
||||
import asyncio
|
||||
import threading
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from cryptography.fernet import Fernet
|
||||
|
||||
from . import ChannelLogs
|
||||
from utils import code_message, delta, deltas
|
||||
|
||||
|
||||
LOG_DIR = "logs"
|
||||
LOG_EXT = ".logz"
|
||||
|
||||
current_analysis = []
|
||||
current_analysis_lock = threading.Lock()
|
||||
|
||||
|
||||
ALREADY_RUNNING = -100
|
||||
CANCELLED = -200
|
||||
NO_FILE = -300
|
||||
|
||||
# 5 minutes, assume 'fast' arg
|
||||
MIN_MODIFICATION_TIME = 5 * 60
|
||||
# ~1 year, remove log file
|
||||
MAX_MODIFICATION_TIME = 365 * 24 * 60 * 60
|
||||
load_dotenv()
|
||||
|
||||
LOG_DIR = os.getenv("LOG_DIR", "logs")
|
||||
LOG_EXT = os.getenv("LOG_EXT", ".logz")
|
||||
CRYPT_KEY = os.getenv("CRYPT_KEY", "")
|
||||
|
||||
# Log files modified less than this many seconds ago are treated as fresh
# (assume 'fast' arg). Default: 5 minutes.
# Reads its own MIN_MODIFICATION_TIME variable so that configuring the
# maximum does not silently override the minimum.
MIN_MODIFICATION_TIME = int(os.getenv("MIN_MODIFICATION_TIME", 5 * 60))

# Log files older than this many seconds are removed. Default: 90 days.
MAX_MODIFICATION_TIME = int(os.getenv("MAX_MODIFICATION_TIME", 90 * 24 * 60 * 60))
|
||||
|
||||
class Worker:
|
||||
def __init__(
|
||||
@@ -129,29 +131,41 @@ class GuildLogs:
|
||||
channels = {}
|
||||
try:
|
||||
last_time = os.path.getmtime(self.log_file)
|
||||
gziped_data = None
|
||||
await code_message(progress, "Reading saved history (1/4)...")
|
||||
encrypted_data = None
|
||||
await code_message(progress, "Reading saved history (1/5)...")
|
||||
t0 = datetime.now()
|
||||
with open(self.log_file, mode="rb") as f:
|
||||
gziped_data = f.read()
|
||||
encrypted_data = f.read()
|
||||
logging.info(f"log {self.guild.id} > read in {delta(t0):,}ms")
|
||||
if self.check_cancelled():
|
||||
return CANCELLED, 0
|
||||
await code_message(progress, "Reading saved history (2/4)...")
|
||||
await code_message(progress, "Reading saved history (2/5)...")
|
||||
if CRYPT_KEY == "" or CRYPT_KEY is None:
|
||||
gziped_data = encrypted_data
|
||||
try:
|
||||
t0 = datetime.now()
|
||||
fernet = Fernet(CRYPT_KEY)
|
||||
gziped_data = fernet.decrypt(encrypted_data)
|
||||
logging.info(f"log {self.guild.id} > decrypted in {delta(t0):,}ms")
|
||||
except:
|
||||
gziped_data = encrypted_data
|
||||
if self.check_cancelled():
|
||||
return CANCELLED, 0
|
||||
await code_message(progress, "Reading saved history (3/5)...")
|
||||
t0 = datetime.now()
|
||||
json_data = gzip.decompress(gziped_data)
|
||||
del gziped_data
|
||||
logging.info(f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms")
|
||||
if self.check_cancelled():
|
||||
return CANCELLED, 0
|
||||
await code_message(progress, "Reading saved history (3/4)...")
|
||||
await code_message(progress, "Reading saved history (4/5)...")
|
||||
t0 = datetime.now()
|
||||
channels = json.loads(json_data)
|
||||
del json_data
|
||||
logging.info(f"log {self.guild.id} > json parse in {delta(t0):,}ms")
|
||||
if self.check_cancelled():
|
||||
return CANCELLED, 0
|
||||
await code_message(progress, "Reading saved history (4/4)...")
|
||||
await code_message(progress, "Reading saved history (5/5)...")
|
||||
t0 = datetime.now()
|
||||
self.channels = {
|
||||
int(id): ChannelLogs(channels[id], self) for id in channels
|
||||
@@ -288,7 +302,7 @@ class GuildLogs:
|
||||
return CANCELLED, 0
|
||||
await code_message(
|
||||
progress,
|
||||
f"Saving history (1/3)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
|
||||
f"Saving history (1/4)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
|
||||
)
|
||||
t0 = datetime.now()
|
||||
json_data = bytes(json.dumps(self.dict()), "utf-8")
|
||||
@@ -299,7 +313,7 @@ class GuildLogs:
|
||||
return CANCELLED, 0
|
||||
await code_message(
|
||||
progress,
|
||||
f"Saving history (2/3)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
|
||||
f"Saving history (2/4)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
|
||||
)
|
||||
t0 = datetime.now()
|
||||
gziped_data = gzip.compress(json_data)
|
||||
@@ -311,12 +325,28 @@ class GuildLogs:
|
||||
return CANCELLED, 0
|
||||
await code_message(
|
||||
progress,
|
||||
f"Saving history (3/3)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
|
||||
f"Saving history (3/4)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
|
||||
)
|
||||
if CRYPT_KEY == "" or CRYPT_KEY is None:
|
||||
encrypted_data = gziped_data
|
||||
try:
|
||||
t0 = datetime.now()
|
||||
fernet = Fernet(CRYPT_KEY)
|
||||
encrypted_data = fernet.encrypt(gziped_data)
|
||||
logging.info(f"log {self.guild.id} > encrypted in {delta(t0):,}ms -> {len(gziped_data) / deltas(t0):,.3f} b/s")
|
||||
except:
|
||||
encrypted_data = gziped_data
|
||||
if self.check_cancelled():
|
||||
return CANCELLED, 0
|
||||
await code_message(
|
||||
progress,
|
||||
f"Saving history (4/4)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
|
||||
)
|
||||
t0 = datetime.now()
|
||||
with open(self.log_file, mode="wb") as f:
|
||||
f.write(gziped_data)
|
||||
f.write(encrypted_data)
|
||||
del gziped_data
|
||||
del encrypted_data
|
||||
logging.info(
|
||||
f"log {self.guild.id} > saved in {delta(t0):,}ms -> {real_total_msg / deltas(t0):,.3f} m/s"
|
||||
)
|
||||
|
||||
+1
-1
@@ -18,7 +18,7 @@ emojis.load_emojis()
|
||||
|
||||
bot = Bot(
|
||||
"Discord Analyst",
|
||||
"1.16.1",
|
||||
"1.17.5",
|
||||
alias="%",
|
||||
)
|
||||
|
||||
|
||||
@@ -69,8 +69,8 @@ class PresenceScanner(Scanner):
|
||||
for mention in message.mentions:
|
||||
pres.mention_others[mention] += 1
|
||||
pres.messages[message.author] += 1
|
||||
pres.channel_total[channel.id] += 1
|
||||
pres.mention_count[message.author] += len(message.mentions)
|
||||
pres.channel_total[channel.id] += 1
|
||||
pres.mention_count[message.author] += len(message.mentions)
|
||||
if len(raw_members) > 0:
|
||||
for mention in message.mentions:
|
||||
if mention in raw_members:
|
||||
|
||||
@@ -18,6 +18,7 @@ from utils import (
|
||||
command_cache,
|
||||
FilterLevel,
|
||||
SPLIT_TOKEN,
|
||||
utc_now
|
||||
)
|
||||
from logs import (
|
||||
GuildLogs,
|
||||
@@ -131,7 +132,7 @@ class Scanner(ABC):
|
||||
self.start_date = None if len(dates) < 1 else min(dates)
|
||||
self.stop_date = None if len(dates) < 2 else max(dates)
|
||||
|
||||
if self.start_date is not None and self.start_date > datetime.now():
|
||||
if self.start_date is not None and self.start_date > utc_now():
|
||||
await message.channel.send(
|
||||
f"Start date is after today", reference=message
|
||||
)
|
||||
@@ -228,7 +229,7 @@ class Scanner(ABC):
|
||||
),
|
||||
)
|
||||
if self.stop_date is None:
|
||||
self.stop_date = datetime.utcnow()
|
||||
self.stop_date = utc_now()
|
||||
|
||||
self.msg_count = 0
|
||||
self.total_msg = 0
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
"""One-off migration: encrypt existing log files in place with Fernet.

Reads LOG_DIR, LOG_EXT and CRYPT_KEY from the environment (or a .env file).
Every file in LOG_DIR whose name ends with LOG_EXT is encrypted with
CRYPT_KEY, unless it already decrypts successfully with that key.
"""
import os
import os.path

from cryptography.fernet import Fernet, InvalidToken
from dotenv import load_dotenv

load_dotenv()

LOG_DIR = os.getenv("LOG_DIR", "logs")
# Must come from LOG_EXT, not LOG_DIR: otherwise a configured LOG_DIR
# replaces the extension and no log file matches.
LOG_EXT = os.getenv("LOG_EXT", ".logz")
CRYPT_KEY = os.getenv("CRYPT_KEY", "")

# Fernet rejects an empty key with an opaque ValueError; fail with a clear
# message instead.
if not CRYPT_KEY:
    raise SystemExit("CRYPT_KEY is not set: cannot encrypt log files")

fernet = Fernet(CRYPT_KEY)

for item in os.listdir(LOG_DIR):
    if not item.endswith(LOG_EXT):
        continue
    path = os.path.join(LOG_DIR, item)
    with open(path, mode="rb") as f:
        data = f.read()
    try:
        # A successful decrypt means the file was already encrypted with
        # this key, so it is left untouched.
        fernet.decrypt(data)
    except InvalidToken:
        # Only an invalid token means "not encrypted yet"; any other error
        # (I/O failure, Ctrl-C) must not trigger a rewrite of the file.
        with open(path, mode="wb") as f:
            f.write(fernet.encrypt(data))
        print(f"{item} was encrypted")
    else:
        print(f"{item} already encrypted")
|
||||
+2
-2
@@ -13,7 +13,7 @@ arguments:
|
||||
TEXT = """
|
||||
__**About Analyst-bot's data usage**__
|
||||
**TL;DR**
|
||||
Analyst-bot collects text message information. It does not share collected data with any third-party and data is retained 18 months or until the bot is leaving the guild/server.
|
||||
Analyst-bot collects text message information. It does not share collected data with any third-party and data is retained 90 days or until the bot is leaving the guild/server.
|
||||
**Data collection**
|
||||
Analyst-bot collects a Discord guild/server's history when asked to.
|
||||
This includes:
|
||||
@@ -27,7 +27,7 @@ This does __not__ includes:
|
||||
Any data collected is only processed in order to produce a one-time report sent to the user immediately. No temporary data are retained.
|
||||
**Data storage and retain policy**
|
||||
Analyst-bot stores the collected data in files that are accessible by the software and its administrator only.
|
||||
Any collected data are retained maximum 18 months until deletion or when the bot is leaving a guild/server.
|
||||
Any collected data are retained maximum 90 days until deletion or when the bot is leaving a guild/server.
|
||||
**Data sharing**
|
||||
Analyst-bot does not share the data collected with any third-party.
|
||||
**Right to retract**
|
||||
|
||||
+14
-8
@@ -4,7 +4,7 @@ import os
|
||||
import logging
|
||||
import discord
|
||||
import math
|
||||
from datetime import datetime, timedelta
|
||||
from datetime import datetime, timedelta, timezone
|
||||
import re
|
||||
import time
|
||||
import dateutil.parser
|
||||
@@ -116,7 +116,7 @@ def escape_text(text: str) -> str:
|
||||
return discord.utils.escape_markdown(discord.utils.escape_mentions(text))
|
||||
|
||||
|
||||
class FakeMessage:
|
||||
class FakeMessage(discord.abc.Snowflake):
|
||||
def __init__(self, id: int):
|
||||
self.id = id
|
||||
|
||||
@@ -258,13 +258,19 @@ def parse_iso_datetime(str_date: str) -> datetime:
|
||||
RELATIVE_REGEX = r"(yesterday|today|\d*hours?|\d+h(ours?)?|\d*days?|\d+d(ays?)?|\d*weeks?|\d+w(eeks?)?|\d*months?|\d+m(onths?)?|\d*years?|\d+y(ears?)?)"
|
||||
|
||||
|
||||
def utc_now() -> datetime:
    """Return the current moment as a timezone-aware datetime in UTC."""
    utc = timezone.utc
    return datetime.now(utc)
|
||||
|
||||
def utc_today() -> datetime:
    """Return midnight (00:00:00) of the current UTC day as an aware datetime."""
    # Zeroing the time fields keeps the UTC tzinfo of utc_now() intact.
    return utc_now().replace(hour=0, minute=0, second=0, microsecond=0)
|
||||
|
||||
|
||||
def parse_relative_time(src: str) -> datetime:
|
||||
today = datetime.utcnow().date()
|
||||
today = datetime(today.year, today.month, today.day)
|
||||
if src == "today":
|
||||
return today
|
||||
return utc_today()
|
||||
elif src == "yesterday":
|
||||
return today - relativedelta(days=1)
|
||||
return utc_today() - relativedelta(days=1)
|
||||
else:
|
||||
m = re.match("(\d*)(\w+)", src)
|
||||
delta = None
|
||||
@@ -280,7 +286,7 @@ def parse_relative_time(src: str) -> datetime:
|
||||
delta = relativedelta(months=value)
|
||||
elif unit == "y":
|
||||
delta = relativedelta(years=value)
|
||||
return datetime.utcnow() - delta
|
||||
return utc_now() - delta
|
||||
|
||||
|
||||
def parse_time(src: str) -> datetime:
|
||||
@@ -344,7 +350,7 @@ def get_intro(
|
||||
"""
|
||||
time_text = ""
|
||||
if start_datetime is not None:
|
||||
stop_datetime = datetime.now() if stop_datetime is None else stop_datetime
|
||||
stop_datetime = utc_now() if stop_datetime is None else stop_datetime
|
||||
time_text = f" (in {str_delta(stop_datetime - start_datetime)})"
|
||||
# Show all data (members, channels) when it's less than 5 units
|
||||
if len(members) == 0:
|
||||
|
||||
Reference in New Issue
Block a user