26 Commits

Author SHA1 Message Date
Klemek 78ad50be22 fix counter 2023-10-16 11:16:22 +02:00
Klemek 87217ac31c fix fix 2023-10-16 11:13:42 +02:00
Klemek c503673cbc fix utc_today 2023-10-16 11:10:02 +02:00
Klemek fffaae130c fix dates 2023-10-16 11:03:46 +02:00
Klemek eb6607de60 bump version 2023-10-16 10:18:01 +02:00
Klemek 18fe35e10f 30 to 90 days 2023-10-16 10:17:49 +02:00
Klemek cb37b052c9 fix sanity check 2022-09-02 10:22:12 +02:00
Klemek d8e9e48a13 update requirements.txt 2022-09-02 09:52:33 +02:00
Klemek 6dd45af14f discord v2 2022-09-02 09:41:52 +02:00
Klemek 3cca5c38f5 fix requirements.txt 2022-08-23 09:33:00 +02:00
Klemek 667fb0d414 Merge branch 'master' of github.com:klemek/discord-analyst 2022-08-22 09:22:26 +02:00
Klemek 5089d7d10b force discord.py version 2022-08-22 09:22:23 +02:00
Klemek afb3d7d663 remove invalid stats 2022-05-29 00:09:33 +02:00
Klemek cf2fa3208e Update gdpr.py 2022-03-07 23:08:34 +01:00
Klemek 7b4d952f9e Update GDPR.md 2022-03-07 23:08:11 +01:00
Klemek 2850c7e630 Update gdpr.py 2022-03-07 22:29:37 +01:00
Klemek c252f4cc67 Update GDPR.md 2022-03-07 22:29:09 +01:00
Klemek 96a335bea6 update requirements.txt 2022-03-07 14:12:56 +01:00
Klemek fb48a256ce v1.17 2022-03-07 13:59:42 +01:00
Klemek 0ceffca196 merge with master 2022-03-07 13:28:29 +01:00
Klemek 51911604a9 Update main.py 2022-03-07 13:07:02 +01:00
Klemek 660341127d Update guild_logs.py 2022-03-07 13:06:28 +01:00
Klemek d419a7f2d2 Rename TOS.md to GDPR.md 2022-03-07 12:53:49 +01:00
Klemek b4b818a5c3 Create TOS.md 2022-03-07 12:53:29 +01:00
Klemek b39bc5c16b Update presence_scanner.py 2021-11-05 13:05:17 +01:00
Klemek 130cc5370c Merge pull request #56 from Klemek/dev
v1.16.1
2021-07-13 18:47:12 +02:00
16 changed files with 169 additions and 54 deletions
+4
View File
@@ -0,0 +1,4 @@
DISCORD_TOKEN=
PYTHONPATH=./src
CRYPT_KEY=
LOG_DIR=logs
+46
View File
@@ -0,0 +1,46 @@
# About Analyst-bot's data usage
## TL;DR
Analyst-bot collects text message information. It does not share collected data with any third party, and data is retained for up to 90 days or until the bot leaves the guild/server.
## Data collection
Analyst-bot collects a Discord guild/server's history when asked to.
This includes:
- Visible text channel names
- Visible text messages: date and time of creation and edition, author, content, reactions and other available metadata (pinned, tts, etc.)
This does __not__ include:
- Voice channels and non-visible channels
- Non-visible text messages
- Visible text messages' embedded content, images and other attachments
## Data processing
Any data collected is only processed in order to produce a one-time report sent to the user immediately. No temporary data are retained.
## Data storage and retain policy
Analyst-bot stores the collected data in files that are accessible by the software and its administrator only.
Any collected data are retained for a maximum of 90 days before deletion, or until the bot leaves the guild/server.
## Data sharing
Analyst-bot does not share the data collected with any third-party.
## Right to retract
If you want to have your data removed, you can use the `%gdpr revoke` command or remove this bot from your guild/server.
## Terms agreement
By agreeing to these terms, you confirm that you are of legal age (if your country defines one) and that you have the consent of every member involved.
*If you want more information, please contact the creator of this bot: <https://github.com/Klemek/discord-analyst>.*
Type `%gdpr agree` to agree to these terms, `%gdpr revoke` to remove this guild/server's collected data or `%gdpr` to see this message again.
+4 -1
View File
@@ -125,11 +125,14 @@ python3 src/main.py
## Changelog
* **v1.17**
* compliance with the 30-day data retention policy, and data encryption
* improvements and bug fix
* **v1.16**
* `%freq graph` graph hours frequency along the week
* uses discord new time format
* `%freq` now shows quietest day of week and hour of day
* improvments and bug fix
* improvements and bug fix
* **v1.15**
* `nsfw:allow/only` filter nsfw channels
* `%find` can use regexes
+6 -6
View File
@@ -1,6 +1,6 @@
discord.py==1.7.0
python-dotenv==0.15.0
python-dateutil==2.8.1
git+git://github.com/Klemek/miniscord.git
numpy
matplotlib
discord.py>=2.0.0
python-dotenv>=0.15.0
python-dateutil>=2.8.1
matplotlib>=3.4.2
cryptography>=2.8
git+https://github.com/Klemek/miniscord.git
+2 -2
View File
@@ -4,7 +4,7 @@ from collections import defaultdict
# Custom libs
from utils import plural, from_now, percent, val_sum, top_key
from utils import plural, from_now, percent, val_sum, top_key, utc_today
class Counter:
@@ -25,7 +25,7 @@ class Counter:
if self.last_used is None:
return 0
return self.all_usages() + 1 / (
100000 * ((datetime.today() - self.last_used).days + 1)
100000 * (abs((utc_today() - self.last_used).days) + 1)
)
def all_usages(self) -> int:
+3 -3
View File
@@ -5,7 +5,7 @@ import discord
# Custom libs
from utils import mention, plural, from_now, top_key, percent
from utils import mention, plural, from_now, top_key, percent, utc_today
class Emoji:
@@ -44,14 +44,14 @@ class Emoji:
)
def life_days(self) -> int:
return (datetime.today() - self.emoji.created_at).days
return (utc_today() - self.emoji.created_at).days
def use_days(self) -> int:
# If never used, use creation date instead
if self.last_used is None:
return self.life_days()
else:
return (datetime.today() - self.last_used).days
return (utc_today() - self.last_used).days
def get_top_member(self) -> int:
return top_key(self.members)
-1
View File
@@ -2,7 +2,6 @@ from typing import List
from datetime import timedelta
import calendar
import matplotlib.pyplot as plt
import numpy as np
from io import BytesIO
import discord
import time
+4 -4
View File
@@ -54,16 +54,16 @@ class Presence:
f"- **most visited channel**: {channel_mention(top_channel)} ({self.channel_usage[top_channel]:,} msg, {percent(self.channel_usage[top_channel]/channel_sum)})"
if show_top_channel
else "",
f"- **most contributed channel**: {channel_mention(found_in)} ({self.channel_usage[found_in]:,} msg, {percent(self.channel_usage[found_in]/self.channel_total[found_in])} of {type})"
f"- **most contributed channel**: {channel_mention(found_in)} ({self.channel_usage[found_in]:,} msg,)"
if show_top_channel and member_specific
else "",
f"- **was mentioned**: {plural(mention_sum, 'time')} ({percent(mention_sum/val_sum(self.mention_count))} of {type})"
f"- **was mentioned**: {plural(mention_sum, 'time')}"
if member_specific and len(self.mentions) > 0
else "",
f"- **mostly mentioned by**: {mention(top_mention)} ({plural(self.mentions[top_mention], 'time')}, {percent(self.mentions[top_mention]/mention_sum)})"
if member_specific and len(self.mentions) > 0
else "",
f"- **mentioned others**: {plural(mention_others_sum, 'time')} ({percent(mention_others_sum/val_sum(self.mention_count))} of {type})"
f"- **mentioned others**: {plural(mention_others_sum, 'time')}"
if len(self.mention_others) > 0 and member_specific
else "",
f"- **mostly mentioned**: {mention(top_mention_others)} ({plural(self.mention_others[top_mention_others], 'time')}, {percent(self.mention_others[top_mention_others]/mention_others_sum)})"
@@ -81,7 +81,7 @@ class Presence:
f"- **reactions**: {plural(total_reaction_used, 'time')}"
if len(self.reactions) > 0 and not member_specific
else "",
f"- **reactions**: {plural(total_reaction_used, 'time')} ({percent(total_reaction_used/val_sum(self.used_reaction))} of {type})"
f"- **reactions**: {plural(total_reaction_used, 'time')}"
if len(self.reactions) > 0 and member_specific
else "",
f"- **top reactions**: {mention(top_reaction_member)} ({plural(self.used_reaction[top_reaction_member], 'time')}, {percent(self.used_reaction[top_reaction_member]/val_sum(self.used_reaction))})"
+2 -2
View File
@@ -66,8 +66,8 @@ class ChannelLogs:
is_empty = self.last_message_id is None
try:
if is_empty:
sanity_check = len(await channel.history(limit=1).flatten())
if sanity_check != 1:
sanity_check = len([message async for message in channel.history(limit=1)])
if sanity_check < 1:
yield len(self.messages), True
return
# load backward
+50 -20
View File
@@ -8,28 +8,30 @@ import time
import logging
import asyncio
import threading
from dotenv import load_dotenv
from cryptography.fernet import Fernet
from . import ChannelLogs
from utils import code_message, delta, deltas
LOG_DIR = "logs"
LOG_EXT = ".logz"
current_analysis = []
current_analysis_lock = threading.Lock()
ALREADY_RUNNING = -100
CANCELLED = -200
NO_FILE = -300
# 5 minutes, assume 'fast' arg
MIN_MODIFICATION_TIME = 5 * 60
# ~1 year, remove log file
MAX_MODIFICATION_TIME = 365 * 24 * 60 * 60
load_dotenv()
# Storage settings; each one can be overridden through an environment
# variable of the same name and falls back to the default given here.
LOG_DIR = os.getenv("LOG_DIR", "logs")
LOG_EXT = os.getenv("LOG_EXT", ".logz")
CRYPT_KEY = os.getenv("CRYPT_KEY", "")

# 5 minutes, assume 'fast' arg
# (previously read from MAX_MODIFICATION_TIME, so overriding the retention
# limit silently overrode this threshold as well)
MIN_MODIFICATION_TIME = int(os.getenv("MIN_MODIFICATION_TIME", 5 * 60))
# 90 days, remove log file
MAX_MODIFICATION_TIME = int(os.getenv("MAX_MODIFICATION_TIME", 90 * 24 * 60 * 60))
class Worker:
def __init__(
@@ -129,29 +131,41 @@ class GuildLogs:
channels = {}
try:
last_time = os.path.getmtime(self.log_file)
gziped_data = None
await code_message(progress, "Reading saved history (1/4)...")
encrypted_data = None
await code_message(progress, "Reading saved history (1/5)...")
t0 = datetime.now()
with open(self.log_file, mode="rb") as f:
gziped_data = f.read()
encrypted_data = f.read()
logging.info(f"log {self.guild.id} > read in {delta(t0):,}ms")
if self.check_cancelled():
return CANCELLED, 0
await code_message(progress, "Reading saved history (2/4)...")
await code_message(progress, "Reading saved history (2/5)...")
if CRYPT_KEY == "" or CRYPT_KEY is None:
gziped_data = encrypted_data
try:
t0 = datetime.now()
fernet = Fernet(CRYPT_KEY)
gziped_data = fernet.decrypt(encrypted_data)
logging.info(f"log {self.guild.id} > decrypted in {delta(t0):,}ms")
except:
gziped_data = encrypted_data
if self.check_cancelled():
return CANCELLED, 0
await code_message(progress, "Reading saved history (3/5)...")
t0 = datetime.now()
json_data = gzip.decompress(gziped_data)
del gziped_data
logging.info(f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms")
if self.check_cancelled():
return CANCELLED, 0
await code_message(progress, "Reading saved history (3/4)...")
await code_message(progress, "Reading saved history (4/5)...")
t0 = datetime.now()
channels = json.loads(json_data)
del json_data
logging.info(f"log {self.guild.id} > json parse in {delta(t0):,}ms")
if self.check_cancelled():
return CANCELLED, 0
await code_message(progress, "Reading saved history (4/4)...")
await code_message(progress, "Reading saved history (5/5)...")
t0 = datetime.now()
self.channels = {
int(id): ChannelLogs(channels[id], self) for id in channels
@@ -288,7 +302,7 @@ class GuildLogs:
return CANCELLED, 0
await code_message(
progress,
f"Saving history (1/3)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
f"Saving history (1/4)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
)
t0 = datetime.now()
json_data = bytes(json.dumps(self.dict()), "utf-8")
@@ -299,7 +313,7 @@ class GuildLogs:
return CANCELLED, 0
await code_message(
progress,
f"Saving history (2/3)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
f"Saving history (2/4)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
)
t0 = datetime.now()
gziped_data = gzip.compress(json_data)
@@ -311,12 +325,28 @@ class GuildLogs:
return CANCELLED, 0
await code_message(
progress,
f"Saving history (3/3)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
f"Saving history (3/4)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
)
if CRYPT_KEY == "" or CRYPT_KEY is None:
encrypted_data = gziped_data
try:
t0 = datetime.now()
fernet = Fernet(CRYPT_KEY)
encrypted_data = fernet.encrypt(gziped_data)
logging.info(f"log {self.guild.id} > encrypted in {delta(t0):,}ms -> {len(gziped_data) / deltas(t0):,.3f} b/s")
except:
encrypted_data = gziped_data
if self.check_cancelled():
return CANCELLED, 0
await code_message(
progress,
f"Saving history (4/4)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
)
t0 = datetime.now()
with open(self.log_file, mode="wb") as f:
f.write(gziped_data)
f.write(encrypted_data)
del gziped_data
del encrypted_data
logging.info(
f"log {self.guild.id} > saved in {delta(t0):,}ms -> {real_total_msg / deltas(t0):,.3f} m/s"
)
+1 -1
View File
@@ -18,7 +18,7 @@ emojis.load_emojis()
bot = Bot(
"Discord Analyst",
"1.16.1",
"1.17.5",
alias="%",
)
+2 -2
View File
@@ -69,8 +69,8 @@ class PresenceScanner(Scanner):
for mention in message.mentions:
pres.mention_others[mention] += 1
pres.messages[message.author] += 1
pres.channel_total[channel.id] += 1
pres.mention_count[message.author] += len(message.mentions)
pres.channel_total[channel.id] += 1
pres.mention_count[message.author] += len(message.mentions)
if len(raw_members) > 0:
for mention in message.mentions:
if mention in raw_members:
+3 -2
View File
@@ -18,6 +18,7 @@ from utils import (
command_cache,
FilterLevel,
SPLIT_TOKEN,
utc_now
)
from logs import (
GuildLogs,
@@ -131,7 +132,7 @@ class Scanner(ABC):
self.start_date = None if len(dates) < 1 else min(dates)
self.stop_date = None if len(dates) < 2 else max(dates)
if self.start_date is not None and self.start_date > datetime.now():
if self.start_date is not None and self.start_date > utc_now():
await message.channel.send(
f"Start date is after today", reference=message
)
@@ -228,7 +229,7 @@ class Scanner(ABC):
),
)
if self.stop_date is None:
self.stop_date = datetime.utcnow()
self.stop_date = utc_now()
self.msg_count = 0
self.total_msg = 0
+26
View File
@@ -0,0 +1,26 @@
"""One-off migration: encrypt saved log files that are not encrypted yet.

Settings are read from the environment after loading a .env file:
LOG_DIR (directory to scan), LOG_EXT (file suffix to match) and CRYPT_KEY
(Fernet key). Every matching file that cannot be decrypted with CRYPT_KEY is
rewritten in place with its encrypted content.
"""
import os
import os.path

from dotenv import load_dotenv
from cryptography.fernet import Fernet, InvalidToken

load_dotenv()

LOG_DIR = os.getenv("LOG_DIR", "logs")
# Previously read from "LOG_DIR", which made the suffix filter equal to the
# directory name as soon as LOG_DIR was set.
LOG_EXT = os.getenv("LOG_EXT", ".logz")
CRYPT_KEY = os.getenv("CRYPT_KEY", "")

if not CRYPT_KEY:
    # Fail early with a readable message instead of an opaque key-format error.
    raise SystemExit("CRYPT_KEY is not set, cannot encrypt log files")

fernet = Fernet(CRYPT_KEY)

for item in os.listdir(LOG_DIR):
    if not item.endswith(LOG_EXT):
        continue
    path = os.path.join(LOG_DIR, item)
    with open(path, mode="rb") as f:
        data = f.read()
    try:
        # A successful decryption means the file is already a token for this key.
        fernet.decrypt(data)
    except InvalidToken:
        # Only "not a valid token" means plain content; other errors propagate.
        with open(path, mode="wb") as f:
            f.write(fernet.encrypt(data))
        print(f"{item} was encrypted")
    else:
        print(f"{item} already encrypted")
+2 -2
View File
@@ -13,7 +13,7 @@ arguments:
TEXT = """
__**About Analyst-bot's data usage**__
**TL;DR**
Analyst-bot collects text message information. It does not share collected data with any third-party and data is retained 18 months or until the bot is leaving the guild/server.
Analyst-bot collects text message information. It does not share collected data with any third-party and data is retained 90 days or until the bot is leaving the guild/server.
**Data collection**
Analyst-bot collects a Discord guild/server's history when asked to.
This includes:
@@ -27,7 +27,7 @@ This does __not__ includes:
Any data collected is only processed in order to produce a one-time report sent to the user immediately. No temporary data are retained.
**Data storage and retain policy**
Analyst-bot stores the collected data in files that are accessible by the software and its administrator only.
Any collected data are retained maximum 18 months until deletion or when the bot is leaving a guild/server.
Any collected data are retained maximum 90 days until deletion or when the bot is leaving a guild/server.
**Data sharing**
Analyst-bot does not share the data collected with any third-party.
**Right to retract**
+14 -8
View File
@@ -4,7 +4,7 @@ import os
import logging
import discord
import math
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
import re
import time
import dateutil.parser
@@ -116,7 +116,7 @@ def escape_text(text: str) -> str:
return discord.utils.escape_markdown(discord.utils.escape_mentions(text))
class FakeMessage:
class FakeMessage(discord.abc.Snowflake):
def __init__(self, id: int):
self.id = id
@@ -258,13 +258,19 @@ def parse_iso_datetime(str_date: str) -> datetime:
RELATIVE_REGEX = r"(yesterday|today|\d*hours?|\d+h(ours?)?|\d*days?|\d+d(ays?)?|\d*weeks?|\d+w(eeks?)?|\d*months?|\d+m(onths?)?|\d*years?|\d+y(ears?)?)"
def utc_now() -> datetime:
    """Return the current moment as a timezone-aware datetime in UTC."""
    current = datetime.now(timezone.utc)
    return current
def utc_today() -> datetime:
    """Return midnight at the start of the current UTC day, timezone-aware."""
    # Zero the time-of-day fields; the UTC tzinfo from utc_now() is kept.
    return utc_now().replace(hour=0, minute=0, second=0, microsecond=0)
def parse_relative_time(src: str) -> datetime:
today = datetime.utcnow().date()
today = datetime(today.year, today.month, today.day)
if src == "today":
return today
return utc_today()
elif src == "yesterday":
return today - relativedelta(days=1)
return utc_today() - relativedelta(days=1)
else:
m = re.match("(\d*)(\w+)", src)
delta = None
@@ -280,7 +286,7 @@ def parse_relative_time(src: str) -> datetime:
delta = relativedelta(months=value)
elif unit == "y":
delta = relativedelta(years=value)
return datetime.utcnow() - delta
return utc_now() - delta
def parse_time(src: str) -> datetime:
@@ -344,7 +350,7 @@ def get_intro(
"""
time_text = ""
if start_datetime is not None:
stop_datetime = datetime.now() if stop_datetime is None else stop_datetime
stop_datetime = utc_now() if stop_datetime is None else stop_datetime
time_text = f" (in {str_delta(stop_datetime - start_datetime)})"
# Show all data (members, channels) when it's less than 5 units
if len(members) == 0: