84 Commits

Author SHA1 Message Date
Klemek 78ad50be22 fix counter 2023-10-16 11:16:22 +02:00
Klemek 87217ac31c fix fix 2023-10-16 11:13:42 +02:00
Klemek c503673cbc fix utc_today 2023-10-16 11:10:02 +02:00
Klemek fffaae130c fix dates 2023-10-16 11:03:46 +02:00
Klemek eb6607de60 bump version 2023-10-16 10:18:01 +02:00
Klemek 18fe35e10f 30 to 90 days 2023-10-16 10:17:49 +02:00
Klemek cb37b052c9 fix sanity check 2022-09-02 10:22:12 +02:00
Klemek d8e9e48a13 update requirements.txt 2022-09-02 09:52:33 +02:00
Klemek 6dd45af14f discord v2 2022-09-02 09:41:52 +02:00
Klemek 3cca5c38f5 fix requirements.txt 2022-08-23 09:33:00 +02:00
Klemek 667fb0d414 Merge branch 'master' of github.com:klemek/discord-analyst 2022-08-22 09:22:26 +02:00
Klemek 5089d7d10b force discord.py version 2022-08-22 09:22:23 +02:00
Klemek afb3d7d663 remove invalid stats 2022-05-29 00:09:33 +02:00
Klemek cf2fa3208e Update gdpr.py 2022-03-07 23:08:34 +01:00
Klemek 7b4d952f9e Update GDPR.md 2022-03-07 23:08:11 +01:00
Klemek 2850c7e630 Update gdpr.py 2022-03-07 22:29:37 +01:00
Klemek c252f4cc67 Update GDPR.md 2022-03-07 22:29:09 +01:00
Klemek 96a335bea6 update requirements.txt 2022-03-07 14:12:56 +01:00
Klemek fb48a256ce v1.17 2022-03-07 13:59:42 +01:00
Klemek 0ceffca196 merge with master 2022-03-07 13:28:29 +01:00
Klemek 51911604a9 Update main.py 2022-03-07 13:07:02 +01:00
Klemek 660341127d Update guild_logs.py 2022-03-07 13:06:28 +01:00
Klemek d419a7f2d2 Rename TOS.md to GDPR.md 2022-03-07 12:53:49 +01:00
Klemek b4b818a5c3 Create TOS.md 2022-03-07 12:53:29 +01:00
Klemek b39bc5c16b Update presence_scanner.py 2021-11-05 13:05:17 +01:00
Klemek 130cc5370c Merge pull request #56 from Klemek/dev
v1.16.1
2021-07-13 18:47:12 +02:00
Klemek ef17c599cd Merge branch 'master' into dev 2021-07-13 18:46:51 +02:00
Klemek a6b963557c improv: black 2021-07-13 18:46:22 +02:00
Klemek 19d09ee6bc improv: better graph 2021-07-13 18:45:50 +02:00
Klemek 1a7c041f67 fix: new channel not loading 2021-07-13 18:35:15 +02:00
Klemek 444c65f343 Merge pull request #55 from Klemek/dev
v1.16
2021-07-13 18:14:52 +02:00
Klemek 20e4c05cc5 improv: black 2021-07-13 18:07:44 +02:00
Klemek 8f4f09bb86 v1.16 2021-07-13 18:06:33 +02:00
Klemek 8b0fe859a7 feat: (BETA) %freq graph 2021-07-13 18:04:46 +02:00
Klemek 07aed12463 feat: use discord new time format 2021-07-13 17:05:16 +02:00
Klemek 499ada0b26 feat: quietest hour of day/week 2021-07-13 16:51:52 +02:00
Klemek c3d3b7ac2e improv: changed the way frequency was stored 2021-07-13 16:47:01 +02:00
Klemek fa840725dd improv: first tests 2021-07-13 16:43:50 +02:00
Klemek e1e1bf117f improv: black 2021-07-13 16:26:04 +02:00
Klemek 14f5709241 fix: frequency scanner using invalid parameter 2021-07-13 15:34:46 +02:00
Klemek dbd859a828 Update Dockerfile 2021-06-09 15:40:33 +02:00
Klemek a3eb623205 Update Dockerfile 2021-06-09 15:38:20 +02:00
Klemek acbcce304e Create docker.yml 2021-06-09 15:32:23 +02:00
Klemek ea82877fd2 Merge branch 'dev' of github.com:klemek/discord-analyst into dev 2021-06-04 15:47:51 +02:00
Klemek 9136cf4ad2 small fix 2021-06-04 15:47:48 +02:00
Klemek ead5f66608 Merge pull request #51 from Klemek/dev
1.15.3 small improvement
2021-06-04 15:38:42 +02:00
Klemek 5b91ca63a9 Merge branch 'master' into dev 2021-06-04 15:37:40 +02:00
Klemek f7116787fc Merge branch 'dev' of github.com:klemek/discord-analyst into dev 2021-06-04 15:36:27 +02:00
Klemek 8ef1b50e3c "valid-arg" skip arg processing 2021-06-04 15:36:24 +02:00
Klemek eb82fcf2aa Merge pull request #49 from Klemek/dev
Dev
2021-06-01 12:10:16 +02:00
Klemek c86af98406 Merge branch 'master' into dev 2021-06-01 12:09:26 +02:00
Klemek 634285f4fc Merge branch 'dev' of github.com:klemek/discord-analyst into dev 2021-06-01 12:08:36 +02:00
Klemek 887f612486 cleaning => set 2021-06-01 12:08:33 +02:00
Klemek be552b6cf3 Merge pull request #48 from Klemek/dev
fix duplicate messages bug
2021-06-01 11:31:45 +02:00
Klemek e808f1f957 Merge branch 'master' into dev 2021-06-01 11:31:04 +02:00
Klemek 975ee7430d fix duplicate messages bug 2021-06-01 11:30:40 +02:00
Klemek ebdc33029c Merge pull request #47 from Klemek/dev
1.15.1 bug fix on images
2021-06-01 09:53:10 +02:00
Klemek 99cd2b301b 1.15.1 bug fix on images 2021-06-01 09:52:14 +02:00
Klemek b838fc7408 Merge pull request #46 from Klemek/dev
bug fix
2021-05-19 15:34:48 +02:00
Klemek b1eddf0b4b bug fix 2021-05-19 15:34:24 +02:00
Klemek 4b42f13d28 Merge pull request #45 from Klemek/dev
updated dockerfile
2021-05-19 15:27:51 +02:00
Klemek 84734c7d4e updated dockerfile 2021-05-19 15:27:17 +02:00
Klemek f2a9cf410e Merge pull request #44 from Klemek/dev
v1.15
2021-05-19 15:19:52 +02:00
Klemek 5b448fe237 Merge branch 'dev' of github.com:klemek/discord-analyst into dev 2021-05-19 15:16:48 +02:00
Klemek 2d32dc37bf updated README 2021-05-19 15:16:43 +02:00
Klemek a6f99256ef updated README 2021-05-19 15:14:07 +02:00
Klemek a8b1ede962 spoiler filtering 2021-05-19 15:11:29 +02:00
Klemek da5e3fdb35 blacked 2021-05-19 13:33:15 +02:00
Klemek 516eb75b5c %first/%rand/%last image 2021-05-19 13:31:07 +02:00
Klemek 13447ff869 fix channel preload 2021-05-19 13:29:37 +02:00
Klemek 1a17e232ed allow queries in %first/%history/%last 2021-05-19 11:59:19 +02:00
Klemek c101002b6c backticks in %find can use regexes 2021-05-19 11:43:32 +02:00
Klemek d5a3667cfb prepare history scanner for images 2021-05-18 18:13:51 +02:00
Klemek b2858cca95 nsfw filters 2021-05-18 18:13:37 +02:00
Klemek a01414dce7 small improvments 2021-05-18 16:54:18 +02:00
Klemek 38056f430f small fixes 2021-05-18 16:08:38 +02:00
Klemek cd9b6b4d00 new alias for random 2021-05-18 16:04:28 +02:00
Klemek 620982f37b Merge pull request #39 from Klemek/dev
v1.14 minor fix
2021-04-22 20:16:20 +02:00
Klemek 245ae3f1df Merge branch 'master' into dev 2021-04-22 20:15:47 +02:00
Klemek 452f53c8f2 assume top if query is singular 2021-04-22 20:14:52 +02:00
Klemek 8e5bab22e7 small fix of formating 2021-04-22 16:26:11 +02:00
Klemek e878aa92d7 Merge pull request #38 from Klemek/dev
v1.14 fix
2021-04-22 16:23:18 +02:00
Klemek 8d1875a362 top arg for %find 2021-04-22 16:22:26 +02:00
Klemek d2cdea3db6 escape text in find scanner 2021-04-22 16:12:13 +02:00
34 changed files with 961 additions and 198 deletions
+4
View File
@@ -0,0 +1,4 @@
DISCORD_TOKEN=
PYTHONPATH=./src
CRYPT_KEY=
LOG_DIR=logs
+34
View File
@@ -0,0 +1,34 @@
# CI workflow: builds the Docker image (without pushing it) on every push and
# pull request, reusing Buildx layers through a local cache directory.
name: Docker

on: ["push", "pull_request"]

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v2

      - name: Set up Docker Buildx
        # The id is required so the Build step can read steps.buildx.outputs.name
        id: buildx
        uses: docker/setup-buildx-action@v1

      - name: Cache Docker layers
        uses: actions/cache@v2
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-buildx-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-buildx-

      - name: Build
        # The id is required so the last step can read steps.docker_build.outputs.digest
        id: docker_build
        uses: docker/build-push-action@v2
        with:
          context: ./
          file: ./Dockerfile
          builder: ${{ steps.buildx.outputs.name }}
          push: false
          cache-from: type=local,src=/tmp/.buildx-cache
          cache-to: type=local,dest=/tmp/.buildx-cache-new

      - name: Move cache
        # Replace the restored cache directory with the one written by this build
        run: |
          rm -rf /tmp/.buildx-cache
          mv /tmp/.buildx-cache-new /tmp/.buildx-cache

      - name: Image digest
        run: echo ${{ steps.docker_build.outputs.digest }}
+2 -2
View File
@@ -1,4 +1,4 @@
FROM python
FROM python:3.8.10
# Create app directory
WORKDIR /usr/src/app
@@ -9,7 +9,7 @@ COPY requirements.txt ./
RUN pip install -r requirements.txt
RUN touch logs/guilds.log && ln -s logs/guilds.log guilds.log
RUN mkdir -p logs && touch logs/guilds.log && ln -s logs/guilds.log guilds.log
# Bundle app source
COPY . .
+46
View File
@@ -0,0 +1,46 @@
# About Analyst-bot's data usage
## TL;DR
Analyst-bot collects text message information. It does not share collected data with any third party, and data is retained for 90 days or until the bot leaves the guild/server.
## Data collection
Analyst-bot collects a Discord guild/server's history when asked to.
This includes:
- Visible text channel names
- Visible text messages: date and time of creation and editing, author, content, reactions and other available metadata (pinned, tts, etc.)
This does __not__ include:
- Voice channels and non-visible channels
- Non-visible text messages
- Visible text messages' embedded content, images and other attachments
## Data processing
Any data collected is only processed in order to produce a one-time report sent to the user immediately. No temporary data are retained.
## Data storage and retain policy
Analyst-bot stores the collected data in files that are accessible by the software and its administrator only.
Collected data is retained for a maximum of 90 days before deletion, or until the bot leaves the guild/server.
## Data sharing
Analyst-bot does not share the data collected with any third-party.
## Right to retract
If you want to have your data removed, you can use the `%gdpr revoke` command or remove this bot from your guild/server.
## Terms agreement
By agreeing to these terms, you confirm that you are of legal age (if your country defines one) and that you have the consent of every member involved.
*If you want more information, please contact the creator of this bot: <https://github.com/Klemek/discord-analyst>.*
Type `%gdpr agree` to agree to these terms, `%gdpr revoke` to remove this guild/server's collected data or `%gdpr` to see this message again.
+30 -4
View File
@@ -18,13 +18,24 @@
* %freq - frequency analysis
* %compo - composition analysis
* %pres - presence analysis
* %first - read first message
* %rand - read a random message
* %last - read last message
* %find - find specific words or phrases
* %repeat - repeat last analysis (adding supplied arguments)
* %mobile - fix @invalid-user for last command but mentions users
* %gdpr - displays GDPR information
* %find - find specific words or phrases (you can use quotes to add spaces in queries, backticks define regexes)
* arguments:
* top - rank users for these queries
* %first - read first message (add text to filter like %find)
* arguments:
* image - pull an image instead of a message
* spoiler:allow/only - allow spoiler images
* %rand - read a random message (add text to filter like %find)
* arguments:
* image - pull an image instead of a message
* spoiler:allow/only - allow spoiler images
* %last - read last message (add text to filter like %find)
* arguments:
* image - pull an image instead of a message
* spoiler:allow/only - allow spoiler images
* %emojis - rank emojis by their usage
* arguments:
* <n> - top <n> emojis, default is 20
@@ -61,6 +72,7 @@
* all/everyone - include bots messages
* fast: only read cache
* fresh: does not read cache
* nsfw:allow/only - allow messages from nsfw channels
* mobile/mention: mentions users (fix @invalid-user bug)
(Sample dates: 2020 / 2021-11 / 2021-06-28 / 2020-06-28T23:00 / today / week / 8days / 1y)
@@ -113,6 +125,20 @@ python3 src/main.py
## Changelog
* **v1.17**
* compliance with a 30-day data retention policy, and data encryption
* improvements and bug fix
* **v1.16**
* `%freq graph` graph hours frequency along the week
* uses discord new time format
* `%freq` now shows quietest day of week and hour of day
* improvements and bug fix
* **v1.15**
* `nsfw:allow/only` filter nsfw channels
* `%find` can use regexes
* `%first`, `%rand` and `%last` can be filtered with specific keywords
* `%first`, `%rand` and `%last` can pull images
* bug fix
* **v1.14**
* `mobile/mention` arg to fix mobile bug
* `%repeat`, `%mobile` to repeat commands
+6 -4
View File
@@ -1,4 +1,6 @@
discord.py
python-dotenv
python-dateutil
git+git://github.com/Klemek/miniscord.git
discord.py>=2.0.0
python-dotenv>=0.15.0
python-dateutil>=2.8.1
matplotlib>=3.4.2
cryptography>=2.8
git+https://github.com/Klemek/miniscord.git
+2 -2
View File
@@ -4,7 +4,7 @@ from collections import defaultdict
# Custom libs
from utils import plural, from_now, percent, val_sum, top_key
from utils import plural, from_now, percent, val_sum, top_key, utc_today
class Counter:
@@ -25,7 +25,7 @@ class Counter:
if self.last_used is None:
return 0
return self.all_usages() + 1 / (
100000 * ((datetime.today() - self.last_used).days + 1)
100000 * (abs((utc_today() - self.last_used).days) + 1)
)
def all_usages(self) -> int:
+3 -3
View File
@@ -5,7 +5,7 @@ import discord
# Custom libs
from utils import mention, plural, from_now, top_key, percent
from utils import mention, plural, from_now, top_key, percent, utc_today
class Emoji:
@@ -44,14 +44,14 @@ class Emoji:
)
def life_days(self) -> int:
return (datetime.today() - self.emoji.created_at).days
return (utc_today() - self.emoji.created_at).days
def use_days(self) -> int:
# If never used, use creation date instead
if self.last_used is None:
return self.life_days()
else:
return (datetime.today() - self.last_used).days
return (utc_today() - self.last_used).days
def get_top_member(self) -> int:
return top_key(self.members)
+78 -15
View File
@@ -1,10 +1,12 @@
from typing import List
from datetime import timedelta
import calendar
import matplotlib.pyplot as plt
from io import BytesIO
import discord
import time
from utils import (
str_date,
str_datetime,
from_now,
plural,
percent,
@@ -19,8 +21,7 @@ class Frequency:
self.dates = []
self.longest_break = timedelta(seconds=0)
self.longest_break_start = None
self.week = {i: 0 for i in range(7)}
self.day = {i: 0 for i in range(24)}
self.hours = {i: {j: 0 for j in range(24)} for i in range(7)}
self.busiest_day = None
self.busiest_day_count = 0
self.busiest_hour = None
@@ -33,6 +34,56 @@ class Frequency:
self.longest_streak_start = None
self.longest_streak_author = None
def to_graph(self) -> List[discord.File]:
    """Render the hour-of-day message frequency as a PNG plot.

    One dashed line is drawn per weekday plus a solid red "average" line.
    Counts are divided by ``n_hours`` (the number of days spanned by
    ``self.dates``, plus one when the busiest hour lies between the hours of
    the first and last message); weekday lines are additionally scaled by 7.

    Side effect: ``self.dates`` is sorted in place.

    Returns:
        A one-element list holding the plot as a ``discord.File``.

    Raises:
        IndexError: if ``self.dates`` is empty.
    """
    self.dates.sort()
    delta = self.dates[-1] - self.dates[0]
    if delta.days == 0:
        # Avoid dividing by zero when everything happened within one day
        delta = timedelta(days=1)

    # Messages per hour of day, summed over the seven weekdays
    day = {j: sum(self.hours[i][j] for i in range(7)) for j in range(24)}
    busiest_hour = top_key(day)

    n_hours = delta.days
    if self.dates[0].hour <= busiest_hour and self.dates[-1].hour >= busiest_hour:
        n_hours += 1

    plt.style.use("dark_background")
    fig, ax = plt.subplots()
    try:
        # 25 points: hour 0 is repeated at x=24 so every curve spans the full day
        times = range(25)
        ax.set_xticks(times)
        ax.set_xticklabels([f"{t:0>2}h" if t % 2 == 0 else "" for t in times])

        for i in range(7):
            hours = [self.hours[i][hour] * 7 / n_hours for hour in range(24)] + [
                self.hours[i][0] * 7 / n_hours
            ]
            ax.plot(
                times, hours, label=calendar.day_name[i], linestyle="--", linewidth=0.8
            )

        hours = [day[hour] / n_hours for hour in range(24)] + [day[0] / n_hours]
        ax.plot(times, hours, c="r", label="average", linewidth=1.5)

        fig.patch.set_facecolor("#36393F")
        ax.patch.set_alpha(0)
        ax.set_xlim([0, 24])
        ax.set_ylim([0, None])
        ax.set_ylabel("average messages")
        ax.legend(framealpha=0)
        ax.grid(True, alpha=0.1)

        with BytesIO() as f:
            fig.savefig(
                f,
                format="png",
                facecolor=fig.get_facecolor(),
                edgecolor="none",
                bbox_inches="tight",
                dpi=300,
            )
            f.seek(0)
            return [discord.File(f, f"{time.time()}-plot.png")]
    finally:
        # pyplot keeps every figure it created until it is explicitly closed;
        # without this each call would leave one figure in memory.
        plt.close(fig)
def to_string(
self,
*,
@@ -43,8 +94,14 @@ class Frequency:
if delta.days == 0:
delta = timedelta(days=1)
total_msg = len(self.dates)
busiest_weekday = top_key(self.week)
busiest_hour = top_key(self.day)
week = {i: sum(self.hours[i].values()) for i in range(7)}
day = {j: sum(self.hours[i][j] for i in range(7)) for j in range(24)}
busiest_weekday = top_key(week)
busiest_hour = top_key(day)
quietest_weekday = top_key(week, reverse=True)
quietest_hour = top_key(day, reverse=True)
n_weekdays = delta.days // 7
if (
self.dates[0].weekday() <= busiest_weekday
@@ -55,20 +112,26 @@ class Frequency:
if self.dates[0].hour <= busiest_hour and self.dates[-1].hour >= busiest_hour:
n_hours += 1
ret = [
f"- **earliest message**: {str_datetime(self.dates[0])} ({from_now(self.dates[0])})",
f"- **latest message**: {str_datetime(self.dates[-1])} ({from_now(self.dates[-1])})",
f"- **earliest message**: {from_now(self.dates[0])}",
f"- **latest message**: {from_now(self.dates[-1])}",
f"- **messages/day**: {precise(total_msg/delta.days, precision=3)}",
f"- **busiest day of week**: {calendar.day_name[busiest_weekday]} (~{precise(self.week[busiest_weekday]/n_weekdays, precision=3)} msg, {percent(self.week[busiest_weekday]/total_msg)})",
f"- **busiest day ever**: {str_date(self.busiest_day)} ({from_now(self.busiest_day)}, {self.busiest_day_count} msg)"
f"- **busiest day of week**: {calendar.day_name[busiest_weekday]} (~{precise(week[busiest_weekday]/n_weekdays, precision=3)} msg, {percent(week[busiest_weekday]/total_msg)})",
f"- **quietest day of week**: {calendar.day_name[quietest_weekday]} (~{precise(week[quietest_weekday]/n_weekdays, precision=3)} msg, {percent(week[quietest_weekday]/total_msg)})"
if week[quietest_weekday] > 0
else "",
f"- **busiest day ever**: {from_now(self.busiest_day)} ({self.busiest_day_count} msg)"
if self.busiest_day is not None
else "",
f"- **messages/hour**: {precise(total_msg*3600/delta.total_seconds(), precision=3)}",
f"- **busiest hour of day**: {busiest_hour:0>2}:00 (~{precise(self.day[busiest_hour]/n_hours, precision=3)} msg, {percent(self.day[busiest_hour]/total_msg)})",
f"- **busiest hour ever**: {str_datetime(self.busiest_hour)} ({from_now(self.busiest_hour)}, {self.busiest_hour_count} msg)",
f"- **longest break**: {plural(round(self.longest_break.total_seconds()/3600), 'hour')} ({plural(self.longest_break.days,'day')}) from {str_datetime(self.longest_break_start)} ({from_now(self.longest_break_start)})",
f"- **busiest hour of day**: {busiest_hour:0>2}:00 (~{precise(day[busiest_hour]/n_hours, precision=3)} msg, {percent(day[busiest_hour]/total_msg)})",
f"- **quietest hour of day**: {quietest_hour:0>2}:00 (~{precise(day[quietest_hour]/n_hours, precision=3)} msg, {percent(day[quietest_hour]/total_msg)})"
if day[quietest_hour] > 0
else "",
f"- **busiest hour ever**: {from_now(self.busiest_hour)} ({self.busiest_hour_count} msg)",
f"- **longest break**: {plural(round(self.longest_break.total_seconds()/3600), 'hour')} ({plural(self.longest_break.days,'day')}), started {from_now(self.longest_break_start)}",
f"- **avg. streak**: {precise(sum(self.streaks)/len(self.streaks), precision=3)} msg",
f"- **longest streak**: {self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)} ({from_now(self.longest_streak_start)})"
f"- **longest streak**: {self.longest_streak:,} msg, started {from_now(self.longest_streak_start)}"
if member_specific
else f"- **longest streak**: {mention(self.longest_streak_author)} ({self.longest_streak:,} msg from {str_datetime(self.longest_streak_start)}, {from_now(self.longest_streak_start)})",
else f"- **longest streak**: {mention(self.longest_streak_author)} ({self.longest_streak:,} msg, started {from_now(self.longest_streak_start)})",
]
return ret
+75 -2
View File
@@ -3,13 +3,86 @@ import random
# Custom libs
from utils import mention, from_now, str_datetime, message_link
from utils import (
mention,
from_now,
message_link,
SPLIT_TOKEN,
FilterLevel,
should_allow_spoiler,
is_image_gif,
)
MAX_RANDOM_TRIES = 100
class History:
def __init__(self):
self.messages = []
async def to_string_image(
    self, *, type: str, spoiler: FilterLevel, gif_only: bool
) -> List[str]:
    """Pick one image message (first, last or random) and describe it.

    Candidates are fetched through ``message.fetch()`` and rejected when the
    fetch returns ``None``, when ``should_allow_spoiler`` refuses them, or
    when ``gif_only`` is set and ``is_image_gif`` is false.

    Args:
        type: ``"first"``, ``"last"`` or ``"random"``; any other value
            selects nothing.
        spoiler: spoiler filter level passed to ``should_allow_spoiler``.
        gif_only: keep only messages for which ``is_image_gif`` is true.

    Returns:
        The lines to send, or a single "no match" line when nothing fits.
        For ``"first"``/``"last"`` ``self.messages`` is sorted in place.
    """
    if len(self.messages) == 0:
        return ["There was no messages matching your filters"]

    async def _fetch_if_allowed(candidate):
        # Returns the fetched message, or None when it is gone or filtered out
        fetched = await candidate.fetch()
        if fetched is None:
            return None
        if not should_allow_spoiler(fetched, spoiler):
            return None
        if gif_only and not is_image_gif(fetched):
            return None
        return fetched

    message = None
    intro = None
    real_message = None

    if type == "first" or type == "last":
        self.messages.sort(key=lambda m: m.created_at, reverse=(type == "last"))
        # Walk the sorted list until one candidate passes the filters
        for message in self.messages:
            real_message = await _fetch_if_allowed(message)
            if real_message is not None:
                break
        if type == "first":
            intro = f"First image out of {len(self.messages):,}"
        else:
            intro = f"Last image out of {len(self.messages):,}"
    elif type == "random":
        intro = f"Random image out of {len(self.messages):,}"
        # Bounded number of draws; the same message may be drawn more than once
        for _ in range(MAX_RANDOM_TRIES):
            message = random.choice(self.messages)
            real_message = await _fetch_if_allowed(message)
            if real_message is not None:
                break

    if real_message is None:
        return ["There was no messages matching your filters"]

    # Prefer the first attachment, then the first embed
    if len(real_message.attachments) > 0:
        image = real_message.attachments[0].url
    elif len(real_message.embeds) > 0:
        image = real_message.embeds[0].url
    else:
        image = "<Error>"

    return [
        intro,
        f"{from_now(message.created_at)}, {mention(message.author)} sent:",
        f"<{message_link(message)}>",
        SPLIT_TOKEN,
        image,
    ]
def to_string(self, *, type: str) -> List[str]:
if len(self.messages) == 0:
return ["There was no messages matching your filters"]
@@ -33,7 +106,7 @@ class History:
return [
intro,
f"{str_datetime(message.created_at)} ({from_now(message.created_at)}) {mention(message.author)} said:",
f"{from_now(message.created_at)}, {mention(message.author)} said:",
*text,
f"<{message_link(message)}>",
]
+4 -4
View File
@@ -54,16 +54,16 @@ class Presence:
f"- **most visited channel**: {channel_mention(top_channel)} ({self.channel_usage[top_channel]:,} msg, {percent(self.channel_usage[top_channel]/channel_sum)})"
if show_top_channel
else "",
f"- **most contributed channel**: {channel_mention(found_in)} ({self.channel_usage[found_in]:,} msg, {percent(self.channel_usage[found_in]/self.channel_total[found_in])} of {type})"
f"- **most contributed channel**: {channel_mention(found_in)} ({self.channel_usage[found_in]:,} msg,)"
if show_top_channel and member_specific
else "",
f"- **was mentioned**: {plural(mention_sum, 'time')} ({percent(mention_sum/val_sum(self.mention_count))} of {type})"
f"- **was mentioned**: {plural(mention_sum, 'time')}"
if member_specific and len(self.mentions) > 0
else "",
f"- **mostly mentioned by**: {mention(top_mention)} ({plural(self.mentions[top_mention], 'time')}, {percent(self.mentions[top_mention]/mention_sum)})"
if member_specific and len(self.mentions) > 0
else "",
f"- **mentioned others**: {plural(mention_others_sum, 'time')} ({percent(mention_others_sum/val_sum(self.mention_count))} of {type})"
f"- **mentioned others**: {plural(mention_others_sum, 'time')}"
if len(self.mention_others) > 0 and member_specific
else "",
f"- **mostly mentioned**: {mention(top_mention_others)} ({plural(self.mention_others[top_mention_others], 'time')}, {percent(self.mention_others[top_mention_others]/mention_others_sum)})"
@@ -81,7 +81,7 @@ class Presence:
f"- **reactions**: {plural(total_reaction_used, 'time')}"
if len(self.reactions) > 0 and not member_specific
else "",
f"- **reactions**: {plural(total_reaction_used, 'time')} ({percent(total_reaction_used/val_sum(self.used_reaction))} of {type})"
f"- **reactions**: {plural(total_reaction_used, 'time')}"
if len(self.reactions) > 0 and member_specific
else "",
f"- **top reactions**: {mention(top_reaction_member)} ({plural(self.used_reaction[top_reaction_member], 'time')}, {percent(self.used_reaction[top_reaction_member]/val_sum(self.used_reaction))})"
+27 -21
View File
@@ -1,16 +1,14 @@
import logging
from typing import Union, Tuple, Any
import discord
from discord import message
from datetime import datetime
from . import MessageLog
from utils import FakeMessage
from utils import serialize, FakeMessage
CHUNK_SIZE = 2000
FORMAT = 3
NOT_SERIALIZED = ["channel", "guild", "start_date"]
class ChannelLogs:
def __init__(self, channel: Union[discord.TextChannel, dict], guild: Any):
@@ -21,7 +19,7 @@ class ChannelLogs:
self.last_message_id = None
self.first_message_id = None
self.format = FORMAT
self.messages = []
self.messages = set()
self.start_date = None
elif isinstance(channel, dict):
self.format = channel["format"] if "format" in channel else None
@@ -40,26 +38,36 @@ class ChannelLogs:
and channel["first_message_id"] is not None
else None
)
self.messages = [
self.messages = {
MessageLog(message, self) for message in channel["messages"]
]
}
self.start_date = (
self.messages[-1].created_at if len(self.messages) > 0 else None
self.sorted_messages[0].created_at if len(self.messages) > 0 else None
)
def is_format(self):
return self.format == FORMAT
def preload(self, channel: discord.TextChannel):
    """Store the channel's current name and the channel object itself on this log."""
    self.name, self.channel = channel.name, channel
@property
def sorted_messages(self):
    """Return a new list of ``self.messages`` sorted by the elements' own ordering."""
    ordered = list(self.messages)
    ordered.sort()
    return ordered
@property
def nsfw(self):
    """Return the ``nsfw`` flag of the attached channel object.

    Requires ``self.channel`` to be set (``preload`` and ``load`` assign it).
    """
    # The original evaluated the attribute without returning it, so the
    # property was always None.
    return self.channel.nsfw
async def load(
self, channel: discord.TextChannel, start_date: datetime, stop_date: datetime
) -> Tuple[int, int]:
self.name = channel.name
self.channel = channel
is_empty = self.last_message_id is None
try:
if is_empty:
sanity_check = len(await channel.history(limit=1).flatten())
if sanity_check != 1:
sanity_check = len([message async for message in channel.history(limit=1)])
if sanity_check < 1:
yield len(self.messages), True
return
# load backward
@@ -94,13 +102,13 @@ class ChannelLogs:
first_message_date = message.created_at
m = MessageLog(message, self)
await m.load(message)
self.messages += [m]
self.messages.add(m)
yield len(self.messages), False
if done < CHUNK_SIZE: # reached bottom
self.first_message_id = None
self.last_message_id = channel.last_message_id
# load forward
last_message_date = self.messages[0].created_at
last_message_date = self.sorted_messages[-1].created_at
if not is_empty and (stop_date is None or last_message_date < stop_date):
tmp_message_id = None
while (
@@ -110,26 +118,24 @@ class ChannelLogs:
tmp_message_id = self.last_message_id
async for message in channel.history(
limit=CHUNK_SIZE,
after=FakeMessage(self.last_message_id),
after=FakeMessage(self.first_message_id),
oldest_first=True,
):
last_message_date = message.created_at
self.last_message_id = message.id
m = MessageLog(message, self)
await m.load(message)
self.messages.insert(0, m)
self.messages.add(m)
yield len(self.messages), False
except discord.errors.HTTPException:
except discord.errors.HTTPException as e:
yield -1, True
return # When an exception occurs (like Forbidden)
self.start_date = (
self.messages[-1].created_at if len(self.messages) > 0 else None
self.sorted_messages[0].created_at if len(self.messages) > 0 else None
)
yield len(self.messages), True
def dict(self) -> dict:
channel = dict(self.__dict__)
for key in NOT_SERIALIZED:
channel.pop(key, None)
channel = serialize(self, not_serialized=["channel", "guild", "start_date"])
channel["messages"] = [message.dict() for message in self.messages]
return channel
+53 -20
View File
@@ -8,28 +8,30 @@ import time
import logging
import asyncio
import threading
from dotenv import load_dotenv
from cryptography.fernet import Fernet
from . import ChannelLogs
from utils import code_message, delta, deltas
LOG_DIR = "logs"
LOG_EXT = ".logz"
current_analysis = []
current_analysis_lock = threading.Lock()
ALREADY_RUNNING = -100
CANCELLED = -200
NO_FILE = -300
# 5 minutes, assume 'fast' arg
MIN_MODIFICATION_TIME = 5 * 60
# ~1 year, remove log file
MAX_MODIFICATION_TIME = 365 * 24 * 60 * 60
# Load variables from a .env file (if present) before the settings are read.
load_dotenv()

# Settings below can be overridden through environment variables.
LOG_DIR = os.getenv("LOG_DIR", "logs")
LOG_EXT = os.getenv("LOG_EXT", ".logz")
CRYPT_KEY = os.getenv("CRYPT_KEY", "")

# 5 minutes, assume 'fast' arg
# (read from its own variable; it previously reused MAX_MODIFICATION_TIME,
# so overriding the maximum also silently overrode this minimum)
MIN_MODIFICATION_TIME = int(os.getenv("MIN_MODIFICATION_TIME", 5 * 60))
# 90 days, remove log file
MAX_MODIFICATION_TIME = int(os.getenv("MAX_MODIFICATION_TIME", 90 * 24 * 60 * 60))
class Worker:
def __init__(
@@ -129,29 +131,41 @@ class GuildLogs:
channels = {}
try:
last_time = os.path.getmtime(self.log_file)
gziped_data = None
await code_message(progress, "Reading saved history (1/4)...")
encrypted_data = None
await code_message(progress, "Reading saved history (1/5)...")
t0 = datetime.now()
with open(self.log_file, mode="rb") as f:
gziped_data = f.read()
encrypted_data = f.read()
logging.info(f"log {self.guild.id} > read in {delta(t0):,}ms")
if self.check_cancelled():
return CANCELLED, 0
await code_message(progress, "Reading saved history (2/4)...")
await code_message(progress, "Reading saved history (2/5)...")
if CRYPT_KEY == "" or CRYPT_KEY is None:
gziped_data = encrypted_data
try:
t0 = datetime.now()
fernet = Fernet(CRYPT_KEY)
gziped_data = fernet.decrypt(encrypted_data)
logging.info(f"log {self.guild.id} > decrypted in {delta(t0):,}ms")
except:
gziped_data = encrypted_data
if self.check_cancelled():
return CANCELLED, 0
await code_message(progress, "Reading saved history (3/5)...")
t0 = datetime.now()
json_data = gzip.decompress(gziped_data)
del gziped_data
logging.info(f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms")
if self.check_cancelled():
return CANCELLED, 0
await code_message(progress, "Reading saved history (3/4)...")
await code_message(progress, "Reading saved history (4/5)...")
t0 = datetime.now()
channels = json.loads(json_data)
del json_data
logging.info(f"log {self.guild.id} > json parse in {delta(t0):,}ms")
if self.check_cancelled():
return CANCELLED, 0
await code_message(progress, "Reading saved history (4/4)...")
await code_message(progress, "Reading saved history (5/5)...")
t0 = datetime.now()
self.channels = {
int(id): ChannelLogs(channels[id], self) for id in channels
@@ -215,6 +229,8 @@ class GuildLogs:
]
)
total_chan = len(target_channels)
for channel in target_channels:
self.channels[channel.id].preload(channel)
else:
if not self.locked and not self.lock():
return ALREADY_RUNNING, 0
@@ -231,6 +247,7 @@ class GuildLogs:
if channel.id not in self.channels or fresh:
loading_new += 1
self.channels[channel.id] = ChannelLogs(channel, self)
self.channels[channel.id].preload(channel)
workers += [
Worker(self.channels[channel.id], channel, start_date, stop_date)
]
@@ -285,7 +302,7 @@ class GuildLogs:
return CANCELLED, 0
await code_message(
progress,
f"Saving history (1/3)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
f"Saving history (1/4)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
)
t0 = datetime.now()
json_data = bytes(json.dumps(self.dict()), "utf-8")
@@ -296,7 +313,7 @@ class GuildLogs:
return CANCELLED, 0
await code_message(
progress,
f"Saving history (2/3)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
f"Saving history (2/4)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
)
t0 = datetime.now()
gziped_data = gzip.compress(json_data)
@@ -308,12 +325,28 @@ class GuildLogs:
return CANCELLED, 0
await code_message(
progress,
f"Saving history (3/3)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
f"Saving history (3/4)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
)
if CRYPT_KEY == "" or CRYPT_KEY is None:
encrypted_data = gziped_data
try:
t0 = datetime.now()
fernet = Fernet(CRYPT_KEY)
encrypted_data = fernet.encrypt(gziped_data)
logging.info(f"log {self.guild.id} > encrypted in {delta(t0):,}ms -> {len(gziped_data) / deltas(t0):,.3f} b/s")
except:
encrypted_data = gziped_data
if self.check_cancelled():
return CANCELLED, 0
await code_message(
progress,
f"Saving history (4/4)...\n{real_total_msg:,} messages in {real_total_chan:,} channels",
)
t0 = datetime.now()
with open(self.log_file, mode="wb") as f:
f.write(gziped_data)
f.write(encrypted_data)
del gziped_data
del encrypted_data
logging.info(
f"log {self.guild.id} > saved in {delta(t0):,}ms -> {real_total_msg / deltas(t0):,.3f} m/s"
)
+23 -24
View File
@@ -1,14 +1,11 @@
from typing import Union, Any
from typing import Optional, Union, Any
import discord
from datetime import datetime
from utils import is_extension
IMAGE_FORMAT = [".gif", ".gifv", ".png", ".jpg", ".jpeg", ".bmp"]
EMBED_IMAGES = ["image", "gifv"]
NOT_SERIALIZED = ["channel"]
from utils import (
serialize,
has_image,
)
class MessageLog:
@@ -39,15 +36,7 @@ class MessageLog:
self.image = False
self.attachment = len(message.attachments) > 0
self.embed = len(message.embeds) > 0
for attachment in message.attachments:
if is_extension(attachment.filename, IMAGE_FORMAT):
self.image = True
break
else:
for embed in message.embeds:
if embed.type in EMBED_IMAGES:
self.image = True
break
self.image = has_image(message)
self.reactions = {}
elif isinstance(message, dict):
self.id = int(message["id"])
@@ -74,18 +63,28 @@ class MessageLog:
self.attachment = message["attachment"]
self.reactions = message["reactions"]
def __eq__(self, other: object) -> bool:
return isinstance(other, self.__class__) and other.id == self.id
def __gt__(self, other: "MessageLog") -> bool:
return self.created_at > other.created_at
def __hash__(self) -> int:
return self.id
async def load(self, message: discord.Message):
for reaction in message.reactions:
self.reactions[str(reaction.emoji)] = []
async for user in reaction.users():
self.reactions[str(reaction.emoji)] += [user.id]
async def fetch(self) -> Optional[discord.Message]:
try:
return await self.channel.channel.fetch_message(self.id)
except (discord.NotFound, discord.Forbidden, discord.HTTPException):
return None
def dict(self) -> dict:
message = dict(self.__dict__)
for key in NOT_SERIALIZED:
message.pop(key, None)
message["created_at"] = self.created_at.isoformat()
message["edited_at"] = (
self.edited_at.isoformat() if self.edited_at is not None else None
return serialize(
self, not_serialized=["channel"], dates=["created_at", "edited_at"]
)
return message
+2 -2
View File
@@ -18,7 +18,7 @@ emojis.load_emojis()
bot = Bot(
"Discord Analyst",
"1.14",
"1.17.5",
alias="%",
)
@@ -81,7 +81,7 @@ bot.register_command(
scanners.LastScanner.help(),
)
bot.register_command(
"rand(om)?",
"(rand(om)?|mood)",
lambda *args: scanners.RandomScanner().compute(*args),
"rand: read a random message",
scanners.RandomScanner.help(),
-1
View File
@@ -1,5 +1,4 @@
from typing import Dict, List
from collections import defaultdict
import discord
+51 -22
View File
@@ -1,4 +1,4 @@
from typing import Dict, List
from typing import Dict, List, Optional, Tuple
from collections import defaultdict
import discord
import re
@@ -13,6 +13,7 @@ from utils import (
plural,
precise,
mention,
escape_text,
)
@@ -21,8 +22,9 @@ class FindScanner(Scanner):
def help() -> str:
return generate_help(
"find",
"Find specific words or phrases (you can use quotes to add spaces in queries)",
"Find specific words or phrases (you can use quotes to add spaces in queries, backticks define regexes)",
args=[
"top - rank users for these queries",
"all/everyone - include bots",
],
example='#mychannel1 #mychannel2 @user "I love you" "you too"',
@@ -31,7 +33,7 @@ class FindScanner(Scanner):
def __init__(self):
super().__init__(
all_args=True,
valid_args=["all", "everyone"],
valid_args=["all", "everyone", "top"],
help=FindScanner.help(),
intro_context="Matches",
)
@@ -39,40 +41,58 @@ class FindScanner(Scanner):
async def init(self, message: discord.Message, *args: str) -> bool:
self.matches = defaultdict(Counter)
self.all_messages = "all" in args or "everyone" in args
self.top = "top" in args or len(self.other_args) == 1
if len(self.other_args) == 0:
await message.channel.send(
"You need to add a query to find (you can use quotes to add spaces in queries)",
"You need to add a query to find (you can use quotes to add spaces in queries, backticks define regexes)",
reference=message,
)
return False
self.queries = [
(query, query.strip("`") if re.match(r"^`.*`$", query) else None)
for query in self.other_args
]
return True
def compute_message(self, channel: ChannelLogs, message: MessageLog):
return FindScanner.analyse_message(
message,
self.matches,
self.other_args,
self.queries,
self.raw_members,
all_messages=self.all_messages,
top=self.top,
)
def get_results(self, intro: str) -> List[str]:
res = [intro]
matches = [match for match in self.matches]
matches.sort(key=lambda match: self.matches[match].score(), reverse=True)
usage_count = Counter.total(self.matches)
res = [intro]
res += [
self.matches[match].to_string(
matches.index(match),
f"`{match}`",
total_usage=self.msg_count,
ranking=False,
transform=lambda id: f" by {mention(id)}",
top=len(self.members) != 1,
)
for match in matches
]
if len(matches) > 1:
if self.top:
res += [
self.matches[match].to_string(
matches.index(match),
mention(match),
total_usage=usage_count,
)
for match in matches
]
else:
res += [
self.matches[match].to_string(
matches.index(match),
f'"{escape_text(match)}"'
if len(match.strip("`")) == len(match)
else match,
total_usage=self.msg_count,
ranking=False,
transform=lambda id: f" by {mention(id)}",
top=len(self.members) != 1,
)
for match in matches
]
if self.top or len(matches) > 1:
res += [
f"Total: {plural(usage_count,'time')} ({precise(usage_count/self.msg_count)}/msg)"
]
@@ -84,10 +104,11 @@ class FindScanner(Scanner):
def analyse_message(
message: MessageLog,
matches: Dict[str, Counter],
queries: List[str],
queries: List[Tuple[str, Optional[str]]],
raw_members: List[int],
*,
all_messages: bool,
top: bool,
) -> bool:
impacted = False
# If author is included in the selection (empty list is all)
@@ -99,7 +120,15 @@ class FindScanner(Scanner):
impacted = True
content = message.content.lower()
for query in queries:
matches[query].update_use(
content.count(query.lower()), message.created_at, message.author
)
if query[1] is not None:
count = len(re.findall(query[1], message.content))
else:
count = content.count(query[0].lower())
if top:
if count > 0:
matches[message.author].update_use(count, message.created_at)
else:
matches[query[0]].update_use(
count, message.created_at, message.author
)
return impacted
+15 -3
View File
@@ -9,10 +9,22 @@ from utils import generate_help
class FirstScanner(HistoryScanner):
@staticmethod
def help() -> str:
return generate_help("first", "Read first message")
return generate_help(
"first",
"Read first message (add text to filter like %find)",
args=[
"image/gif - pull an image instead of a message",
"spoiler:allow/only - allow spoiler images",
],
)
def __init__(self):
super().__init__(help=FirstScanner.help())
def get_results(self, intro: str) -> List[str]:
return self.history.to_string(type="first")
async def get_results(self, intro: str) -> List[str]:
if self.images_only:
return await self.history.to_string_image(
type="first", spoiler=self.spoiler, gif_only=self.gif_only
)
else:
return self.history.to_string(type="first")
+18 -8
View File
@@ -14,11 +14,17 @@ from utils import generate_help
class FrequencyScanner(Scanner):
@staticmethod
def help() -> str:
return generate_help("freq", "Show frequency-related statistics")
return generate_help(
"freq",
"(BETA) Show frequency-related statistics",
args=[
"graph - plot hours of week",
],
)
def __init__(self):
super().__init__(
valid_args=["all", "everyone"],
valid_args=["all", "everyone", "graph"],
help=FrequencyScanner.help(),
intro_context="Frequency",
)
@@ -26,6 +32,8 @@ class FrequencyScanner(Scanner):
async def init(self, message: discord.Message, *args: str) -> bool:
self.freq = Frequency()
self.all_messages = "all" in args or "everyone" in args
self.member_specific = len(self.members) > 0
self.to_graph = "graph" in args
return True
def compute_message(self, channel: ChannelLogs, message: MessageLog):
@@ -35,10 +43,13 @@ class FrequencyScanner(Scanner):
def get_results(self, intro: str) -> List[str]:
FrequencyScanner.compute_results(self.freq)
res = [intro]
res += self.freq.to_string(
member_specific=self.member_specific,
)
if self.to_graph:
res = self.freq.to_graph()
else:
res = [intro]
res += self.freq.to_string(
member_specific=self.member_specific,
)
return res
@staticmethod
@@ -90,8 +101,7 @@ class FrequencyScanner(Scanner):
freq.longest_break_start = latest
latest = date
# calculate busiest weekday / hours
freq.week[date.weekday()] += 1
freq.day[date.hour] += 1
freq.hours[date.weekday()][date.hour] += 1
# calculate busiest day ever
start_delta = date - freq.dates[0]
if start_delta.days > current_day:
+52 -6
View File
@@ -1,26 +1,56 @@
from abc import ABC, abstractmethod
from typing import List
from typing import List, Tuple, Optional
import discord
import re
# Custom libs
from .scanner import Scanner
from data_types import History
from logs import ChannelLogs, MessageLog
from utils import FilterLevel
class HistoryScanner(Scanner, ABC):
def __init__(self, *, help: str):
super().__init__(
has_digit_args=True,
valid_args=["all", "everyone"],
valid_args=[
"all",
"everyone",
"spoiler",
"spoiler:allow",
"spoiler:only",
"image",
"img",
"gif",
],
help=help,
intro_context="",
all_args=True,
)
async def init(self, message: discord.Message, *args: str) -> bool:
self.history = History()
self.all_messages = "all" in args or "everyone" in args
self.images_only = "image" in args or "img" in args or "gif" in args
self.gif_only = "gif" in args
if "spoiler" in args or "spoiler:allow" in args:
self.spoiler = FilterLevel.ALLOW
elif "spoiler:only" in args:
self.spoiler = FilterLevel.ONLY
else:
self.spoiler = FilterLevel.NONE
if not self.images_only:
self.queries = [
(
query.lower(),
query.strip("`") if re.match(r"^`.*`$", query) else None,
)
for query in self.other_args
]
else:
self.queries = []
return True
def compute_message(self, channel: ChannelLogs, message: MessageLog):
@@ -30,6 +60,8 @@ class HistoryScanner(Scanner, ABC):
self.history,
self.raw_members,
all_messages=self.all_messages,
queries=self.queries,
images_only=self.images_only,
)
@abstractmethod
@@ -44,14 +76,28 @@ class HistoryScanner(Scanner, ABC):
raw_members: List[int],
*,
all_messages: bool,
queries: List[Tuple[str, Optional[str]]],
images_only: bool,
) -> bool:
impacted = False
# If author is included in the selection (empty list is all)
if (
(not message.bot or all_messages)
and len(raw_members) == 0
or message.author in raw_members
) and (message.content or message.attachment):
(
(not message.bot or all_messages)
and len(raw_members) == 0
or message.author in raw_members
)
and (message.content or message.attachment)
and (not images_only or message.image)
):
if not images_only:
content = message.content.lower()
for query in queries:
if query[1] is not None:
if not re.match(query[1], message.content):
return False
elif not query[0] in content:
return False
impacted = True
history.messages += [message]
return impacted
+15 -3
View File
@@ -9,10 +9,22 @@ from utils import generate_help
class LastScanner(HistoryScanner):
@staticmethod
def help() -> str:
return generate_help("last", "Read last message")
return generate_help(
"last",
"Read last message (add text to filter like %find)",
args=[
"image/gif - pull an image instead of a message",
"spoiler:allow/only - allow spoiler images",
],
)
def __init__(self):
super().__init__(help=LastScanner.help())
def get_results(self, intro: str) -> List[str]:
return self.history.to_string(type="last")
async def get_results(self, intro: str) -> List[str]:
if self.images_only:
return await self.history.to_string_image(
type="last", spoiler=self.spoiler, gif_only=self.gif_only
)
else:
return self.history.to_string(type="last")
+2 -2
View File
@@ -69,8 +69,8 @@ class PresenceScanner(Scanner):
for mention in message.mentions:
pres.mention_others[mention] += 1
pres.messages[message.author] += 1
pres.channel_total[channel.id] += 1
pres.mention_count[message.author] += len(message.mentions)
pres.channel_total[channel.id] += 1
pres.mention_count[message.author] += len(message.mentions)
if len(raw_members) > 0:
for mention in message.mentions:
if mention in raw_members:
+15 -3
View File
@@ -9,10 +9,22 @@ from utils import generate_help
class RandomScanner(HistoryScanner):
@staticmethod
def help() -> str:
return generate_help("rand", "Read a random message")
return generate_help(
"rand",
"Read a random message (add text to filter like %find)",
args=[
"image/gif - pull an image instead of a message",
"spoiler:allow/only - allow spoiler images",
],
)
def __init__(self):
super().__init__(help=RandomScanner.help())
def get_results(self, intro: str) -> List[str]:
return self.history.to_string(type="random")
async def get_results(self, intro: str) -> List[str]:
if self.images_only:
return await self.history.to_string_image(
type="random", spoiler=self.spoiler, gif_only=self.gif_only
)
else:
return self.history.to_string(type="random")
+82 -18
View File
@@ -4,6 +4,7 @@ from datetime import datetime
import logging
import re
import discord
import inspect
from utils import (
@@ -15,6 +16,9 @@ from utils import (
RELATIVE_REGEX,
parse_time,
command_cache,
FilterLevel,
SPLIT_TOKEN,
utc_now
)
from logs import (
GuildLogs,
@@ -27,7 +31,17 @@ from logs import (
class Scanner(ABC):
VALID_ARGS = ["me", "here", "fast", "fresh", "mobile", "mention"]
VALID_ARGS = [
"me",
"here",
"fast",
"fresh",
"mobile",
"mention",
"nsfw",
"nsfw:allow",
"nsfw:only",
]
def __init__(
self,
@@ -79,7 +93,11 @@ class Scanner(ABC):
dates = []
for i, arg in enumerate(args[1:]):
skip_check = False
if re.match(r"^<@!?\d+>$", arg):
if self.all_args and (
f"'{arg}'" in message.content or f'"{arg}"' in message.content
):
self.other_args += [arg]
elif re.match(r"^<@!?\d+>$", arg):
arg = arg[3:-1] if "!" in arg else arg[2:-1]
elif re.match(r"^<#!?\d+>$", arg):
arg = arg[3:-1] if "!" in arg else arg[2:-1]
@@ -108,10 +126,13 @@ class Scanner(ABC):
)
return
for arg in self.other_args:
args.remove(arg)
self.start_date = None if len(dates) < 1 else min(dates)
self.stop_date = None if len(dates) < 2 else max(dates)
if self.start_date is not None and self.start_date > datetime.now():
if self.start_date is not None and self.start_date > utc_now():
await message.channel.send(
f"Start date is after today", reference=message
)
@@ -139,6 +160,30 @@ class Scanner(ABC):
self.mention_users = "mention" in args or "mobile" in args
# nsfw filter
if "nsfw" in args or "nsfw:allow" in args:
self.nsfw = FilterLevel.ALLOW
elif "nsfw:only" in args:
self.nsfw = FilterLevel.ONLY
else:
self.nsfw = FilterLevel.NONE
# fix nsfw filter if channel specified
if not self.full and any(channel.nsfw for channel in self.channels):
self.nsfw = FilterLevel.ALLOW
elif all(channel.nsfw for channel in self.channels):
self.nsfw = FilterLevel.ONLY
# filter nsfw channels
if self.nsfw == FilterLevel.NONE:
self.channels = list(
filter(lambda channel: not channel.nsfw, self.channels)
)
elif self.nsfw == FilterLevel.ONLY:
self.channels = list(
filter(lambda channel: channel.nsfw, self.channels)
)
if not await self.init(message, *args):
return
@@ -184,7 +229,7 @@ class Scanner(ABC):
),
)
if self.stop_date is None:
self.stop_date = datetime.utcnow()
self.stop_date = utc_now()
self.msg_count = 0
self.total_msg = 0
@@ -220,18 +265,20 @@ class Scanner(ABC):
await progress.edit(content="```Computing results...```")
# Display results
t0 = datetime.now()
results = self.get_results(
get_intro(
self.intro_context,
self.full,
self.channels,
self.members,
self.msg_count,
self.chan_count,
self.start_date,
self.stop_date,
)
intro = get_intro(
self.intro_context,
self.full,
self.channels,
self.members,
self.msg_count,
self.chan_count,
self.start_date,
self.stop_date,
)
if inspect.iscoroutinefunction(self.get_results):
results = await self.get_results(intro)
else:
results = self.get_results(intro)
logging.info(
f"scan {guild.id} > results in {delta(t0):,}ms"
)
@@ -242,22 +289,39 @@ class Scanner(ABC):
if self.mention_users
else discord.AllowedMentions.none()
)
file = None
for r in results:
if r:
if len(response + "\n" + r) > 2000:
if isinstance(r, discord.File):
file = r
elif isinstance(r, int) and r == SPLIT_TOKEN:
await message.channel.send(
response,
reference=message if first else None,
allowed_mentions=allowed_mentions,
file=file,
)
first = False
file = None
response = ""
response += "\n" + r
if len(response) > 0:
elif isinstance(r, str):
if len(response + "\n" + r) > 2000:
await message.channel.send(
response,
reference=message if first else None,
allowed_mentions=allowed_mentions,
file=file,
)
first = False
file = None
response = ""
response += "\n" + r
if len(response) > 0 or file is not None:
await message.channel.send(
response,
reference=message if first else None,
allowed_mentions=allowed_mentions,
file=file,
)
command_cache.cache(self, message, args)
# Delete custom progress message
+26
View File
@@ -0,0 +1,26 @@
"""One-off maintenance script: encrypt stored log files in place.

Every file in ``LOG_DIR`` whose name ends with ``LOG_EXT`` is read; a file
that already decrypts with ``CRYPT_KEY`` is left untouched, any other file is
overwritten with its Fernet-encrypted content.
"""
import os
import os.path
import sys

from dotenv import load_dotenv
from cryptography.fernet import Fernet, InvalidToken

load_dotenv()

LOG_DIR = os.getenv("LOG_DIR", "logs")
# The extension has its own variable: reading it from LOG_DIR made the filter
# match the directory name as soon as LOG_DIR was customised.
LOG_EXT = os.getenv("LOG_EXT", ".logz")
CRYPT_KEY = os.getenv("CRYPT_KEY", "")

if not CRYPT_KEY:
    # Fernet("") would fail with an opaque ValueError; stop with a clear message.
    sys.exit("CRYPT_KEY is not set: cannot encrypt log files")

fernet = Fernet(CRYPT_KEY)

for item in os.listdir(LOG_DIR):
    if not item.endswith(LOG_EXT):
        continue
    path = os.path.join(LOG_DIR, item)
    with open(path, mode="rb") as f:
        data = f.read()
    try:
        # A successful decryption means the file is already encrypted.
        fernet.decrypt(data)
    except InvalidToken:
        # Only "not a valid token for this key" triggers encryption; any
        # other error (I/O, permissions...) must surface instead of causing
        # the file to be rewritten.
        with open(path, mode="wb") as f:
            f.write(fernet.encrypt(data))
        print(f"{item} was encrypted")
    else:
        print(f"{item} already encrypted")
+2 -2
View File
@@ -13,7 +13,7 @@ arguments:
TEXT = """
__**About Analyst-bot's data usage**__
**TL;DR**
Analyst-bot collects text message information. It does not share collected data with any third-party and data is retained 18 months or until the bot is leaving the guild/server.
Analyst-bot collects text message information. It does not share collected data with any third-party and data is retained 90 days or until the bot is leaving the guild/server.
**Data collection**
Analyst-bot collects a Discord guild/server's history when asked to.
This includes:
@@ -27,7 +27,7 @@ This does __not__ includes:
Any data collected is only processed in order to produce a one-time report sent to the user immediately. No temporary data are retained.
**Data storage and retain policy**
Analyst-bot stores the collected data in files that are accessible by the software and its administrator only.
Any collected data are retained maximum 18 months until deletion or when the bot is leaving a guild/server.
Any collected data are retained maximum 90 days until deletion or when the bot is leaving a guild/server.
**Data sharing**
Analyst-bot does not share the data collected with any third-party.
**Right to retract**
+102 -27
View File
@@ -1,11 +1,12 @@
from calendar import month
from enum import IntEnum
from typing import Callable, List, Dict, Union, Optional, Any
import os
import logging
import discord
import math
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
import re
import time
import dateutil.parser
from dateutil.relativedelta import relativedelta
@@ -18,6 +19,7 @@ COMMON_HELP_ARGS = [
"<date2> - filter before <date2>",
"fast - only read cache",
"fresh - does not read cache (long)",
"nsfw:allow/only - allow messages from nsfw channels",
"mobile/mention - mentions users (fix @invalid-user bug)",
]
@@ -31,7 +33,7 @@ def generate_help(
replace_args=[],
):
arg_list = "* " + "\n* ".join(
replace_args + COMMON_HELP_ARGS[len(replace_args) :] + args
args + replace_args + COMMON_HELP_ARGS[len(replace_args) :]
)
return f"""```
%{cmd}: {info}
@@ -50,6 +52,35 @@ def deltas(t0: datetime):
return (datetime.now() - t0).total_seconds()
class FilterLevel(IntEnum):
    """Three-state content filter level, ordered from most to least restrictive exclusion.

    Being an ``IntEnum``, members can be compared with ``<=`` / ``>=``.
    """

    # Filtered content is excluded.
    NONE = 0
    # Filtered content is accepted alongside regular content.
    ALLOW = 1
    # Only the filtered content is kept.
    ONLY = 2
SPLIT_TOKEN = 1152317803
# FILE
IMAGE_FORMAT = [".png", ".jpg", ".jpeg", ".bmp"]
EMBED_IMAGES = ["image"]
GIF_FORMAT = [".gif", ".gifv"]
EMBED_GIF = ["gifv"]
def is_extension(filepath: str, ext_list: List[str]) -> bool:
    """Tell whether the extension of ``filepath`` is listed in ``ext_list``.

    The path is lower-cased before its extension (leading dot included) is
    extracted; ``ext_list`` entries are compared as given.
    """
    suffix = os.path.splitext(filepath.lower())[1]
    return suffix in ext_list
def get_resource_path(filename: str) -> str:
    """Return the resolved path of ``filename`` inside the ``resources`` folder.

    The folder is looked up one level above the directory holding this module.
    """
    module_dir = os.path.dirname(__file__)
    candidate = os.path.join(module_dir, "..", "resources", filename)
    return os.path.realpath(candidate)
# DISCORD API
@@ -81,22 +112,50 @@ def message_link(message: discord.Message) -> str:
return f"https://discord.com/channels/{message.channel.guild.id}/{message.channel.id}/{message.id}"
class FakeMessage:
def escape_text(text: str) -> str:
    """Escape mentions first, then markdown, using the ``discord.utils`` helpers."""
    without_mentions = discord.utils.escape_mentions(text)
    return discord.utils.escape_markdown(without_mentions)
class FakeMessage(discord.abc.Snowflake):
    """Minimal ``discord.abc.Snowflake`` implementation that only carries an ``id``.

    NOTE(review): presumably handed to discord.py calls that accept any
    snowflake-like object in place of a real message - confirm against callers.
    """

    def __init__(self, id: int):
        # The attribute must be named `id` to satisfy the Snowflake interface.
        self.id = id
# FILE
def has_image(message: discord.Message) -> bool:
    """Tell whether ``message`` carries a picture or a GIF.

    Attachments are checked first (by file extension against the GIF and
    image formats), then embeds (by embed type).
    """
    if any(
        is_extension(attachment.filename, GIF_FORMAT + IMAGE_FORMAT)
        for attachment in message.attachments
    ):
        return True
    return any(embed.type in (EMBED_IMAGES + EMBED_GIF) for embed in message.embeds)
def is_extension(filepath: str, ext_list: List[str]) -> bool:
filename, file_extension = os.path.splitext(filepath.lower())
return file_extension in ext_list
def is_image_spoiler(message: discord.Message) -> bool:
    """Tell whether the image of ``message`` is hidden behind a spoiler.

    * With attachments, the answer is the spoiler flag of the first attachment.
    * With embeds only, the message text is searched for a link wrapped in
      spoiler marks (``||...http...||``).
    * Otherwise the message has no image and the result is ``False``.
    """
    if len(message.attachments) > 0:
        return message.attachments[0].is_spoiler()
    if len(message.embeds) > 0:
        # `search` (not `match`) so the link may appear anywhere in the text,
        # and `[^|]*` after "http" so the whole URL is accepted rather than a
        # single character.
        return (
            re.search(r"\|\|[^|]*http[^|]*\|\|", message.content.lower())
            is not None
        )
    return False
def get_resource_path(filename: str) -> str:
return os.path.realpath(
os.path.join(os.path.dirname(__file__), "..", "resources", filename)
def is_image_gif(message: discord.Message) -> bool:
    """Tell whether the first attachment (or else the first embed) is a GIF.

    Attachments take precedence and are judged on their file extension;
    embeds are judged on their type. A message with neither is not a GIF.
    """
    attachments = message.attachments
    if len(attachments) > 0:
        return is_extension(attachments[0].filename, GIF_FORMAT)
    embeds = message.embeds
    if len(embeds) > 0:
        return embeds[0].type in EMBED_GIF
    return False
def should_allow_spoiler(message: discord.Message, spoiler: FilterLevel) -> bool:
    """Tell whether ``message`` passes the requested spoiler filter.

    * ``FilterLevel.NONE``: only non-spoiler images pass.
    * ``FilterLevel.ALLOW``: every image passes.
    * ``FilterLevel.ONLY``: only spoiler images pass.
    """
    if is_image_spoiler(message):
        return spoiler >= FilterLevel.ALLOW
    # Strictly below ONLY: the former `<= ONLY` test was always true and let
    # regular images through even when only spoilers were requested.
    return spoiler < FilterLevel.ONLY
@@ -119,13 +178,13 @@ def no_duplicate(seq: list) -> list:
def top_key(
d: Dict[Union[str, int], int], key: Optional[Callable] = None
d: Dict[Union[str, int], int], key: Optional[Callable] = None, reverse: bool = False
) -> Union[str, int]:
if len(d) == 0:
return None
if key is None:
key = lambda k: d[k]
return sorted(d, key=key)[-1]
return sorted(d, key=key, reverse=reverse)[-1]
def val_sum(d: Dict[Any, int]) -> int:
@@ -134,6 +193,21 @@ def val_sum(d: Dict[Any, int]) -> int:
return sum(d.values())
def serialize(
    obj: Any,
    *,
    not_serialized: Optional[List[str]] = None,
    dates: Optional[List[str]] = None,
) -> Dict:
    """Return a shallow dict copy of ``obj``'s attributes, ready for dumping.

    :param obj: any object exposing ``__dict__``; it is never modified
    :param not_serialized: attribute names dropped from the output
    :param dates: attribute names whose value is replaced by its
        ``isoformat()`` string; falsy values (e.g. ``None``), missing keys and
        values without ``isoformat`` are left as they are
    :return: the new dict
    """
    output = dict(obj.__dict__)
    # `None` defaults instead of shared mutable lists.
    for key in not_serialized or ():
        output.pop(key, None)
    for key in dates or ():
        # `.get` tolerates a key that is absent or was just dropped above.
        value = output.get(key)
        if value:
            try:
                output[key] = value.isoformat()
            except AttributeError:
                # Not a date-like value: keep it untouched.
                pass
    return output
# MESSAGE FORMATTING
@@ -184,13 +258,19 @@ def parse_iso_datetime(str_date: str) -> datetime:
RELATIVE_REGEX = r"(yesterday|today|\d*hours?|\d+h(ours?)?|\d*days?|\d+d(ays?)?|\d*weeks?|\d+w(eeks?)?|\d*months?|\d+m(onths?)?|\d*years?|\d+y(ears?)?)"
def utc_now() -> datetime:
    """Return the current instant as a timezone-aware datetime in UTC."""
    return datetime.now(timezone.utc)
def utc_today() -> datetime:
    """Return midnight (UTC) of the current UTC day as an aware datetime."""
    now = utc_now()
    return datetime(now.year, now.month, now.day, tzinfo=timezone.utc)
def parse_relative_time(src: str) -> datetime:
today = datetime.utcnow().date()
today = datetime(today.year, today.month, today.day)
if src == "today":
return today
return utc_today()
elif src == "yesterday":
return today - relativedelta(days=1)
return utc_today() - relativedelta(days=1)
else:
m = re.match("(\d*)(\w+)", src)
delta = None
@@ -206,7 +286,7 @@ def parse_relative_time(src: str) -> datetime:
delta = relativedelta(months=value)
elif unit == "y":
delta = relativedelta(years=value)
return datetime.utcnow() - delta
return utc_now() - delta
def parse_time(src: str) -> datetime:
@@ -217,11 +297,11 @@ def parse_time(src: str) -> datetime:
def str_date(date: datetime) -> str:
return date.strftime("%d %b. %Y") # 12 Jun. 2018
return f"<t:{int(time.mktime(date.timetuple()))}:D>"
def str_datetime(date: datetime) -> str:
return date.strftime("%H:%M, %d %b. %Y") # 12:05, 12 Jun. 2018
return f"<t:{int(time.mktime(date.timetuple()))}:f>"
def str_delta(delay: timedelta) -> str:
@@ -249,12 +329,7 @@ def str_delta(delay: timedelta) -> str:
def from_now(src: Optional[datetime]) -> str:
if src is None:
return "never"
output = str_delta(datetime.utcnow() - src)
if output == "no time":
return "now"
elif output == "one day":
return "yesterday"
return output + " ago"
return f"<t:{int(time.mktime(src.timetuple()))}:R>"
# APP SPECIFIC
@@ -275,7 +350,7 @@ def get_intro(
"""
time_text = ""
if start_datetime is not None:
stop_datetime = datetime.now() if stop_datetime is None else stop_datetime
stop_datetime = utc_now() if stop_datetime is None else stop_datetime
time_text = f" (in {str_delta(stop_datetime - start_datetime)})"
# Show all data (members, channels) when it's less than 5 units
if len(members) == 0:
+3
View File
@@ -0,0 +1,3 @@
pytest~=6.2.3
pytest-cov
coveralls
View File
View File
@@ -0,0 +1,90 @@
from unittest import TestCase
from unittest.mock import MagicMock
from src.scanners import FirstScanner
from datetime import datetime, timedelta
from tests.utils import AsyncTestCase, fake_message
class TestFirstScanner(AsyncTestCase):
    """Tests of ``FirstScanner``: help text, empty results and message selection."""

    def test_help(self):
        # The help text must exist and mention the command name.
        self.assertGreater(len(FirstScanner.help()), 0)
        self.assertIn("%first", FirstScanner.help())

    def test_empty_no_messages(self):
        # No message is ever fed to the scanner.
        scanner = FirstScanner()
        command_msg = MagicMock()
        # NOTE(review): the empty list is passed as one positional argument - confirm this is intended.
        self._await(scanner.init(command_msg, []))
        results = self._await(scanner.get_results(""))
        self.assertListEqual(["There was no messages matching your filters"], results)

    def test_empty_filtered(self):
        # Only member 1 is selected while messages come from members 2 and 3.
        scanner = FirstScanner()
        scanner.raw_members = [1]
        self._await(scanner.init(fake_message(), []))
        messages = [fake_message(author=2), fake_message(author=3)]
        for msg in messages:
            scanner.compute_message(msg.channel, msg)
        results = self._await(scanner.get_results(""))
        self.assertListEqual(["There was no messages matching your filters"], results)

    def test_normal(self):
        # Three messages given out of order; the oldest one (id=2) is expected.
        scanner = FirstScanner()
        self._await(scanner.init(fake_message(), []))
        messages = [
            fake_message(id=1, created_at=timedelta(days=-2)),
            fake_message(id=2, created_at=timedelta(days=-3)),
            fake_message(id=3, created_at=timedelta(days=-1)),
        ]
        for msg in messages:
            scanner.compute_message(msg.channel, msg)
        results = self._await(scanner.get_results(""))
        expected = messages[1]
        # NOTE(review): the expected message is created 3 days back while the
        # expected text reads "2 days ago" - confirm against the formatter.
        self.assertListEqual(
            [
                "First message out of 3",
                f"{expected.created_at.strftime('%H:%M, %d %b. %Y')} (2 days ago) <@1> said:",
                f"> {expected.content}",
                "<https://discord.com/channels/1/1/2>",
            ],
            results,
        )

    def test_filtered(self):
        # Member filter on author 1: the older of its two messages (id=1) is expected.
        scanner = FirstScanner()
        scanner.raw_members = [1]
        self._await(scanner.init(fake_message(), []))
        messages = [
            fake_message(id=1, author=1, created_at=timedelta(days=-2)),
            fake_message(id=2, author=2, created_at=timedelta(days=-3)),
            fake_message(id=3, author=1, created_at=timedelta(days=-1)),
        ]
        for msg in messages:
            scanner.compute_message(msg.channel, msg)
        results = self._await(scanner.get_results(""))
        expected = messages[0]
        # NOTE(review): the expected message is created 2 days back while the
        # expected text reads "yesterday" - confirm against the formatter.
        self.assertListEqual(
            [
                "First message out of 2",
                f"{expected.created_at.strftime('%H:%M, %d %b. %Y')} (yesterday) <@1> said:",
                f"> {expected.content}",
                "<https://discord.com/channels/1/1/1>",
            ],
            results,
        )
View File
+99
View File
@@ -0,0 +1,99 @@
from typing import List, Optional, Dict, Union
from unittest import TestCase
import asyncio
from datetime import datetime, timedelta
from unittest.mock import MagicMock
import random
import string
class AsyncTestCase(TestCase):
    """``TestCase`` base giving each test its own private asyncio event loop."""

    def setUp(self):
        """Clear the current event loop and create the loop used by the test."""
        asyncio.set_event_loop(None)
        self.loop = asyncio.new_event_loop()

    def tearDown(self):
        """Close the loop created in ``setUp``."""
        self.loop.close()

    def _await(self, fn):
        """Run the awaitable ``fn`` to completion on the test loop and return its result."""
        outcome = self.loop.run_until_complete(fn)
        return outcome
# Alphabet used by random_text: ASCII letters, digits and punctuation.
RANDOM_TEXT_CHARS = string.ascii_letters + string.digits + string.punctuation


def random_text(min_len: int = 3, max_len: int = 45):
    """Return a random string of ``RANDOM_TEXT_CHARS`` characters.

    The length is drawn with ``random.randrange(min_len, max_len)``, so
    ``max_len`` itself is never reached.
    """
    length = random.randrange(min_len, max_len)
    picked = []
    for _ in range(length):
        picked.append(random.choice(RANDOM_TEXT_CHARS))
    return "".join(picked)
def fake_guild(id: int = 1):
    """Build a mock guild exposing the given ``id``."""
    return MagicMock(id=id)


def fake_channel(id: int = 1, name: str = "fake-channel"):
    """Build a mock channel with ``id``, ``name`` and a default fake guild."""
    channel = MagicMock(id=id, guild=fake_guild())
    # `name` is reserved by the Mock constructor (it only names the mock for
    # its repr), so the attribute has to be assigned after creation to make
    # `channel.name` return the actual string.
    channel.name = name
    return channel
def fake_message(
    id: int = 1,
    channel_id: int = 1,
    channel_name: str = "fake-channel",
    created_at: Optional[Union[datetime, timedelta]] = None,
    edited_at: Optional[Union[datetime, timedelta]] = None,
    author: int = 1,
    pinned: bool = False,
    mention_everyone: bool = False,
    tts: bool = False,
    bot: bool = False,
    content: Optional[str] = None,
    mentions: Optional[List[int]] = None,
    reference: Optional[int] = None,
    role_mentions: Optional[List[int]] = None,
    channel_mentions: Optional[List[int]] = None,
    image: bool = False,
    attachment: bool = False,
    embed: bool = False,
    reactions: Optional[Dict[str, List[int]]] = None,
):
    """Build a ``MagicMock`` message whose attributes are the given arguments.

    * ``created_at``: ``None`` picks a random hour within the past 30 days; a
      ``timedelta`` is taken relative to ``datetime.now()`` (naive local time).
    * ``edited_at``: a ``timedelta`` is also taken relative to ``datetime.now()``.
    * ``content``: ``None`` is replaced by ``random_text()``.
    * list/dict arguments left to ``None`` become fresh empty containers.
    """
    if created_at is None:
        # randrange(-720, 0): between 30 days ago and one hour ago.
        created_at = datetime.now() + timedelta(hours=random.randrange(-30 * 24, 0))
    elif isinstance(created_at, timedelta):
        created_at = datetime.now() + created_at
    if isinstance(edited_at, timedelta):
        edited_at = datetime.now() + edited_at
    if content is None:
        content = random_text()
    # Fresh containers per call rather than shared defaults.
    if mentions is None:
        mentions = []
    if role_mentions is None:
        role_mentions = []
    if channel_mentions is None:
        channel_mentions = []
    if reactions is None:
        reactions = {}
    return MagicMock(
        id=id,
        channel=fake_channel(channel_id, channel_name),
        created_at=created_at,
        edited_at=edited_at,
        author=author,
        pinned=pinned,
        mention_everyone=mention_everyone,
        tts=tts,
        bot=bot,
        content=content,
        # Each raw_* attribute reuses the same list as its non-raw counterpart.
        mentions=mentions,
        raw_mentions=mentions,
        reference=reference,
        role_mentions=role_mentions,
        raw_role_mentions=role_mentions,
        channel_mentions=channel_mentions,
        raw_channel_mentions=channel_mentions,
        image=image,
        attachment=attachment,
        embed=embed,
        reactions=reactions,
    )