@@ -5,3 +5,4 @@ __pycache__
|
|||||||
error_*
|
error_*
|
||||||
*.log
|
*.log
|
||||||
/logs/
|
/logs/
|
||||||
|
.vscode
|
||||||
@@ -43,7 +43,10 @@
|
|||||||
* %react - rank users by their reactions
|
* %react - rank users by their reactions
|
||||||
* arguments:
|
* arguments:
|
||||||
* <n> - top <n> messages, default is 10
|
* <n> - top <n> messages, default is 10
|
||||||
* %cancel - cancel current analysis
|
* %words - rank words by their usage
|
||||||
|
* arguments:
|
||||||
|
* <n> - top <n> words, default is 10
|
||||||
|
* %cancel - cancel current analysis (not launched with fast)
|
||||||
|
|
||||||
* Common arguments:
|
* Common arguments:
|
||||||
* @member/me: filter for one or more members
|
* @member/me: filter for one or more members
|
||||||
@@ -100,8 +103,12 @@ python3 src/main.py
|
|||||||
|
|
||||||
## Changelog
|
## Changelog
|
||||||
|
|
||||||
|
* **v1.12**
|
||||||
|
* more scans: `%words`
|
||||||
|
* concurrent `fast` analysis
|
||||||
|
* assume `fast` if last analysis is fresh
|
||||||
* **v1.11**
|
* **v1.11**
|
||||||
* more scans `%first`, `%rand`, `%last`
|
* more scans: `%first`, `%rand`, `%last`
|
||||||
* streak computing in `%pres`
|
* streak computing in `%pres`
|
||||||
* **v1.10**
|
* **v1.10**
|
||||||
* multithreading for queries
|
* multithreading for queries
|
||||||
|
|||||||
@@ -40,7 +40,12 @@ class ChannelLogs:
|
|||||||
self.channel = channel
|
self.channel = channel
|
||||||
try:
|
try:
|
||||||
if self.last_message_id is not None: # append
|
if self.last_message_id is not None: # append
|
||||||
while self.last_message_id != channel.last_message_id:
|
tmp_message_id = None
|
||||||
|
while (
|
||||||
|
self.last_message_id != channel.last_message_id
|
||||||
|
and self.last_message_id != tmp_message_id
|
||||||
|
):
|
||||||
|
tmp_message_id = self.last_message_id
|
||||||
async for message in channel.history(
|
async for message in channel.history(
|
||||||
limit=CHUNK_SIZE,
|
limit=CHUNK_SIZE,
|
||||||
after=FakeMessage(self.last_message_id),
|
after=FakeMessage(self.last_message_id),
|
||||||
|
|||||||
+75
-17
@@ -4,6 +4,7 @@ import discord
|
|||||||
import json
|
import json
|
||||||
import gzip
|
import gzip
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
import time
|
||||||
import logging
|
import logging
|
||||||
import asyncio
|
import asyncio
|
||||||
import threading
|
import threading
|
||||||
@@ -22,6 +23,8 @@ current_analysis_lock = threading.Lock()
|
|||||||
ALREADY_RUNNING = -100
|
ALREADY_RUNNING = -100
|
||||||
CANCELLED = -200
|
CANCELLED = -200
|
||||||
|
|
||||||
|
MIN_MODIFICATION_TIME = 5 * 60
|
||||||
|
|
||||||
|
|
||||||
class Worker:
|
class Worker:
|
||||||
def __init__(self, channel_log: ChannelLogs, channel: discord.TextChannel):
|
def __init__(self, channel_log: ChannelLogs, channel: discord.TextChannel):
|
||||||
@@ -53,12 +56,39 @@ class GuildLogs:
|
|||||||
self.guild = guild
|
self.guild = guild
|
||||||
self.log_file = os.path.join(LOG_DIR, f"{guild.id}.logz")
|
self.log_file = os.path.join(LOG_DIR, f"{guild.id}.logz")
|
||||||
self.channels = {}
|
self.channels = {}
|
||||||
|
self.locked = False
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, type, value, tb):
|
||||||
|
del self.channels
|
||||||
|
del self.guild
|
||||||
|
if self.locked:
|
||||||
|
self.unlock()
|
||||||
|
|
||||||
def dict(self) -> dict:
|
def dict(self) -> dict:
|
||||||
return {id: self.channels[id].dict() for id in self.channels}
|
return {id: self.channels[id].dict() for id in self.channels}
|
||||||
|
|
||||||
def check_cancelled(self) -> bool:
|
def check_cancelled(self) -> bool:
|
||||||
return self.log_file not in current_analysis
|
return self.locked and self.log_file not in current_analysis
|
||||||
|
|
||||||
|
def lock(self) -> bool:
    """Try to register this guild's log file as being analysed.

    Returns:
        True when the log file was added to ``current_analysis`` by this
        call, False when it was already registered there.

    ``self.locked`` is only set when the registration succeeds. Marking
    the instance as locked on failure would make a later ``unlock()``
    (for instance from ``__exit__``) remove the entry owned by the
    analysis that is actually running.
    """
    with current_analysis_lock:
        if self.log_file in current_analysis:
            return False
        current_analysis.append(self.log_file)
        self.locked = True
        return True
|
||||||
|
|
||||||
|
def unlock(self):
    """Mark this instance as unlocked and unregister its log file.

    Does nothing to ``current_analysis`` when the log file is not
    registered there.
    """
    self.locked = False
    with current_analysis_lock:
        if self.log_file in current_analysis:
            current_analysis.remove(self.log_file)
|
||||||
|
|
||||||
async def load(
|
async def load(
|
||||||
self,
|
self,
|
||||||
@@ -68,19 +98,18 @@ class GuildLogs:
|
|||||||
fast: bool,
|
fast: bool,
|
||||||
fresh: bool,
|
fresh: bool,
|
||||||
) -> Tuple[int, int]:
|
) -> Tuple[int, int]:
|
||||||
current_analysis_lock.acquire()
|
self.locked = False
|
||||||
if self.log_file in current_analysis:
|
if not fast and not self.lock():
|
||||||
current_analysis_lock.release()
|
|
||||||
return ALREADY_RUNNING, 0
|
return ALREADY_RUNNING, 0
|
||||||
current_analysis.append(self.log_file)
|
|
||||||
current_analysis_lock.release()
|
|
||||||
t00 = datetime.now()
|
t00 = datetime.now()
|
||||||
# read logs
|
# read logs
|
||||||
if not os.path.exists(LOG_DIR):
|
if not os.path.exists(LOG_DIR):
|
||||||
os.mkdir(LOG_DIR)
|
os.mkdir(LOG_DIR)
|
||||||
|
last_time = None
|
||||||
if os.path.exists(self.log_file):
|
if os.path.exists(self.log_file):
|
||||||
channels = {}
|
channels = {}
|
||||||
try:
|
try:
|
||||||
|
last_time = os.path.getmtime(self.log_file)
|
||||||
gziped_data = None
|
gziped_data = None
|
||||||
await code_message(progress, "Reading saved history (1/4)...")
|
await code_message(progress, "Reading saved history (1/4)...")
|
||||||
t0 = datetime.now()
|
t0 = datetime.now()
|
||||||
@@ -92,6 +121,7 @@ class GuildLogs:
|
|||||||
await code_message(progress, "Reading saved history (2/4)...")
|
await code_message(progress, "Reading saved history (2/4)...")
|
||||||
t0 = datetime.now()
|
t0 = datetime.now()
|
||||||
json_data = gzip.decompress(gziped_data)
|
json_data = gzip.decompress(gziped_data)
|
||||||
|
del gziped_data
|
||||||
logging.info(
|
logging.info(
|
||||||
f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms"
|
f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms"
|
||||||
)
|
)
|
||||||
@@ -100,6 +130,7 @@ class GuildLogs:
|
|||||||
await code_message(progress, "Reading saved history (3/4)...")
|
await code_message(progress, "Reading saved history (3/4)...")
|
||||||
t0 = datetime.now()
|
t0 = datetime.now()
|
||||||
channels = json.loads(json_data)
|
channels = json.loads(json_data)
|
||||||
|
del json_data
|
||||||
logging.info(f"log {self.guild.id} > json parse in {delta(t0):,}ms")
|
logging.info(f"log {self.guild.id} > json parse in {delta(t0):,}ms")
|
||||||
if self.check_cancelled():
|
if self.check_cancelled():
|
||||||
return CANCELLED, 0
|
return CANCELLED, 0
|
||||||
@@ -122,15 +153,40 @@ class GuildLogs:
|
|||||||
else:
|
else:
|
||||||
fast = False
|
fast = False
|
||||||
|
|
||||||
|
if len(target_channels) == 0:
|
||||||
|
target_channels = (
|
||||||
|
self.channels.values() if fast else self.guild.text_channels
|
||||||
|
)
|
||||||
|
elif fast:
|
||||||
|
# select already loaded channels only
|
||||||
|
target_channels_tmp = [
|
||||||
|
channel for channel in target_channels if channel.id in self.channels
|
||||||
|
]
|
||||||
|
if len(target_channels_tmp) == 0:
|
||||||
|
fast = False
|
||||||
|
else:
|
||||||
|
target_channels = target_channels_tmp
|
||||||
|
|
||||||
|
# assume fast if file is fresh
|
||||||
|
if (
|
||||||
|
not fast
|
||||||
|
and not fresh
|
||||||
|
and last_time is not None
|
||||||
|
and (time.time() - last_time) < MIN_MODIFICATION_TIME
|
||||||
|
):
|
||||||
|
invalid_target_channels = [
|
||||||
|
channel
|
||||||
|
for channel in target_channels
|
||||||
|
if channel.id not in self.channels
|
||||||
|
]
|
||||||
|
if len(invalid_target_channels) == 0:
|
||||||
|
fast = True
|
||||||
|
if self.locked:
|
||||||
|
self.unlock()
|
||||||
|
|
||||||
total_msg = 0
|
total_msg = 0
|
||||||
total_chan = 0
|
total_chan = 0
|
||||||
if fast:
|
if fast:
|
||||||
if len(target_channels) == 0:
|
|
||||||
total_msg = sum(
|
|
||||||
[len(channel.messages) for channel in self.channels.values()]
|
|
||||||
)
|
|
||||||
total_chan = len(self.channels)
|
|
||||||
else:
|
|
||||||
target_channels_id = [channel.id for channel in target_channels]
|
target_channels_id = [channel.id for channel in target_channels]
|
||||||
total_msg = sum(
|
total_msg = sum(
|
||||||
[
|
[
|
||||||
@@ -141,12 +197,10 @@ class GuildLogs:
|
|||||||
)
|
)
|
||||||
total_chan = len(target_channels)
|
total_chan = len(target_channels)
|
||||||
else:
|
else:
|
||||||
|
if not self.locked and not self.lock():
|
||||||
|
return ALREADY_RUNNING, 0
|
||||||
# load channels
|
# load channels
|
||||||
t0 = datetime.now()
|
t0 = datetime.now()
|
||||||
if len(target_channels) == 0:
|
|
||||||
target_channels = (
|
|
||||||
self.guild.text_channels if not fast else self.channels.keys()
|
|
||||||
)
|
|
||||||
loading_new = 0
|
loading_new = 0
|
||||||
queried_msg = 0
|
queried_msg = 0
|
||||||
total_chan = 0
|
total_chan = 0
|
||||||
@@ -225,6 +279,7 @@ class GuildLogs:
|
|||||||
)
|
)
|
||||||
t0 = datetime.now()
|
t0 = datetime.now()
|
||||||
gziped_data = gzip.compress(json_data)
|
gziped_data = gzip.compress(json_data)
|
||||||
|
del json_data
|
||||||
logging.info(
|
logging.info(
|
||||||
f"log {self.guild.id} > gzip in {delta(t0):,}ms -> {real_total_msg / deltas(t0):,.3f} m/s"
|
f"log {self.guild.id} > gzip in {delta(t0):,}ms -> {real_total_msg / deltas(t0):,.3f} m/s"
|
||||||
)
|
)
|
||||||
@@ -237,6 +292,7 @@ class GuildLogs:
|
|||||||
t0 = datetime.now()
|
t0 = datetime.now()
|
||||||
with open(self.log_file, mode="wb") as f:
|
with open(self.log_file, mode="wb") as f:
|
||||||
f.write(gziped_data)
|
f.write(gziped_data)
|
||||||
|
del gziped_data
|
||||||
logging.info(
|
logging.info(
|
||||||
f"log {self.guild.id} > saved in {delta(t0):,}ms -> {real_total_msg / deltas(t0):,.3f} m/s"
|
f"log {self.guild.id} > saved in {delta(t0):,}ms -> {real_total_msg / deltas(t0):,.3f} m/s"
|
||||||
)
|
)
|
||||||
@@ -247,6 +303,7 @@ class GuildLogs:
|
|||||||
f"Analysing...\n{total_msg:,} messages in {total_chan:,} channels",
|
f"Analysing...\n{total_msg:,} messages in {total_chan:,} channels",
|
||||||
)
|
)
|
||||||
logging.info(f"log {self.guild.id} > TOTAL TIME: {delta(t00):,}ms")
|
logging.info(f"log {self.guild.id} > TOTAL TIME: {delta(t00):,}ms")
|
||||||
|
if self.locked:
|
||||||
current_analysis_lock.acquire()
|
current_analysis_lock.acquire()
|
||||||
current_analysis.remove(self.log_file)
|
current_analysis.remove(self.log_file)
|
||||||
current_analysis_lock.release()
|
current_analysis_lock.release()
|
||||||
@@ -262,5 +319,6 @@ class GuildLogs:
|
|||||||
else:
|
else:
|
||||||
current_analysis_lock.release()
|
current_analysis_lock.release()
|
||||||
await message.channel.send(
|
await message.channel.send(
|
||||||
f"No analysis are currently running on this server", reference=message
|
f"No cancellable analysis are currently running on this server",
|
||||||
|
reference=message,
|
||||||
)
|
)
|
||||||
|
|||||||
+10
-3
@@ -21,6 +21,7 @@ from scanners import (
|
|||||||
FirstScanner,
|
FirstScanner,
|
||||||
RandomScanner,
|
RandomScanner,
|
||||||
LastScanner,
|
LastScanner,
|
||||||
|
WordsScanner,
|
||||||
)
|
)
|
||||||
from logs import GuildLogs
|
from logs import GuildLogs
|
||||||
|
|
||||||
@@ -32,7 +33,7 @@ emojis.load_emojis()
|
|||||||
|
|
||||||
bot = Bot(
|
bot = Bot(
|
||||||
"Discord Analyst",
|
"Discord Analyst",
|
||||||
"1.11",
|
"1.12",
|
||||||
alias="%",
|
alias="%",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -41,8 +42,8 @@ bot.log_calls = True
|
|||||||
bot.register_command(
|
bot.register_command(
|
||||||
"(cancel|stop)",
|
"(cancel|stop)",
|
||||||
GuildLogs.cancel,
|
GuildLogs.cancel,
|
||||||
"cancel: stop current analysis",
|
"cancel: stop current analysis (not launched with fast)",
|
||||||
"```\n" + "%cancel: Stop current analysis\n" + "```",
|
"```\n" + "%cancel: Stop current analysis (not launched with fast)\n" + "```",
|
||||||
)
|
)
|
||||||
bot.register_command(
|
bot.register_command(
|
||||||
"last",
|
"last",
|
||||||
@@ -62,6 +63,12 @@ bot.register_command(
|
|||||||
"first: read first message",
|
"first: read first message",
|
||||||
FirstScanner.help(),
|
FirstScanner.help(),
|
||||||
)
|
)
|
||||||
|
bot.register_command(
|
||||||
|
"words",
|
||||||
|
lambda *args: WordsScanner().compute(*args),
|
||||||
|
"words: rank words by their usage",
|
||||||
|
WordsScanner.help(),
|
||||||
|
)
|
||||||
bot.register_command(
|
bot.register_command(
|
||||||
"mentioned",
|
"mentioned",
|
||||||
lambda *args: MentionedScanner().compute(*args),
|
lambda *args: MentionedScanner().compute(*args),
|
||||||
|
|||||||
@@ -11,3 +11,4 @@ from .reactions_scanner import ReactionsScanner
|
|||||||
from .first_scanner import FirstScanner
|
from .first_scanner import FirstScanner
|
||||||
from .last_scanner import LastScanner
|
from .last_scanner import LastScanner
|
||||||
from .random_scanner import RandomScanner
|
from .random_scanner import RandomScanner
|
||||||
|
from .words_scanner import WordsScanner
|
||||||
@@ -36,15 +36,16 @@ class Scanner(ABC):
|
|||||||
):
|
):
|
||||||
args = list(args)
|
args = list(args)
|
||||||
guild = message.guild
|
guild = message.guild
|
||||||
logs = GuildLogs(guild)
|
with GuildLogs(guild) as logs:
|
||||||
|
|
||||||
# If "%cmd help" redirect to "%help cmd"
|
# If "%cmd help" redirect to "%help cmd"
|
||||||
if "help" in args:
|
if "help" in args:
|
||||||
await client.bot.help(client, message, "help", args[0])
|
await client.bot.help(client, message, "help", args[0])
|
||||||
return
|
return
|
||||||
|
|
||||||
# check args validity
|
# check args validity
|
||||||
str_channel_mentions = [str(channel.id) for channel in message.channel_mentions]
|
str_channel_mentions = [
|
||||||
|
str(channel.id) for channel in message.channel_mentions
|
||||||
|
]
|
||||||
str_mentions = [str(member.id) for member in message.mentions]
|
str_mentions = [str(member.id) for member in message.mentions]
|
||||||
for i, arg in enumerate(args[1:]):
|
for i, arg in enumerate(args[1:]):
|
||||||
if re.match(r"^<@!?\d+>$", arg):
|
if re.match(r"^<@!?\d+>$", arg):
|
||||||
|
|||||||
@@ -0,0 +1,111 @@
|
|||||||
|
from typing import Dict, List
|
||||||
|
from collections import defaultdict
|
||||||
|
import discord
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Custom libs
|
||||||
|
|
||||||
|
from logs import ChannelLogs, MessageLog
|
||||||
|
from .scanner import Scanner
|
||||||
|
from data_types import Counter
|
||||||
|
from utils import (
|
||||||
|
COMMON_HELP_ARGS,
|
||||||
|
plural,
|
||||||
|
precise,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class WordsScanner(Scanner):
    """Scanner behind ``%words``: ranks words by how often they are used."""

    # Suffixes folded into their stem once either form has been seen, so
    # that e.g. "cat", "cats" and "cat's" end up sharing one counter.
    special_cases = ["'s", "s"]

    # Splits a message on every character that cannot be part of a word
    # (word characters, "-" and "'" are kept).
    _SPLIT_PATTERN = re.compile(r"[^\w\-']")
    # Captures the core of a token: it must start and end with a word
    # character that is neither a digit nor "_", with at least one
    # character in between (so words are 3+ characters long).
    _WORD_PATTERN = re.compile(r"[^\w]*((?![\d_])\w.+(?![\d_])\w)[^\w]*")

    @staticmethod
    def help() -> str:
        """Return the help text shown for the ``%words`` command."""
        return (
            "```\n"
            + "%words: Rank words by their usage\n"
            + "arguments:\n"
            + COMMON_HELP_ARGS
            + "* <n> - top <n> words, default is 10\n"
            + "* everyone - include bots\n"
            + "Example: %words 10 #mychannel1 #mychannel2 @user\n"
            + "```"
        )

    def __init__(self):
        super().__init__(
            has_digit_args=True,
            valid_args=["all", "everyone"],
            help=WordsScanner.help(),
            intro_context="Words usage",
        )

    async def init(self, message: discord.Message, *args: str) -> bool:
        """Read the command arguments and reset the per-run state.

        The last all-digit argument sets the number of ranked words
        (default 10); "all" or "everyone" includes messages from bots.
        Always returns True.
        """
        self.top = 10
        for arg in args:
            if arg.isdigit():
                self.top = int(arg)
        self.words = defaultdict(Counter)
        self.all_messages = "all" in args or "everyone" in args
        return True

    def compute_message(self, channel: ChannelLogs, message: MessageLog):
        """Count the words of one message; ``channel`` is not used here."""
        # NOTE(review): self.raw_members is not set in this class -
        # presumably provided by the Scanner base class; confirm.
        return WordsScanner.analyse_message(
            message,
            self.words,
            self.raw_members,
            all_messages=self.all_messages,
        )

    def get_results(self, intro: str) -> List[str]:
        """Build the result lines: intro, top ``self.top`` words, then a total."""
        # sorted() is stable, so ties keep their first-seen order
        ranked = sorted(
            self.words,
            key=lambda word: self.words[word].score(),
            reverse=True,
        )[: self.top]
        # Total usage as computed by Counter.total over every counted word,
        # not only the ranked ones
        usage_count = Counter.total(self.words)
        res = [intro]
        res += [
            self.words[word].to_string(
                rank,
                f"`{word}`",
                total_usage=usage_count,
            )
            for rank, word in enumerate(ranked)
        ]
        # NOTE(review): self.msg_count is not set in this class - presumably
        # maintained by the Scanner base class and non-zero here; confirm.
        res += [
            f"Total: {plural(usage_count,'time')} ({precise(usage_count/self.msg_count)}/msg)"
        ]
        return res

    @staticmethod
    def analyse_message(
        message: MessageLog,
        words: Dict[str, Counter],
        raw_members: List[int],
        *,
        all_messages: bool,
    ) -> bool:
        """Add the words of ``message`` to ``words``.

        Args:
            message: message whose ``content`` is split into words.
            words: word -> counter mapping, updated in place; a missing key
                must create its counter on access (a ``defaultdict``).
            raw_members: selected author ids; an empty list selects everyone.
            all_messages: when True, bot messages are counted as well.

        Returns:
            True when the author is part of the selection (even if no word
            was found in the message), False otherwise.
        """
        # Same selection as before, with the precedence made explicit: no
        # member filter (bots only when requested), or an explicitly
        # selected author.
        author_selected = (
            (not message.bot or all_messages) and len(raw_members) == 0
        ) or message.author in raw_members
        if not author_selected:
            return False

        for token in WordsScanner._SPLIT_PATTERN.split(message.content):
            m = WordsScanner._WORD_PATTERN.match(token)
            if not m:
                continue
            word = m[1].lower()
            for case in WordsScanner.special_cases:
                stem = word[: -len(case)]
                if word.endswith(case) and stem in words:
                    # "cats" seen after "cat": count it under "cat"
                    word = stem
                    break
                suffixed = word + case
                if suffixed in words:
                    # "cat" seen after "cats": move the counter to the stem,
                    # but never overwrite a counter the stem already has.
                    if word not in words:
                        words[word] = words.pop(suffixed)
                    break
            # One use per occurrence: every token is visited by this loop,
            # so adding content.count(word) here would count a repeated
            # word once per repetition (and match substrings as well).
            words[word].update_use(1, message.created_at)
        return True
|
||||||
Reference in New Issue
Block a user