Merge pull request #21 from Klemek/dev

v1.12
This commit is contained in:
Klemek
2021-04-07 19:02:03 +02:00
committed by GitHub
8 changed files with 341 additions and 150 deletions
+1
View File
@@ -5,3 +5,4 @@ __pycache__
error_* error_*
*.log *.log
/logs/ /logs/
.vscode
+9 -2
View File
@@ -43,7 +43,10 @@
* %react - rank users by their reactions * %react - rank users by their reactions
* arguments: * arguments:
* <n> - top <n> messages, default is 10 * <n> - top <n> messages, default is 10
* %cancel - cancel current analysis * %words - rank words by their usage
* arguments:
* <n> - top <n> words, default is 10
* %cancel - cancel current analysis (not launched with fast)
* Common arguments: * Common arguments:
* @member/me: filter for one or more member * @member/me: filter for one or more member
@@ -100,8 +103,12 @@ python3 src/main.py
## Changelog ## Changelog
* **v1.12**
* more scans: `%words`
* concurrent `fast` analysis
* assume `fast` if last analysis is fresh
* **v1.11** * **v1.11**
* more scans `%first`, `%rand`, `%last` * more scans: `%first`, `%rand`, `%last`
* streak computing in `%pres` * streak computing in `%pres`
* **v1.10** * **v1.10**
* multithreading for queries * multithreading for queries
+6 -1
View File
@@ -40,7 +40,12 @@ class ChannelLogs:
self.channel = channel self.channel = channel
try: try:
if self.last_message_id is not None: # append if self.last_message_id is not None: # append
while self.last_message_id != channel.last_message_id: tmp_message_id = None
while (
self.last_message_id != channel.last_message_id
and self.last_message_id != tmp_message_id
):
tmp_message_id = self.last_message_id
async for message in channel.history( async for message in channel.history(
limit=CHUNK_SIZE, limit=CHUNK_SIZE,
after=FakeMessage(self.last_message_id), after=FakeMessage(self.last_message_id),
+75 -17
View File
@@ -4,6 +4,7 @@ import discord
import json import json
import gzip import gzip
from datetime import datetime from datetime import datetime
import time
import logging import logging
import asyncio import asyncio
import threading import threading
@@ -22,6 +23,8 @@ current_analysis_lock = threading.Lock()
ALREADY_RUNNING = -100 ALREADY_RUNNING = -100
CANCELLED = -200 CANCELLED = -200
MIN_MODIFICATION_TIME = 5 * 60
class Worker: class Worker:
def __init__(self, channel_log: ChannelLogs, channel: discord.TextChannel): def __init__(self, channel_log: ChannelLogs, channel: discord.TextChannel):
@@ -53,12 +56,39 @@ class GuildLogs:
self.guild = guild self.guild = guild
self.log_file = os.path.join(LOG_DIR, f"{guild.id}.logz") self.log_file = os.path.join(LOG_DIR, f"{guild.id}.logz")
self.channels = {} self.channels = {}
self.locked = False
def __enter__(self) -> "GuildLogs":
    """Enter the ``with`` block, exposing this instance as the managed object."""
    return self
def __exit__(self, type, value, tb):
    """Leave the ``with`` block: drop loaded data and release the analysis lock.

    The exception arguments are ignored and nothing is returned, so any
    exception raised inside the ``with`` body keeps propagating.
    """
    # Drop the references to the loaded channels and the guild.
    del self.channels, self.guild
    # Only release the analysis registration when the lock flag is set.
    if self.locked:
        self.unlock()
def dict(self) -> dict: def dict(self) -> dict:
return {id: self.channels[id].dict() for id in self.channels} return {id: self.channels[id].dict() for id in self.channels}
def check_cancelled(self) -> bool: def check_cancelled(self) -> bool:
return self.log_file not in current_analysis return self.locked and self.log_file not in current_analysis
def lock(self) -> bool:
    """Try to register this guild's log file as being analysed.

    Returns:
        True when the log file was added to ``current_analysis``; False when
        it is already listed there (another analysis is running).

    ``self.locked`` is only raised on success. Marking the instance as
    locked before knowing the outcome would let ``__exit__``/``unlock``
    remove the entry owned by the *other* running analysis, which that
    analysis would then see as a cancellation.
    """
    with current_analysis_lock:
        if self.log_file in current_analysis:
            # Refused: leave ``self.locked`` untouched so that we never
            # release a registration we do not own.
            return False
        current_analysis.append(self.log_file)
        self.locked = True
        return True
def unlock(self):
    """Clear the lock flag and remove this log file from ``current_analysis``.

    Calling it while the log file is not registered is harmless: the flag is
    reset and the shared list is left unchanged.
    """
    self.locked = False
    with current_analysis_lock:
        if self.log_file in current_analysis:
            current_analysis.remove(self.log_file)
async def load( async def load(
self, self,
@@ -68,19 +98,18 @@ class GuildLogs:
fast: bool, fast: bool,
fresh: bool, fresh: bool,
) -> Tuple[int, int]: ) -> Tuple[int, int]:
current_analysis_lock.acquire() self.locked = False
if self.log_file in current_analysis: if not fast and not self.lock():
current_analysis_lock.release()
return ALREADY_RUNNING, 0 return ALREADY_RUNNING, 0
current_analysis.append(self.log_file)
current_analysis_lock.release()
t00 = datetime.now() t00 = datetime.now()
# read logs # read logs
if not os.path.exists(LOG_DIR): if not os.path.exists(LOG_DIR):
os.mkdir(LOG_DIR) os.mkdir(LOG_DIR)
last_time = None
if os.path.exists(self.log_file): if os.path.exists(self.log_file):
channels = {} channels = {}
try: try:
last_time = os.path.getmtime(self.log_file)
gziped_data = None gziped_data = None
await code_message(progress, "Reading saved history (1/4)...") await code_message(progress, "Reading saved history (1/4)...")
t0 = datetime.now() t0 = datetime.now()
@@ -92,6 +121,7 @@ class GuildLogs:
await code_message(progress, "Reading saved history (2/4)...") await code_message(progress, "Reading saved history (2/4)...")
t0 = datetime.now() t0 = datetime.now()
json_data = gzip.decompress(gziped_data) json_data = gzip.decompress(gziped_data)
del gziped_data
logging.info( logging.info(
f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms" f"log {self.guild.id} > gzip decompress in {delta(t0):,}ms"
) )
@@ -100,6 +130,7 @@ class GuildLogs:
await code_message(progress, "Reading saved history (3/4)...") await code_message(progress, "Reading saved history (3/4)...")
t0 = datetime.now() t0 = datetime.now()
channels = json.loads(json_data) channels = json.loads(json_data)
del json_data
logging.info(f"log {self.guild.id} > json parse in {delta(t0):,}ms") logging.info(f"log {self.guild.id} > json parse in {delta(t0):,}ms")
if self.check_cancelled(): if self.check_cancelled():
return CANCELLED, 0 return CANCELLED, 0
@@ -122,15 +153,40 @@ class GuildLogs:
else: else:
fast = False fast = False
if len(target_channels) == 0:
target_channels = (
self.channels.values() if fast else self.guild.text_channels
)
elif fast:
# select already loaded channels only
target_channels_tmp = [
channel for channel in target_channels if channel.id in self.channels
]
if len(target_channels_tmp) == 0:
fast = False
else:
target_channels = target_channels_tmp
# assume fast if file is fresh
if (
not fast
and not fresh
and last_time is not None
and (time.time() - last_time) < MIN_MODIFICATION_TIME
):
invalid_target_channels = [
channel
for channel in target_channels
if channel.id not in self.channels
]
if len(invalid_target_channels) == 0:
fast = True
if self.locked:
self.unlock()
total_msg = 0 total_msg = 0
total_chan = 0 total_chan = 0
if fast: if fast:
if len(target_channels) == 0:
total_msg = sum(
[len(channel.messages) for channel in self.channels.values()]
)
total_chan = len(self.channels)
else:
target_channels_id = [channel.id for channel in target_channels] target_channels_id = [channel.id for channel in target_channels]
total_msg = sum( total_msg = sum(
[ [
@@ -141,12 +197,10 @@ class GuildLogs:
) )
total_chan = len(target_channels) total_chan = len(target_channels)
else: else:
if not self.locked and not self.lock():
return ALREADY_RUNNING, 0
# load channels # load channels
t0 = datetime.now() t0 = datetime.now()
if len(target_channels) == 0:
target_channels = (
self.guild.text_channels if not fast else self.channels.keys()
)
loading_new = 0 loading_new = 0
queried_msg = 0 queried_msg = 0
total_chan = 0 total_chan = 0
@@ -225,6 +279,7 @@ class GuildLogs:
) )
t0 = datetime.now() t0 = datetime.now()
gziped_data = gzip.compress(json_data) gziped_data = gzip.compress(json_data)
del json_data
logging.info( logging.info(
f"log {self.guild.id} > gzip in {delta(t0):,}ms -> {real_total_msg / deltas(t0):,.3f} m/s" f"log {self.guild.id} > gzip in {delta(t0):,}ms -> {real_total_msg / deltas(t0):,.3f} m/s"
) )
@@ -237,6 +292,7 @@ class GuildLogs:
t0 = datetime.now() t0 = datetime.now()
with open(self.log_file, mode="wb") as f: with open(self.log_file, mode="wb") as f:
f.write(gziped_data) f.write(gziped_data)
del gziped_data
logging.info( logging.info(
f"log {self.guild.id} > saved in {delta(t0):,}ms -> {real_total_msg / deltas(t0):,.3f} m/s" f"log {self.guild.id} > saved in {delta(t0):,}ms -> {real_total_msg / deltas(t0):,.3f} m/s"
) )
@@ -247,6 +303,7 @@ class GuildLogs:
f"Analysing...\n{total_msg:,} messages in {total_chan:,} channels", f"Analysing...\n{total_msg:,} messages in {total_chan:,} channels",
) )
logging.info(f"log {self.guild.id} > TOTAL TIME: {delta(t00):,}ms") logging.info(f"log {self.guild.id} > TOTAL TIME: {delta(t00):,}ms")
if self.locked:
current_analysis_lock.acquire() current_analysis_lock.acquire()
current_analysis.remove(self.log_file) current_analysis.remove(self.log_file)
current_analysis_lock.release() current_analysis_lock.release()
@@ -262,5 +319,6 @@ class GuildLogs:
else: else:
current_analysis_lock.release() current_analysis_lock.release()
await message.channel.send( await message.channel.send(
f"No analysis are currently running on this server", reference=message f"No cancellable analysis are currently running on this server",
reference=message,
) )
+10 -3
View File
@@ -21,6 +21,7 @@ from scanners import (
FirstScanner, FirstScanner,
RandomScanner, RandomScanner,
LastScanner, LastScanner,
WordsScanner,
) )
from logs import GuildLogs from logs import GuildLogs
@@ -32,7 +33,7 @@ emojis.load_emojis()
bot = Bot( bot = Bot(
"Discord Analyst", "Discord Analyst",
"1.11", "1.12",
alias="%", alias="%",
) )
@@ -41,8 +42,8 @@ bot.log_calls = True
bot.register_command( bot.register_command(
"(cancel|stop)", "(cancel|stop)",
GuildLogs.cancel, GuildLogs.cancel,
"cancel: stop current analysis", "cancel: stop current analysis (not launched with fast)",
"```\n" + "%cancel: Stop current analysis\n" + "```", "```\n" + "%cancel: Stop current analysis (not launched with fast)\n" + "```",
) )
bot.register_command( bot.register_command(
"last", "last",
@@ -62,6 +63,12 @@ bot.register_command(
"first: read first message", "first: read first message",
FirstScanner.help(), FirstScanner.help(),
) )
bot.register_command(
"words",
lambda *args: WordsScanner().compute(*args),
"words: rank words by their usage",
WordsScanner.help(),
)
bot.register_command( bot.register_command(
"mentioned", "mentioned",
lambda *args: MentionedScanner().compute(*args), lambda *args: MentionedScanner().compute(*args),
+1
View File
@@ -11,3 +11,4 @@ from .reactions_scanner import ReactionsScanner
from .first_scanner import FirstScanner from .first_scanner import FirstScanner
from .last_scanner import LastScanner from .last_scanner import LastScanner
from .random_scanner import RandomScanner from .random_scanner import RandomScanner
from .words_scanner import WordsScanner
+4 -3
View File
@@ -36,15 +36,16 @@ class Scanner(ABC):
): ):
args = list(args) args = list(args)
guild = message.guild guild = message.guild
logs = GuildLogs(guild) with GuildLogs(guild) as logs:
# If "%cmd help" redirect to "%help cmd" # If "%cmd help" redirect to "%help cmd"
if "help" in args: if "help" in args:
await client.bot.help(client, message, "help", args[0]) await client.bot.help(client, message, "help", args[0])
return return
# check args validity # check args validity
str_channel_mentions = [str(channel.id) for channel in message.channel_mentions] str_channel_mentions = [
str(channel.id) for channel in message.channel_mentions
]
str_mentions = [str(member.id) for member in message.mentions] str_mentions = [str(member.id) for member in message.mentions]
for i, arg in enumerate(args[1:]): for i, arg in enumerate(args[1:]):
if re.match(r"^<@!?\d+>$", arg): if re.match(r"^<@!?\d+>$", arg):
+111
View File
@@ -0,0 +1,111 @@
from typing import Dict, List
from collections import defaultdict
import discord
import re
# Custom libs
from logs import ChannelLogs, MessageLog
from .scanner import Scanner
from data_types import Counter
from utils import (
COMMON_HELP_ARGS,
plural,
precise,
)
class WordsScanner(Scanner):
    """Scanner ranking the words found in the analysed messages by usage."""

    # Suffixes folded into their base word so that e.g. "cat", "cats" and
    # "cat's" end up sharing one counter. They are tried in this order and
    # the first one that applies wins.
    special_cases = ["'s", "s"]

    # Splits a message on every character that is not a word character,
    # a hyphen or an apostrophe.
    _TOKEN_SEPARATOR = re.compile(r"[^\w\-']")
    # Extracts the core of a token: at least three characters, starting and
    # ending with a word character that is neither a digit nor "_".
    _WORD_PATTERN = re.compile(r"[^\w]*((?![\d_])\w.+(?![\d_])\w)[^\w]*")

    @staticmethod
    def help() -> str:
        """Return the help text displayed for the ``%words`` command."""
        return (
            "```\n"
            + "%words: Rank words by their usage\n"
            + "arguments:\n"
            + COMMON_HELP_ARGS
            + "* <n> - top <n> words, default is 10\n"
            + "* everyone - include bots\n"
            + "Example: %words 10 #mychannel1 #mychannel2 @user\n"
            + "```"
        )

    def __init__(self):
        super().__init__(
            has_digit_args=True,
            valid_args=["all", "everyone"],
            help=WordsScanner.help(),
            intro_context="Words usage",
        )

    async def init(self, message: discord.Message, *args: str) -> bool:
        """Read the command arguments and reset the per-run state.

        A purely numeric argument sets the size of the ranking (the last one
        wins, default 10); ``all`` or ``everyone`` includes bot messages.
        Always returns True.
        """
        self.top = 10
        for arg in args:
            if arg.isdigit():
                self.top = int(arg)
        self.words = defaultdict(Counter)
        self.all_messages = "all" in args or "everyone" in args
        return True

    def compute_message(self, channel: ChannelLogs, message: MessageLog) -> bool:
        """Feed one message into the word counters; ``channel`` is unused.

        NOTE(review): ``self.raw_members`` is not set in this class; it is
        presumably provided by the ``Scanner`` base class - confirm.
        """
        return WordsScanner.analyse_message(
            message,
            self.words,
            self.raw_members,
            all_messages=self.all_messages,
        )

    def get_results(self, intro: str) -> List[str]:
        """Build the result lines: intro, the top words, then a total line.

        NOTE(review): ``self.msg_count`` is not set in this class and is
        used as a divisor; presumably the base class guarantees it is
        non-zero when results are requested - confirm.
        """
        ranked = sorted(
            self.words,
            key=lambda word: self.words[word].score(),
            reverse=True,
        )[: self.top]
        # Total usage over every recorded word, not only the displayed ones.
        usage_count = Counter.total(self.words)
        res = [intro]
        res += [
            self.words[word].to_string(
                rank,
                f"`{word}`",
                total_usage=usage_count,
            )
            for rank, word in enumerate(ranked)
        ]
        res += [
            f"Total: {plural(usage_count,'time')} ({precise(usage_count/self.msg_count)}/msg)"
        ]
        return res

    @staticmethod
    def analyse_message(
        message: MessageLog,
        words: Dict[str, Counter],
        raw_members: List[int],
        *,
        all_messages: bool,
    ) -> bool:
        """Record every word of ``message`` into ``words``.

        Args:
            message: the message to analyse.
            words: mapping from word to its counter, updated in place. A
                missing key must create a counter on access (the scanner
                passes a ``defaultdict(Counter)``).
            raw_members: author ids to restrict the analysis to; an empty
                list means no restriction.
            all_messages: when True, bot messages are analysed as well.

        Returns:
            True when the message's author is part of the selection (the
            message was analysed), False otherwise.
        """
        # Precedence made explicit: an author listed in raw_members is always
        # analysed, even when it is a bot and all_messages is False.
        author_selected = (
            (not message.bot or all_messages) and not raw_members
        ) or message.author in raw_members
        if not author_selected:
            return False

        for token in WordsScanner._TOKEN_SEPARATOR.split(message.content):
            match = WordsScanner._WORD_PATTERN.match(token)
            if not match:
                continue
            word = match[1].lower()
            for case in WordsScanner.special_cases:
                # "cats" seen while "cat" is already known: count it as "cat".
                if word.endswith(case) and word[: -len(case)] in words:
                    word = word[: -len(case)]
                    break
                # "cat" seen while "cats" is already known: move the existing
                # counter under the shorter spelling.
                if word + case in words:
                    words[word] = words[word + case]
                    del words[word + case]
                    break
            # Each token is one occurrence. Counting substring matches of the
            # lowercased word in the raw content, once per token, counted
            # repeated words several times over, missed capitalised words
            # and matched inside longer words.
            # NOTE(review): assumes Counter.update_use adds its first
            # argument to the usage total - confirm.
            words[word].update_use(1, message.created_at)
        return True