working logs query

This commit is contained in:
klemek
2021-01-06 18:58:19 +01:00
parent 014a792ad4
commit 6b5d0dd1fe
4 changed files with 188 additions and 172 deletions
+7
View File
@@ -1,7 +1,14 @@
from miniscord import Bot
import logging
import emotes
logging.basicConfig(
format="[%(asctime)s][%(levelname)s][%(module)s] %(message)s", level=logging.INFO
)
emotes.load_emojis()
bot = Bot(
"Discord Analyst", # name
"1.4", # version
+45 -92
View File
@@ -4,9 +4,11 @@ from collections import defaultdict
import discord
import re
import json
import logging
# Custom libs
from utils import debug, aggregate, no_duplicate
from log_manager import GuildLogs, ChannelLogs, MessageLog
# CONSTANTS
@@ -88,11 +90,9 @@ def load_emojis():
GLOBAL_EMOJIS[unicode] = shortcode
unicode_list += [unicode_escaped]
EMOJI_REGEX = re.compile(f"(<a?:\\w+:\\d+>|:\\w+:|{'|'.join(unicode_list)})")
logging.info(f"loaded {len(GLOBAL_EMOJIS)} emojis")
load_emojis()
print(f"loaded {len(GLOBAL_EMOJIS)} emojis")
# MAIN
HELP = (
@@ -112,6 +112,7 @@ async def compute(client: discord.client, message: discord.Message, *args: str):
Computes the %emotes command
"""
guild = message.guild
logs = GuildLogs(guild)
# If "%emotes help" redirect to "%help emotes"
if "help" in args:
@@ -131,45 +132,27 @@ async def compute(client: discord.client, message: discord.Message, *args: str):
# Get selected members
members = no_duplicate(message.mentions)
raw_members = no_duplicate(message.raw_mentions)
# Start computing data
async with message.channel.typing():
nm = 0 # number of messages treated
nmm = 0 # number of impacted messages
nc = 0 # number of channel treated
t0 = datetime.now()
# Show custom progress message and keep it to update it later
progress = await message.channel.send("```starting analysis...```")
# Analyse every channel selected
for channel in channels:
nm1, nmm1 = await analyse_channel(
channel,
emotes,
members,
progress,
nm,
nc,
all_emojis="all" in args,
analyse_members_reactions="reactions" in args,
progress = await message.channel.send("```Starting analysis...```")
total_msg, total_chan = await logs.load(progress, channels)
for id in logs.channels:
analyse_channel(
logs.channels[id], emotes, raw_members, all_emojis="all" in args
)
# If treatment was successful, increase numbers
if nm1 >= 0:
nm += nm1
nmm += nmm1
nc += 1
# Delete custom progress message
await progress.delete()
# Display results
await tell_results(
get_intro(emotes, full, channels, members, nmm, nc),
get_intro(emotes, full, channels, members, total_msg, total_chan),
emotes,
message.channel,
nmm,
total_msg,
allow_unused=full and len(members) == 0,
show_life=False,
)
dt = (datetime.now() - t0).total_seconds()
debug(message, f"response sent {dt} s -> {nm / dt} m/s")
# CLASSES
@@ -217,72 +200,42 @@ class Emote:
# ANALYSIS
def analyse_channel(
    channel: ChannelLogs,
    emotes: Dict[str, Emote],
    raw_members: List[int],
    *,
    all_emojis: bool,
):
    """
    Update emote statistics from the logged messages of one channel

    :param channel: logged channel whose ``messages`` are scanned
    :param emotes: emote name -> Emote, updated in place
    :param raw_members: ids of the selected members (empty list is all)
    :param all_emojis: also count names found in GLOBAL_EMOJIS
    """
    everyone = len(raw_members) == 0
    for message in channel.messages:
        # Usages in the message text only count for selected authors
        if everyone or message.author in raw_members:
            # EMOJI_REGEX matches "<:emoji:123456789>", ":shortcode:" and unicode emojis
            for found in EMOJI_REGEX.findall(message.content):
                key = found
                if key not in emotes:
                    # Unknown to the guild: keep it only as a global emoji
                    if not (all_emojis and key in GLOBAL_EMOJIS):
                        continue
                    key = GLOBAL_EMOJIS[key]
                emotes[key].usages += 1
                emotes[key].update_use(message.created_at)
        # NOTE(review): the scraped source lost its indentation; this loop is
        # assumed to run for every message (not only those of selected
        # authors) -- confirm against the repository.
        for raw_name, reactors in message.reactions.items():
            key = raw_name
            if key not in emotes:
                if not (all_emojis and key in GLOBAL_EMOJIS):
                    continue
                key = GLOBAL_EMOJIS[key]
            if everyone:
                # No selection: every reacting user counts
                emotes[key].reactions += len(reactors)
                emotes[key].update_use(message.created_at)
            else:
                # Selection: one reaction per selected member who reacted
                for member in raw_members:
                    if member in reactors:
                        emotes[key].reactions += 1
                        emotes[key].update_use(message.created_at)
# RESULTS
+126 -63
View File
@@ -1,7 +1,9 @@
import json
import logging
import os
from datetime import datetime
from typing import AsyncIterator, List, Tuple, Union

import discord
LOG_DIR = "logs"
@@ -12,48 +14,64 @@ if not os.path.exists(LOG_DIR):
CHUNK_SIZE = 1000
class FakeMessage:
    """Minimal stand-in exposing only an ``id`` attribute.

    Used in this file as the ``before``/``after`` bound of
    ``channel.history`` when only a stored message id is available.
    """

    def __init__(self, id: int):
        # The parameter name shadows the builtin but is kept for callers.
        self.id = id

    def __repr__(self) -> str:
        return f"{type(self).__name__}(id={self.id!r})"
class MessageLog:
    """Serialisable snapshot of a message.

    Built either from a live ``discord.Message`` or from the ``dict`` form
    returned by :meth:`dict` (as read back from a JSON log).
    """

    def __init__(self, message: Union[discord.Message, dict]):
        """
        :param message: live message, or its previously serialised dict
        :raises TypeError: if *message* is neither of the supported types
        """
        if isinstance(message, discord.Message):
            self.id = message.id
            self.created_at = message.created_at
            self.edited_at = message.edited_at
            # Only the id is kept so the log stays JSON-serialisable
            self.author = message.author.id
            self.pinned = message.pinned
            self.mention_everyone = message.mention_everyone
            self.tts = message.tts
            # discord.MessageReference exposes the referenced message as
            # `message_id`; it has no `id` attribute.
            self.reference = (
                message.reference.message_id
                if message.reference is not None
                else None
            )
            self.content = message.content
            self.mentions = message.raw_mentions
            self.role_mentions = message.raw_role_mentions
            self.channel_mentions = message.raw_channel_mentions
            # Filled in by load(), which has to await the API
            self.reactions = {}
        elif isinstance(message, dict):
            self.id = int(message["id"])
            # Dates are stored as ISO 8601 strings (see dict())
            self.created_at = datetime.fromisoformat(message["created_at"])
            self.edited_at = (
                datetime.fromisoformat(message["edited_at"])
                if message["edited_at"] is not None
                else None
            )
            self.author = message["author"]
            self.pinned = message["pinned"]
            self.mention_everyone = message["mention_everyone"]
            self.tts = message["tts"]
            self.reference = message["reference"]
            self.content = message["content"]
            self.mentions = message["mentions"]
            self.role_mentions = message["role_mentions"]
            self.channel_mentions = message["channel_mentions"]
            self.reactions = message["reactions"]
        else:
            # Previously the instance was silently left without attributes
            raise TypeError(
                f"expected discord.Message or dict, got {type(message).__name__}"
            )

    async def load(self, message: discord.Message):
        """Record, for each reaction of *message*, the ids of the reacting users.

        ``self.reactions`` maps ``str(reaction.emoji)`` to a list of user ids.
        """
        for reaction in message.reactions:
            user_ids = self.reactions[str(reaction.emoji)] = []
            async for user in reaction.users():
                user_ids.append(user.id)

    def dict(self) -> dict:
        """Return a shallow copy of the attributes with dates as ISO 8601 strings."""
        message = dict(self.__dict__)
        message["created_at"] = self.created_at.isoformat()
        message["edited_at"] = (
            self.edited_at.isoformat() if self.edited_at is not None else None
        )
        return message
class ChannelLog:
class ChannelLogs:
def __init__(self, channel: Union[discord.TextChannel, dict]):
if isinstance(channel, discord.TextChannel):
self.id = channel.id
@@ -61,82 +79,127 @@ class ChannelLog:
self.last_message_id = None
self.messages = []
elif isinstance(channel, dict):
self.id = channel["id"]
self.id = int(channel["id"])
self.name = channel["name"]
self.last_message_id = channel["last_message_id"]
self.messages = [MessageLog(message) for message in channel["messages"]]
async def load(
    self, channel: discord.TextChannel
) -> AsyncIterator[Tuple[int, bool]]:
    """Fetch the messages of *channel* that are missing from this log.

    Async generator: yields ``(number of logged messages, False)`` after
    each fetched chunk and finally ``(number of logged messages, True)``.
    When ``discord.errors.HTTPException`` is raised (like Forbidden) the
    generator stops without the final ``True`` item.

    Messages from bot authors are skipped. ``self.messages`` is kept
    newest-first in both branches.
    """
    self.name = channel.name
    self.channel = channel
    try:
        if self.last_message_id is not None:  # append
            while self.last_message_id != channel.last_message_id:
                fetched = 0
                async for message in channel.history(
                    limit=CHUNK_SIZE,
                    after=FakeMessage(self.last_message_id),
                    oldest_first=True,
                ):
                    fetched += 1
                    self.last_message_id = message.id
                    if not message.author.bot:
                        m = MessageLog(message)
                        await m.load(message)
                        # Oldest-first iteration: pushing each one to the
                        # front keeps the list newest-first
                        self.messages.insert(0, m)
                yield len(self.messages), False
                if fetched == 0:
                    # Nothing newer exists although the ids differ (e.g. the
                    # channel's last message was deleted): stop instead of
                    # querying forever.
                    break
        else:  # first load
            last_message_id = None
            while True:
                done = 0
                async for message in channel.history(
                    limit=CHUNK_SIZE,
                    before=FakeMessage(last_message_id)
                    if last_message_id is not None
                    else None,
                    oldest_first=False,
                ):
                    done += 1
                    last_message_id = message.id
                    if not message.author.bot:
                        m = MessageLog(message)
                        await m.load(message)
                        self.messages += [m]
                yield len(self.messages), False
                if done < CHUNK_SIZE:
                    # A short (or empty) chunk means the start of the
                    # history was reached; an empty channel used to loop
                    # forever because last_message_id stayed None.
                    break
            self.last_message_id = channel.last_message_id
    except discord.errors.HTTPException:
        return  # When an exception occurs (like Forbidden)
    yield len(self.messages), True
def dict(self) -> dict:
    """Return a serialisable copy of this log.

    The ``channel`` attribute (if present) is left out and every logged
    message is replaced by its own ``dict()`` form. ``self`` is not modified.
    """
    serialised = {
        key: value for key, value in self.__dict__.items() if key != "channel"
    }
    serialised["messages"] = [entry.dict() for entry in self.messages]
    return serialised
class GuildLogs:
    """Message logs of a guild, cached in ``LOG_DIR/<guild id>.logz``."""

    def __init__(self, guild: discord.Guild):
        self.guild = guild
        self.log_file = os.path.join(LOG_DIR, f"{guild.id}.logz")
        # channel id (int) -> ChannelLogs
        self.channels = {}

    def dict(self) -> dict:
        """Return ``{channel id: serialised channel log}`` for every channel."""
        return {
            channel_id: channel_logs.dict()
            for channel_id, channel_logs in self.channels.items()
        }

    async def load(
        self, progress: discord.Message, target_channels: List[discord.TextChannel] = []
    ):
        """Read the cached log, fetch what is missing, then write the log back.

        :param progress: message edited along the way to show progress
        :param target_channels: channels to update; when empty, every text
            channel of the guild (the default list is only read, never mutated)
        :return: ``(total_msg, total_chan)``: logged messages in the target
            channels and number of channels whose loading completed
        """
        await progress.edit(
            content="```Reading history...\n(this might take a while)```"
        )
        # read logs
        t0 = datetime.now()
        if os.path.exists(self.log_file):
            try:
                with open(self.log_file, mode="r") as f:
                    channels = json.loads(f.readline().strip())
                # JSON object keys are strings: restore the integer ids
                self.channels = {
                    int(channel_id): ChannelLogs(channels[channel_id])
                    for channel_id in channels
                }
                dt = (datetime.now() - t0).total_seconds()
                logging.info(f"log {self.guild.id} > loaded in {dt} s")
            except json.decoder.JSONDecodeError:
                logging.error(f"log {self.guild.id} > invalid JSON")
            except IOError:
                logging.error(f"log {self.guild.id} > cannot read")
        # load channels
        t0 = datetime.now()
        if len(target_channels) == 0:
            target_channels = self.guild.text_channels
        loading_new = 0
        total_msg = 0
        total_chan = 0
        for channel in target_channels:
            if channel.id not in self.channels:
                loading_new += 1
                self.channels[channel.id] = ChannelLogs(channel)
            async for count, done in self.channels[channel.id].load(channel):
                if count > 0:
                    tmp_msg = total_msg + count
                    warning_msg = "(this might take a while)"
                    if len(target_channels) > 5 and loading_new > 5:
                        warning_msg = (
                            "(most channels are new, this might take a looong while)"
                        )
                    elif loading_new > 0:
                        warning_msg = (
                            "(some channels are new, this might take a long while)"
                        )
                    dt = (datetime.now() - t0).total_seconds()
                    # Guard against a zero elapsed time (coarse clocks)
                    rate = round(tmp_msg / dt) if dt > 0 else 0
                    await progress.edit(
                        content=f"```Reading history...\n{tmp_msg} messages in {total_chan + 1} channels ({rate}m/s)\n{warning_msg}```"
                    )
                # NOTE(review): the scraped source lost its indentation; this
                # test is assumed to be a sibling of `if count > 0` so that
                # empty channels are counted too -- confirm.
                if done:
                    total_chan += 1
            total_msg += len(self.channels[channel.id].messages)
        dt = (datetime.now() - t0).total_seconds()
        # total_msg, not the per-chunk tmp_msg: the latter is unbound when no
        # channel produced a message and otherwise misses the last channel's
        # final count.
        await progress.edit(
            content=f"```Analysing...\n{total_msg} messages in {total_chan} channels```"
        )
        speed = total_msg / dt if dt > 0 else 0.0
        logging.info(f"log {self.guild.id} > queried in {dt} s -> {speed} m/s")
        # write logs
        t0 = datetime.now()
        with open(self.log_file, mode="w") as f:
            f.write(json.dumps(self.dict()))
        dt = (datetime.now() - t0).total_seconds()
        logging.info(f"log {self.guild.id} > written in {dt} s")
        return total_msg, total_chan
+10 -17
View File
@@ -1,21 +1,18 @@
from typing import List
import logging
import discord
# DISCORD API
def debug(message: discord.Message, txt: str):
    """
    Log a text at INFO level with the context of the current event

    :param message: message that triggered the event
    :param txt: text of the log
    """
    context = f"{message.guild} > #{message.channel}"
    logging.info(f"{context}: {txt}")
# LISTS
def no_duplicate(seq):
def no_duplicate(seq: list) -> list:
"""
Remove any duplicates on a list
@@ -29,16 +26,12 @@ def no_duplicate(seq):
# MESSAGE FORMATTING
def aggregate(names):
def aggregate(names: List[str]) -> str:
"""
Aggregate names with , and &
Example : "a, b, c & d"
:param names: list of names
:type names: list[str]
:return: correct aggregation
:rtype: str
"""
if len(names) == 0:
return ""