Merge pull request #44 from Klemek/dev

v1.15
This commit is contained in:
Klemek
2021-05-19 15:19:52 +02:00
committed by GitHub
14 changed files with 330 additions and 69 deletions
+22 -4
View File
@@ -18,13 +18,24 @@
* %freq - frequency analysis
* %compo - composition analysis
* %pres - presence analysis
* %first - read first message
* %rand - read a random message
* %last - read last message
* %find - find specific words or phrases
* %repeat - repeat last analysis (adding supplied arguments)
* %mobile - fix @invalid-user for last command but mentions users
* %gdpr - displays GDPR information
* %find - find specific words or phrases (you can use quotes to add spaces in queries, backticks define regexes)
* arguments:
* top - rank users for these queries
* %first - read first message (add text to filter like %find)
* arguments:
* image - pull an image instead of a message
* spoiler:allow/only - allow spoiler images
* %rand - read a random message (add text to filter like %find)
* arguments:
* image - pull an image instead of a message
* spoiler:allow/only - allow spoiler images
* %last - read last message (add text to filter like %find)
* arguments:
* image - pull an image instead of a message
* spoiler:allow/only - allow spoiler images
* %emojis - rank emojis by their usage
* arguments:
* <n> - top <n> emojis, default is 20
@@ -61,6 +72,7 @@
* all/everyone - include bots messages
* fast: only read cache
* fresh: does not read cache
* nsfw:allow/only - allow messages from nsfw channels
* mobile/mention: mentions users (fix @invalid-user bug)
(Sample dates: 2020 / 2021-11 / 2021-06-28 / 2020-06-28T23:00 / today / week / 8days / 1y)
@@ -113,6 +125,12 @@ python3 src/main.py
## Changelog
* **v1.15**
* `nsfw:allow/only` filter nsfw channels
* `%find` can use regexes
* `%first`, `%rand` and `%last` can be filtered with specific keywords
* `%first`, `%rand` and `%last` can pull images
* bug fix
* **v1.14**
* `mobile/mention` arg to fix mobile bug
* `%repeat`, `%mobile` to repeat commands
+69 -1
View File
@@ -3,13 +3,81 @@ import random
# Custom libs
from utils import mention, from_now, str_datetime, message_link
from utils import (
mention,
from_now,
str_datetime,
message_link,
SPLIT_TOKEN,
FilterLevel,
should_allow_spoiler,
)
MAX_RANDOM_TRIES = 100
class History:
def __init__(self):
    """Create an empty history; messages are appended to ``self.messages`` later."""
    self.messages = []
async def to_string_image(self, *, type: str, spoiler: FilterLevel) -> List[str]:
    """Pick the first, last or a random stored message and report its image.

    Candidates are re-fetched one at a time through their ``fetch()`` method.
    A candidate is kept only when the fetch returns something and
    ``should_allow_spoiler`` accepts it for the requested ``spoiler`` level.

    :param type: "first", "last" or "random"; any other value ends up with
        the "no messages" answer because no candidate is ever selected.
    :param spoiler: filter level handed to ``should_allow_spoiler``.
    :return: the lines to send: intro, date/author line, message link,
        ``SPLIT_TOKEN`` and finally the image URL (or "<Error>").
    """
    if not self.messages:
        return ["There was no messages matching your filters"]

    async def fetch_if_allowed(candidate):
        # None when the fetch gave nothing or the spoiler filter rejects it
        fetched = await candidate.fetch()
        if fetched is None or should_allow_spoiler(fetched, spoiler):
            return fetched
        return None

    message = None
    intro = None
    real_message = None

    if type in ("first", "last"):
        # In-place sort: oldest first for "first", newest first for "last"
        self.messages.sort(key=lambda m: m.created_at, reverse=(type == "last"))
        for message in self.messages:
            real_message = await fetch_if_allowed(message)
            if real_message is not None:
                break
        if type == "first":
            intro = f"First image out of {len(self.messages):,}"
        else:
            intro = f"Last image out of {len(self.messages):,}"
    elif type == "random":
        intro = f"Random image out of {len(self.messages):,}"
        # Bounded number of draws so an all-rejected history cannot loop forever
        for _ in range(MAX_RANDOM_TRIES):
            message = random.choice(self.messages)
            real_message = await fetch_if_allowed(message)
            if real_message is not None:
                break

    if real_message is None:
        return ["There was no messages matching your filters"]

    # Attachments take precedence over embeds; only the first one is used
    if real_message.attachments:
        image = real_message.attachments[0].url
    elif real_message.embeds:
        image = real_message.embeds[0].url
    else:
        image = "<Error>"

    return [
        intro,
        f"{str_datetime(message.created_at)} ({from_now(message.created_at)}) {mention(message.author)} sent:",
        f"<{message_link(message)}>",
        SPLIT_TOKEN,
        image,
    ]
def to_string(self, *, type: str) -> List[str]:
if len(self.messages) == 0:
return ["There was no messages matching your filters"]
+12 -11
View File
@@ -1,16 +1,13 @@
from typing import Union, Tuple, Any
import discord
from discord import message
from datetime import datetime
from . import MessageLog
from utils import FakeMessage
from utils import serialize, FakeMessage
CHUNK_SIZE = 2000
FORMAT = 3
NOT_SERIALIZED = ["channel", "guild", "start_date"]
class ChannelLogs:
def __init__(self, channel: Union[discord.TextChannel, dict], guild: Any):
@@ -50,11 +47,17 @@ class ChannelLogs:
def is_format(self) -> bool:
    """Return True when this log's ``format`` equals the module-level ``FORMAT``."""
    return self.format == FORMAT
def preload(self, channel: discord.TextChannel):
    """Attach the given Discord channel to this log and copy its name.

    :param channel: channel object stored as ``self.channel``; its ``name``
        is stored as ``self.name``.
    """
    self.name, self.channel = channel.name, channel
@property
def nsfw(self):
    """Expose the ``nsfw`` flag of the attached Discord channel.

    ``self.channel`` must already be set (``preload`` and ``load`` both
    assign it). The original body evaluated the attribute without
    returning it, so the property always produced ``None``.

    :return: the value of ``self.channel.nsfw``.
    """
    return self.channel.nsfw
async def load(
self, channel: discord.TextChannel, start_date: datetime, stop_date: datetime
) -> Tuple[int, int]:
self.name = channel.name
self.channel = channel
is_empty = self.last_message_id is None
try:
if is_empty:
@@ -110,7 +113,7 @@ class ChannelLogs:
tmp_message_id = self.last_message_id
async for message in channel.history(
limit=CHUNK_SIZE,
after=FakeMessage(self.last_message_id),
after=FakeMessage(self.first_message_id),
oldest_first=True,
):
last_message_date = message.created_at
@@ -119,7 +122,7 @@ class ChannelLogs:
await m.load(message)
self.messages.insert(0, m)
yield len(self.messages), False
except discord.errors.HTTPException:
except discord.errors.HTTPException as e:
yield -1, True
return # When an exception occurs (like Forbidden)
self.start_date = (
@@ -128,8 +131,6 @@ class ChannelLogs:
yield len(self.messages), True
def dict(self) -> dict:
channel = dict(self.__dict__)
for key in NOT_SERIALIZED:
channel.pop(key, None)
channel = serialize(self, not_serialized=["channel", "guild", "start_date"])
channel["messages"] = [message.dict() for message in self.messages]
return channel
+3
View File
@@ -215,6 +215,8 @@ class GuildLogs:
]
)
total_chan = len(target_channels)
for channel in target_channels:
self.channels[channel.id].preload(channel)
else:
if not self.locked and not self.lock():
return ALREADY_RUNNING, 0
@@ -231,6 +233,7 @@ class GuildLogs:
if channel.id not in self.channels or fresh:
loading_new += 1
self.channels[channel.id] = ChannelLogs(channel, self)
self.channels[channel.id].preload(channel)
workers += [
Worker(self.channels[channel.id], channel, start_date, stop_date)
]
+10 -12
View File
@@ -1,16 +1,13 @@
from typing import Union, Any
from typing import Optional, Union, Any
import discord
from datetime import datetime
from utils import is_extension
from utils import is_extension, serialize
IMAGE_FORMAT = [".gif", ".gifv", ".png", ".jpg", ".jpeg", ".bmp"]
EMBED_IMAGES = ["image", "gifv"]
NOT_SERIALIZED = ["channel"]
class MessageLog:
def __init__(self, message: Union[discord.Message, dict], channel: Any):
self.channel = channel
@@ -80,12 +77,13 @@ class MessageLog:
async for user in reaction.users():
self.reactions[str(reaction.emoji)] += [user.id]
async def fetch(self) -> Optional[discord.Message]:
    """Re-download this message through the channel it was logged from.

    :return: the object returned by ``fetch_message`` for ``self.id``, or
        ``None`` when Discord answers with NotFound, Forbidden or any other
        HTTPException.
    """
    # self.channel is the owning log object; its ``channel`` attribute holds
    # the Discord channel (presumably set by ChannelLogs.preload — confirm)
    source_channel = self.channel.channel
    try:
        fetched = await source_channel.fetch_message(self.id)
    except (discord.NotFound, discord.Forbidden, discord.HTTPException):
        return None
    return fetched
def dict(self) -> dict:
message = dict(self.__dict__)
for key in NOT_SERIALIZED:
message.pop(key, None)
message["created_at"] = self.created_at.isoformat()
message["edited_at"] = (
self.edited_at.isoformat() if self.edited_at is not None else None
return serialize(
self, not_serialized=["channel"], dates=["created_at", "edited_at"]
)
return message
+2 -2
View File
@@ -18,7 +18,7 @@ emojis.load_emojis()
bot = Bot(
"Discord Analyst",
"1.14",
"1.15",
alias="%",
)
@@ -81,7 +81,7 @@ bot.register_command(
scanners.LastScanner.help(),
)
bot.register_command(
"rand(om)?",
"(rand(om)?|mood)",
lambda *args: scanners.RandomScanner().compute(*args),
"rand: read a random message",
scanners.RandomScanner.help(),
-1
View File
@@ -1,5 +1,4 @@
from typing import Dict, List
from collections import defaultdict
import discord
+20 -8
View File
@@ -1,6 +1,7 @@
from typing import Dict, List
from typing import Dict, List, Optional, Tuple
from collections import defaultdict
import discord
import re
# Custom libs
@@ -21,7 +22,7 @@ class FindScanner(Scanner):
def help() -> str:
return generate_help(
"find",
"Find specific words or phrases (you can use quotes to add spaces in queries)",
"Find specific words or phrases (you can use quotes to add spaces in queries, backticks define regexes)",
args=[
"top - rank users for these queries",
"all/everyone - include bots",
@@ -43,17 +44,21 @@ class FindScanner(Scanner):
self.top = "top" in args or len(self.other_args) == 1
if len(self.other_args) == 0:
await message.channel.send(
"You need to add a query to find (you can use quotes to add spaces in queries)",
"You need to add a query to find (you can use quotes to add spaces in queries, backticks define regexes)",
reference=message,
)
return False
self.queries = [
(query, query.strip("`") if re.match(r"^`.*`$", query) else None)
for query in self.other_args
]
return True
def compute_message(self, channel: ChannelLogs, message: MessageLog):
return FindScanner.analyse_message(
message,
self.matches,
self.other_args,
self.queries,
self.raw_members,
all_messages=self.all_messages,
top=self.top,
@@ -77,7 +82,9 @@ class FindScanner(Scanner):
res += [
self.matches[match].to_string(
matches.index(match),
f'"{escape_text(match)}"',
f'"{escape_text(match)}"'
if len(match.strip("`")) == len(match)
else match,
total_usage=self.msg_count,
ranking=False,
transform=lambda id: f" by {mention(id)}",
@@ -97,7 +104,7 @@ class FindScanner(Scanner):
def analyse_message(
message: MessageLog,
matches: Dict[str, Counter],
queries: List[str],
queries: List[Tuple[str, Optional[str]]],
raw_members: List[int],
*,
all_messages: bool,
@@ -113,10 +120,15 @@ class FindScanner(Scanner):
impacted = True
content = message.content.lower()
for query in queries:
count = content.count(query.lower())
if query[1] is not None:
count = len(re.findall(query[1], message.content))
else:
count = content.count(query[0].lower())
if top:
if count > 0:
matches[message.author].update_use(count, message.created_at)
else:
matches[query].update_use(count, message.created_at, message.author)
matches[query[0]].update_use(
count, message.created_at, message.author
)
return impacted
+15 -3
View File
@@ -9,10 +9,22 @@ from utils import generate_help
class FirstScanner(HistoryScanner):
@staticmethod
def help() -> str:
return generate_help("first", "Read first message")
return generate_help(
"first",
"Read first message (add text to filter like %find)",
args=[
"image - pull an image instead of a message",
"spoiler:allow/only - allow spoiler images",
],
)
def __init__(self):
super().__init__(help=FirstScanner.help())
def get_results(self, intro: str) -> List[str]:
return self.history.to_string(type="first")
async def get_results(self, intro: str) -> List[str]:
if self.images_only:
return await self.history.to_string_image(
type="first", spoiler=self.spoiler
)
else:
return self.history.to_string(type="first")
+41 -6
View File
@@ -1,26 +1,46 @@
from abc import ABC, abstractmethod
from typing import List
from typing import List, Tuple, Optional
import discord
import re
# Custom libs
from .scanner import Scanner
from data_types import History
from logs import ChannelLogs, MessageLog
from utils import FilterLevel
class HistoryScanner(Scanner, ABC):
def __init__(self, *, help: str):
super().__init__(
has_digit_args=True,
valid_args=["all", "everyone"],
valid_args=["all", "everyone", "spoiler", "spoiler:allow", "spoiler:only"],
help=help,
intro_context="",
all_args=True,
)
async def init(self, message: discord.Message, *args: str) -> bool:
    """Read the history-specific arguments and prepare the scan state.

    Sets ``history``, ``all_messages``, ``images_only``, ``spoiler`` and
    ``queries`` on the scanner.

    :param message: the command message (not used here).
    :param args: raw command arguments.
    :return: always ``True``.
    """
    self.history = History()
    self.all_messages = "all" in args or "everyone" in args
    self.images_only = "image" in args

    # "spoiler" / "spoiler:allow" win over "spoiler:only" when both are given
    self.spoiler = FilterLevel.NONE
    if "spoiler:only" in args:
        self.spoiler = FilterLevel.ONLY
    if "spoiler" in args or "spoiler:allow" in args:
        self.spoiler = FilterLevel.ALLOW

    # Text filters are ignored in image mode. Each query is stored as
    # (lower-cased text, regex source or None); a query fully wrapped in
    # backticks is treated as a regex.
    self.queries = []
    if not self.images_only:
        for raw_query in self.other_args:
            pattern = raw_query.strip("`") if re.match(r"^`.*`$", raw_query) else None
            self.queries.append((raw_query.lower(), pattern))
    return True
def compute_message(self, channel: ChannelLogs, message: MessageLog):
@@ -30,6 +50,8 @@ class HistoryScanner(Scanner, ABC):
self.history,
self.raw_members,
all_messages=self.all_messages,
queries=self.queries,
images_only=self.images_only,
)
@abstractmethod
@@ -44,14 +66,27 @@ class HistoryScanner(Scanner, ABC):
raw_members: List[int],
*,
all_messages: bool,
queries: List[Tuple[str, Optional[str]]],
images_only: bool,
) -> bool:
impacted = False
# If author is included in the selection (empty list is all)
if (
(not message.bot or all_messages)
and len(raw_members) == 0
or message.author in raw_members
) and (message.content or message.attachment):
(
(not message.bot or all_messages)
and len(raw_members) == 0
or message.author in raw_members
)
and (message.content or message.attachment)
and (not images_only or message.image)
):
content = message.content.lower()
for query in queries:
if query[1] is not None:
if not re.match(query[1], message.content):
return False
elif not query[0] in content:
return False
impacted = True
history.messages += [message]
return impacted
+13 -3
View File
@@ -9,10 +9,20 @@ from utils import generate_help
class LastScanner(HistoryScanner):
@staticmethod
def help() -> str:
return generate_help("last", "Read last message")
return generate_help(
"last",
"Read last message (add text to filter like %find)",
args=[
"image - pull an image instead of a message",
"spoiler:allow/only - allow spoiler images",
],
)
def __init__(self):
super().__init__(help=LastScanner.help())
def get_results(self, intro: str) -> List[str]:
return self.history.to_string(type="last")
async def get_results(self, intro: str) -> List[str]:
if self.images_only:
return await self.history.to_string_image(type="last", spoiler=self.spoiler)
else:
return self.history.to_string(type="last")
+15 -3
View File
@@ -9,10 +9,22 @@ from utils import generate_help
class RandomScanner(HistoryScanner):
@staticmethod
def help() -> str:
return generate_help("rand", "Read a random message")
return generate_help(
"rand",
"Read a random message (add text to filter like %find)",
args=[
"image - pull an image instead of a message",
"spoiler:allow/only - allow spoiler images",
],
)
def __init__(self):
super().__init__(help=RandomScanner.help())
def get_results(self, intro: str) -> List[str]:
return self.history.to_string(type="random")
async def get_results(self, intro: str) -> List[str]:
if self.images_only:
return await self.history.to_string_image(
type="random", spoiler=self.spoiler
)
else:
return self.history.to_string(type="random")
+62 -14
View File
@@ -4,6 +4,7 @@ from datetime import datetime
import logging
import re
import discord
import inspect
from utils import (
@@ -15,6 +16,8 @@ from utils import (
RELATIVE_REGEX,
parse_time,
command_cache,
FilterLevel,
SPLIT_TOKEN,
)
from logs import (
GuildLogs,
@@ -27,7 +30,17 @@ from logs import (
class Scanner(ABC):
VALID_ARGS = ["me", "here", "fast", "fresh", "mobile", "mention"]
VALID_ARGS = [
"me",
"here",
"fast",
"fresh",
"mobile",
"mention",
"nsfw",
"nsfw:allow",
"nsfw:only",
]
def __init__(
self,
@@ -139,6 +152,30 @@ class Scanner(ABC):
self.mention_users = "mention" in args or "mobile" in args
# nsfw filter
if "nsfw" in args or "nsfw:allow" in args:
self.nsfw = FilterLevel.ALLOW
elif "nsfw:only" in args:
self.nsfw = FilterLevel.ONLY
else:
self.nsfw = FilterLevel.NONE
# fix nsfw filter if channel specified
if not self.full and any(channel.nsfw for channel in self.channels):
self.nsfw = FilterLevel.ALLOW
elif all(channel.nsfw for channel in self.channels):
self.nsfw = FilterLevel.ONLY
# filter nsfw channels
if self.nsfw == FilterLevel.NONE:
self.channels = list(
filter(lambda channel: not channel.nsfw, self.channels)
)
elif self.nsfw == FilterLevel.ONLY:
self.channels = list(
filter(lambda channel: channel.nsfw, self.channels)
)
if not await self.init(message, *args):
return
@@ -220,18 +257,20 @@ class Scanner(ABC):
await progress.edit(content="```Computing results...```")
# Display results
t0 = datetime.now()
results = self.get_results(
get_intro(
self.intro_context,
self.full,
self.channels,
self.members,
self.msg_count,
self.chan_count,
self.start_date,
self.stop_date,
)
intro = get_intro(
self.intro_context,
self.full,
self.channels,
self.members,
self.msg_count,
self.chan_count,
self.start_date,
self.stop_date,
)
if inspect.iscoroutinefunction(self.get_results):
results = await self.get_results(intro)
else:
results = self.get_results(intro)
logging.info(
f"scan {guild.id} > results in {delta(t0):,}ms"
)
@@ -244,7 +283,7 @@ class Scanner(ABC):
)
for r in results:
if r:
if len(response + "\n" + r) > 2000:
if isinstance(r, int) and r == SPLIT_TOKEN:
await message.channel.send(
response,
reference=message if first else None,
@@ -252,7 +291,16 @@ class Scanner(ABC):
)
first = False
response = ""
response += "\n" + r
elif isinstance(r, str):
if len(response + "\n" + r) > 2000:
await message.channel.send(
response,
reference=message if first else None,
allowed_mentions=allowed_mentions,
)
first = False
response = ""
response += "\n" + r
if len(response) > 0:
await message.channel.send(
response,
+46 -1
View File
@@ -1,3 +1,4 @@
from enum import Enum
from typing import Callable, List, Dict, Union, Optional, Any
import os
import logging
@@ -17,6 +18,7 @@ COMMON_HELP_ARGS = [
"<date2> - filter before <date2>",
"fast - only read cache",
"fresh - does not read cache (long)",
"nsfw:allow/only - allow messages from nsfw channels",
"mobile/mention - mentions users (fix @invalid-user bug)",
]
@@ -30,7 +32,7 @@ def generate_help(
replace_args=[],
):
arg_list = "* " + "\n* ".join(
replace_args + COMMON_HELP_ARGS[len(replace_args) :] + args
args + replace_args + COMMON_HELP_ARGS[len(replace_args) :]
)
return f"""```
%{cmd}: {info}
@@ -49,6 +51,15 @@ def deltas(t0: datetime):
return (datetime.now() - t0).total_seconds()
class FilterLevel(Enum):
    """Three-state switch used by the nsfw and spoiler filters.

    NOTE: this is a plain ``Enum``, so members support ``==`` / ``is`` but
    not ordering operators such as ``<=``.
    """

    # Flagged content is left out
    NONE = 0
    # Flagged and unflagged content are both accepted
    ALLOW = 1
    # Only flagged content is accepted
    ONLY = 2
SPLIT_TOKEN = 1152317803
# DISCORD API
@@ -89,6 +100,25 @@ class FakeMessage:
self.id = id
def is_image_spoiler(message: discord.Message) -> bool:
    """Tell whether the image carried by ``message`` is marked as a spoiler.

    Only the first attachment is inspected when there is one. Otherwise,
    for a message with embeds, the text is searched for a link wrapped in
    Discord spoiler bars (``||...http...||``).

    The previous pattern left the ``|`` characters unescaped, which turns
    the regex into a chain of empty alternatives that matches any string;
    every embedded image was therefore reported as a spoiler.

    :param message: message to inspect.
    :return: ``True`` when the attachment/link is a spoiler, else ``False``.
    """
    if message.attachments:
        return message.attachments[0].is_spoiler()
    if message.embeds:
        # search (not match): the spoilered link may follow other text
        return (
            re.search(r"\|\|[^|]*http[^|]*\|\|", message.content.lower())
            is not None
        )
    return False
def should_allow_spoiler(message: discord.Message, spoiler: FilterLevel) -> bool:
    """Decide whether ``message`` passes the requested spoiler filter.

    Non-spoiler images pass for ``NONE`` and ``ALLOW``; spoiler images pass
    for ``ALLOW`` and ``ONLY``.

    ``FilterLevel`` is a plain ``Enum`` whose members cannot be ordered, so
    the accepted levels are checked by membership; the former ``<=`` / ``>=``
    comparisons raised ``TypeError``.

    :param message: message whose image is checked with ``is_image_spoiler``.
    :param spoiler: filter level requested by the user.
    :return: ``True`` when the message may be shown.
    """
    if is_image_spoiler(message):
        return spoiler in (FilterLevel.ALLOW, FilterLevel.ONLY)
    return spoiler in (FilterLevel.NONE, FilterLevel.ALLOW)
# FILE
@@ -137,6 +167,21 @@ def val_sum(d: Dict[Any, int]) -> int:
return sum(d.values())
def serialize(
    obj: Any,
    *,
    not_serialized: Optional[List[str]] = None,
    dates: Optional[List[str]] = None,
) -> Dict:
    """Build a plain dict from ``obj.__dict__`` for storage.

    :param obj: object whose instance attributes are copied (shallow copy).
    :param not_serialized: attribute names to leave out of the result.
    :param dates: attribute names whose truthy values are replaced by their
        ``isoformat()`` string; values without that method are kept as-is.
        Names that are absent (or were just removed through
        ``not_serialized``) are skipped instead of raising ``KeyError``.
    :return: the new dict; ``obj`` itself is not modified.
    """
    output = dict(obj.__dict__)
    for key in not_serialized or ():
        output.pop(key, None)
    for key in dates or ():
        value = output.get(key)
        if not value:
            # missing, None or otherwise empty: nothing to convert
            continue
        try:
            output[key] = value.isoformat()
        except AttributeError:
            # best effort: leave non-date values untouched
            pass
    return output
# MESSAGE FORMATTING