From 653f91dda30c9d726eded123bc77e7e6f47da4d3 Mon Sep 17 00:00:00 2001 From: Klemek Date: Wed, 7 Apr 2021 14:35:23 +0200 Subject: [PATCH] new words scanner --- README.md | 7 ++- src/main.py | 9 ++- src/scanners/__init__.py | 3 +- src/scanners/words_scanner.py | 111 ++++++++++++++++++++++++++++++++++ 4 files changed, 127 insertions(+), 3 deletions(-) create mode 100644 src/scanners/words_scanner.py diff --git a/README.md b/README.md index a3d936b..4b9fe2d 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,9 @@ * %react - rank users by their reactions * arguments: * - top messages, default is 10 +* %words - rank words by their usage + * arguments: + * - top words, default is 10 * %cancel - cancel current analysis * Common arguments: @@ -100,8 +103,10 @@ python3 src/main.py ## Changelog +* **v1.12** + * more scans: `%words` * **v1.11** - * more scans `%first`, `%rand`, `%last` + * more scans: `%first`, `%rand`, `%last` * streak computing in `%pres` * **v1.10** * multithreading for queries diff --git a/src/main.py b/src/main.py index 2f73357..027a6fd 100644 --- a/src/main.py +++ b/src/main.py @@ -21,6 +21,7 @@ from scanners import ( FirstScanner, RandomScanner, LastScanner, + WordsScanner, ) from logs import GuildLogs @@ -32,7 +33,7 @@ emojis.load_emojis() bot = Bot( "Discord Analyst", - "1.11", + "1.12", alias="%", ) @@ -62,6 +63,12 @@ bot.register_command( "first: read first message", FirstScanner.help(), ) +bot.register_command( + "words", + lambda *args: WordsScanner().compute(*args), + "words: rank words by their usage", + WordsScanner.help(), +) bot.register_command( "mentioned", lambda *args: MentionedScanner().compute(*args), diff --git a/src/scanners/__init__.py b/src/scanners/__init__.py index 37eaa77..21fd922 100644 --- a/src/scanners/__init__.py +++ b/src/scanners/__init__.py @@ -10,4 +10,5 @@ from .channels_scanner import ChannelsScanner from .reactions_scanner import ReactionsScanner from .first_scanner import FirstScanner from .last_scanner import 
# NOTE(review): reconstructed from a whitespace-collapsed git patch. The text
# that preceded this module on the same physical lines was patch metadata: the
# tail of the src/scanners/__init__.py hunk (which adds
# `from .words_scanner import WordsScanner`) and the "new file" header for
# src/scanners/words_scanner.py. Below is the content of that new file.
from typing import Dict, List
from collections import defaultdict
import discord
import re

# Custom libs

from logs import ChannelLogs, MessageLog
from .scanner import Scanner
from data_types import Counter
from utils import (
    COMMON_HELP_ARGS,
    plural,
    precise,
)


class WordsScanner(Scanner):
    """Scanner behind the ``%words`` command: ranks words by how often they are used."""

    # Suffixes folded into an already-known base form ("cat's"/"cats" -> "cat").
    # Order matters: the longer "'s" is tried before the bare "s".
    special_cases = ["'s", "s"]

    # Message content is split on anything that is not a word character,
    # a hyphen or an apostrophe.
    _TOKEN_SPLIT = re.compile(r"[^\w\-']")
    # A token is kept only if, after optional leading non-word characters, it
    # starts with a word character that is neither a digit nor "_", and the
    # captured group ends with such a character as well (at least 3 characters
    # in total). Only group 1 is used.
    _WORD = re.compile(r"[^\w]*((?![\d_])\w.+(?![\d_])\w)[^\w]*")

    @staticmethod
    def help() -> str:
        """Return the help text shown for the ``%words`` command."""
        return (
            "```\n"
            + "%words: Rank words by their usage\n"
            + "arguments:\n"
            + COMMON_HELP_ARGS
            + "* - top words, default is 10\n"
            + "* everyone - include bots\n"
            + "Example: %words 10 #mychannel1 #mychannel2 @user\n"
            + "```"
        )

    def __init__(self):
        super().__init__(
            has_digit_args=True,
            valid_args=["all", "everyone"],
            help=WordsScanner.help(),
            intro_context="Words usage",
        )

    async def init(self, message: discord.Message, *args: str) -> bool:
        """Parse the command arguments and reset the per-scan state.

        Sets ``self.top`` (default 10, overridden by the last numeric
        argument), ``self.words`` (a fresh ``defaultdict(Counter)``) and
        ``self.all_messages`` (true when "all" or "everyone" was passed).
        Always returns ``True``.
        """
        self.top = 10
        for arg in args:
            # isdecimal() rather than isdigit(): isdigit() also accepts
            # characters such as "²" that int() refuses to parse.
            if arg.isdecimal():
                self.top = int(arg)
        self.words = defaultdict(Counter)
        self.all_messages = "all" in args or "everyone" in args
        return True

    def compute_message(self, channel: ChannelLogs, message: MessageLog):
        """Feed one logged message into the word counters.

        Delegates to :meth:`analyse_message`; ``channel`` is not used here.
        ``self.raw_members`` is not set in this class (presumably provided by
        the ``Scanner`` base class).
        """
        return WordsScanner.analyse_message(
            message,
            self.words,
            self.raw_members,
            all_messages=self.all_messages,
        )

    def get_results(self, intro: str) -> List[str]:
        """Build the result lines: intro, one line per top word, then a total.

        Words are ordered by ``Counter.score()`` (highest first) and cut to
        ``self.top`` entries. ``self.msg_count`` is not set in this class
        (presumably maintained by the ``Scanner`` base class).
        """
        ranked = sorted(
            self.words,
            key=lambda word: self.words[word].score(),
            reverse=True,
        )[: self.top]
        # Overall usage figure from Counter.total; passed to every row as
        # total_usage and reported in the footer line.
        usage_count = Counter.total(self.words)
        res = [intro]
        res += [
            self.words[word].to_string(
                position,
                f"`{word}`",
                total_usage=usage_count,
            )
            for position, word in enumerate(ranked)
        ]
        res += [
            f"Total: {plural(usage_count,'time')} ({precise(usage_count/self.msg_count)}/msg)"
        ]
        return res

    @staticmethod
    def analyse_message(
        message: MessageLog,
        words: Dict[str, Counter],
        raw_members: List[int],
        *,
        all_messages: bool,
    ) -> bool:
        """Count the words of ``message`` into ``words``.

        The message is taken into account when either no member filter is
        given (``raw_members`` empty) and the author is not a bot (or
        ``all_messages`` is set), or the author is explicitly listed in
        ``raw_members``.

        ``words`` is updated in place and must create missing entries on
        lookup (e.g. ``defaultdict(Counter)``). Each matching token counts as
        exactly one use of its lowercased (and possibly suffix-folded) form.

        Returns ``True`` when the message was taken into account, ``False``
        otherwise.
        """
        selected = (
            (not message.bot or all_messages) and not raw_members
        ) or message.author in raw_members
        if not selected:
            return False

        for token in WordsScanner._TOKEN_SPLIT.split(message.content):
            match = WordsScanner._WORD.match(token)
            if not match:
                continue
            word = match[1].lower()
            for case in WordsScanner.special_cases:
                stem = word[: -len(case)]
                if word.endswith(case) and stem in words:
                    # The base form is already tracked: count under it.
                    word = stem
                    break
                suffixed = word + case
                if suffixed in words:
                    # The suffixed form was seen first: rename its entry to
                    # the base form. Only done when the base form has no
                    # entry yet, so an existing counter is never overwritten.
                    # NOTE(review): merging both counters would be better but
                    # needs a Counter API that is not visible from this file.
                    if word not in words:
                        words[word] = words.pop(suffixed)
                    break
            # One use per token: the loop already visits every occurrence, so
            # counting substring matches in the whole content would inflate
            # the figure (and miss words whose original casing differs).
            words[word].update_use(1, message.created_at)
        return True