From 04f681dba6f09551805c19df58c8035581be610e Mon Sep 17 00:00:00 2001 From: Klemek Date: Fri, 9 Apr 2021 00:40:28 +0200 Subject: [PATCH] %words improvement --- README.md | 4 +++- src/main.py | 12 ++++++------ src/scanners/words_scanner.py | 19 +++++++------------ 3 files changed, 16 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 881d143..caac0c2 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ * %react - rank users by their reactions * arguments: * - top messages, default is 10 -* %words - rank words by their usage +* %words - (BETA) rank words by their usage * arguments: * - words containings or more letters, default is 3 * - top words, default is 10 @@ -104,6 +104,8 @@ python3 src/main.py ## Changelog +* **v1.13** + * improved scan `%words` * **v1.12** * more scans: `%words` * concurrent `fast` analysis diff --git a/src/main.py b/src/main.py index 565e92d..4ac9b5f 100644 --- a/src/main.py +++ b/src/main.py @@ -45,6 +45,12 @@ bot.register_command( "cancel: stop current analysis (not launched with fast)", "```\n" + "%cancel: Stop current analysis (not launched with fast)\n" + "```", ) +bot.register_command( + "words", + lambda *args: WordsScanner().compute(*args), + "words: (BETA) rank words by their usage", + WordsScanner.help(), +) bot.register_command( "last", lambda *args: LastScanner().compute(*args), @@ -63,12 +69,6 @@ bot.register_command( "first: read first message", FirstScanner.help(), ) -bot.register_command( - "words", - lambda *args: WordsScanner().compute(*args), - "words: rank words by their usage", - WordsScanner.help(), -) bot.register_command( "mentioned", lambda *args: MentionedScanner().compute(*args), diff --git a/src/scanners/words_scanner.py b/src/scanners/words_scanner.py index 80d0971..c31bf9b 100644 --- a/src/scanners/words_scanner.py +++ b/src/scanners/words_scanner.py @@ -20,7 +20,7 @@ class WordsScanner(Scanner): def help() -> str: return ( "```\n" - + "%words: Rank words by their usage\n" + + "%words: (BETA) Rank words by their usage\n" + "arguments:\n" + COMMON_HELP_ARGS + "* - words containings or more letters, default is 3\n" @@ -104,16 +104,13 @@ class WordsScanner(Scanner): or message.author in raw_members ): impacted = True - content = " ".join( - [ - block - for block in message.content.split() - if not re.match(r"^\w+:\/\/", block) - ] - ) + content = message.content + content = re.sub(r"```.+```", "", content, flags=re.DOTALL) + content = re.sub(r"`.+`", "", content, flags=re.DOTALL) + content = re.sub(r"\w+:\/\/[^ ]+", "", content) for word in re.split("[^\w\-':]", content): m = re.match( - r"(?!^:\w+:$)^[^\w]*((?![\d_])\w.*(?![\d_])\w)[^\w]*$", word + r"(?!^:\w+:$)^[^\w]*((?![\d_])\w[\w\-']*(?![\d_])\w)[^\w]*$", word ) if m: word = m[1].lower() @@ -126,7 +123,5 @@ class WordsScanner(Scanner): words[word] = words[word + case] del words[word + case] break - words[word].update_use( - message.content.count(word), message.created_at - ) + words[word].update_use(1, message.created_at) return impacted