%words improvement

This commit is contained in:
Klemek
2021-04-09 00:40:28 +02:00
parent 8cc0e1fe65
commit 04f681dba6
3 changed files with 16 additions and 19 deletions
+3 -1
View File
@@ -43,7 +43,7 @@
* %react - rank users by their reactions
* arguments:
* <n> - top <n> messages, default is 10
* %words - rank words by their usage
* %words - (BETA) rank words by their usage
* arguments:
* <n> - words containing <n> or more letters, default is 3
* <n2> - top <n2> words, default is 10
@@ -104,6 +104,8 @@ python3 src/main.py
## Changelog
* **v1.13**
* improved scan `%words`
* **v1.12**
* more scans: `%words`
* concurrent `fast` analysis
+6 -6
View File
@@ -45,6 +45,12 @@ bot.register_command(
"cancel: stop current analysis (not launched with fast)",
"```\n" + "%cancel: Stop current analysis (not launched with fast)\n" + "```",
)
bot.register_command(
"words",
lambda *args: WordsScanner().compute(*args),
"words: (BETA) rank words by their usage",
WordsScanner.help(),
)
bot.register_command(
"last",
lambda *args: LastScanner().compute(*args),
@@ -63,12 +69,6 @@ bot.register_command(
"first: read first message",
FirstScanner.help(),
)
bot.register_command(
"words",
lambda *args: WordsScanner().compute(*args),
"words: rank words by their usage",
WordsScanner.help(),
)
bot.register_command(
"mentioned",
lambda *args: MentionedScanner().compute(*args),
+7 -12
View File
@@ -20,7 +20,7 @@ class WordsScanner(Scanner):
def help() -> str:
return (
"```\n"
+ "%words: Rank words by their usage\n"
+ "%words: (BETA) Rank words by their usage\n"
+ "arguments:\n"
+ COMMON_HELP_ARGS
+ "* <n> - words containings <n> or more letters, default is 3\n"
@@ -104,16 +104,13 @@ class WordsScanner(Scanner):
or message.author in raw_members
):
impacted = True
content = " ".join(
[
block
for block in message.content.split()
if not re.match(r"^\w+:\/\/", block)
]
)
content = message.content
content = re.sub(r"```.+```", "", content, flags=re.DOTALL)
content = re.sub(r"`.+`", "", content, flags=re.DOTALL)
content = re.sub(r"\w+:\/\/[^ ]+", "", content)
for word in re.split("[^\w\-':]", content):
m = re.match(
r"(?!^:\w+:$)^[^\w]*((?![\d_])\w.*(?![\d_])\w)[^\w]*$", word
r"(?!^:\w+:$)^[^\w]*((?![\d_])\w[\w\-']*(?![\d_])\w)[^\w]*$", word
)
if m:
word = m[1].lower()
@@ -126,7 +123,5 @@ class WordsScanner(Scanner):
words[word] = words[word + case]
del words[word + case]
break
words[word].update_use(
message.content.count(word), message.created_at
)
words[word].update_use(1, message.created_at)
return impacted