Only fetch history within the given time range

This commit is contained in:
Klemek
2021-04-09 19:07:43 +02:00
parent 09161850c5
commit 0e4ed0eb6b
4 changed files with 111 additions and 55 deletions
+59 -25
View File
@@ -1,6 +1,7 @@
from datetime import datetime
from typing import Any, AsyncIterator, Tuple, Union

import discord
from discord import message

from . import MessageLog
from utils import FakeMessage
@@ -18,6 +19,7 @@ class ChannelLogs:
self.id = channel.id
self.name = channel.name
self.last_message_id = None
self.first_message_id = None
self.format = FORMAT
self.messages = []
self.start_date = None
@@ -32,6 +34,12 @@ class ChannelLogs:
if channel["last_message_id"] is not None
else None
)
self.first_message_id = (
int(channel["first_message_id"])
if "first_message_id" in channel
and channel["first_message_id"] is not None
else None
)
self.messages = [
MessageLog(message, self) for message in channel["messages"]
]
@@ -42,48 +50,74 @@ class ChannelLogs:
def is_format(self):
    """Tell whether this log's ``format`` attribute equals the module-level ``FORMAT``."""
    return FORMAT == self.format
async def load(
    self, channel: discord.TextChannel, start_date: datetime, stop_date: datetime
) -> AsyncIterator[Tuple[int, bool]]:
    """Fetch this channel's messages from Discord, yielding progress as it goes.

    This is an async generator. Every item is a ``(count, done)`` pair:
    ``count`` is ``len(self.messages)`` at that point, or ``-1`` when Discord
    answered with an HTTP error; ``done`` is ``False`` after each fetched
    chunk and ``True`` on the two early exits below (no readable message in
    the channel, HTTP error).

    Args:
        channel: Channel to read; its name and the object itself are stored
            on ``self``.
        start_date: Lower bound of the requested period, or ``None``.
        stop_date: Upper bound of the requested period, or ``None``.
    """
    self.name = channel.name
    self.channel = channel
    # Nothing stored yet when no "last message" marker was ever recorded.
    is_empty = self.last_message_id is None
    try:
        if is_empty:
            sanity_check = len(await channel.history(limit=1).flatten())
            if sanity_check != 1:
                yield len(self.messages), True
                return
        # load backward
        if is_empty or (
            start_date is not None
            # self.start_date starts out as None; comparing None with a
            # datetime would raise TypeError.
            and self.start_date is not None
            and self.start_date > start_date
            and self.first_message_id is not None
        ):
            first_message_id = self.first_message_id
            first_message_date = None
            tmp_message_id = 0
            done = 0
            # `start_date is not None` is tested BEFORE the date comparison:
            # the previous order evaluated `first_message_date >= None` and
            # raised TypeError as soon as a short chunk had been fetched
            # without a start date.
            # NOTE(review): a full chunk keeps the loop going whatever
            # start_date is; the date only decides whether one more request
            # is made after a short chunk. Confirm this is intended.
            while (
                done >= CHUNK_SIZE
                or first_message_id is None
                or (
                    start_date is not None
                    and (
                        first_message_date is None
                        or first_message_date >= start_date
                    )
                )
            ) and tmp_message_id != first_message_id:
                # tmp_message_id detects a request that made no progress.
                tmp_message_id = first_message_id
                done = 0
                async for message in channel.history(
                    limit=CHUNK_SIZE,
                    before=FakeMessage(first_message_id)
                    if first_message_id is not None
                    else None,
                    oldest_first=False,
                ):
                    done += 1
                    first_message_id = message.id
                    first_message_date = message.created_at
                    m = MessageLog(message, self)
                    await m.load(message)
                    self.messages += [m]
                yield len(self.messages), False
            # NOTE(review): indentation was lost in the rendered diff; this
            # check is assumed to sit after the paging loop. Confirm.
            if (
                start_date is not None
                and done >= CHUNK_SIZE
                and first_message_date < start_date
            ):
                # date was limiting here, store first message id
                self.first_message_id = first_message_id
            self.last_message_id = channel.last_message_id
        # load forward
        if not is_empty:
            tmp_message_id = None
            # Guard against an empty list so the lookup cannot raise
            # IndexError; a missing date simply disables the stop_date test.
            last_message_date = (
                self.messages[0].created_at if self.messages else None
            )
            while (
                self.last_message_id != channel.last_message_id
                or (
                    stop_date is not None
                    and last_message_date is not None
                    and last_message_date <= stop_date
                )
            ) and self.last_message_id != tmp_message_id:
                tmp_message_id = self.last_message_id
                async for message in channel.history(
                    limit=CHUNK_SIZE,
                    after=FakeMessage(self.last_message_id),
                    oldest_first=True,
                ):
                    last_message_date = message.created_at
                    self.last_message_id = message.id
                    m = MessageLog(message, self)
                    await m.load(message)
                    # Newer messages go to the front of the list.
                    self.messages.insert(0, m)
                yield len(self.messages), False
    except discord.errors.HTTPException:
        yield -1, True
        return  # When an exception occurs (like Forbidden)
+21 -5
View File
@@ -32,7 +32,13 @@ MAX_MODIFICATION_TIME = 365 * 24 * 60 * 60
class Worker:
def __init__(self, channel_log: ChannelLogs, channel: discord.TextChannel):
def __init__(
self,
channel_log: ChannelLogs,
channel: discord.TextChannel,
start_date: datetime,
stop_date: datetime,
):
self.channel_log = channel_log
self.channel = channel
self.start_msg = len(channel_log.messages)
@@ -41,12 +47,16 @@ class Worker:
self.done = False
self.cancelled = False
self.loop = asyncio.get_event_loop()
self.start_date = start_date
self.stop_date = stop_date
def start(self):
    """Submit ``self.process()`` to the event loop stored in ``self.loop``."""
    pending = self.process()
    asyncio.run_coroutine_threadsafe(pending, self.loop)
async def process(self):
async for count, done in self.channel_log.load(self.channel):
async for count, done in self.channel_log.load(
self.channel, self.start_date, self.stop_date
):
if count > 0:
self.queried_msg = count - self.start_msg
self.total_msg = count
@@ -98,7 +108,9 @@ class GuildLogs:
async def load(
self,
progress: discord.Message,
target_channels: List[discord.TextChannel] = [],
target_channels: List[discord.TextChannel],
start_date: datetime,
stop_date: datetime,
*,
fast: bool,
fresh: bool,
@@ -173,6 +185,8 @@ class GuildLogs:
if (
not fast
and not fresh
and start_date is None
and stop_date is None
and last_time is not None
and (time.time() - last_time) < MIN_MODIFICATION_TIME
):
@@ -214,7 +228,9 @@ class GuildLogs:
if channel.id not in self.channels or fresh:
loading_new += 1
self.channels[channel.id] = ChannelLogs(channel, self)
workers += [Worker(self.channels[channel.id], channel)]
workers += [
Worker(self.channels[channel.id], channel, start_date, stop_date)
]
warning_msg = "(this might take a while)"
if len(target_channels) > 5 and loading_new > 5:
warning_msg = "(most channels are new, this will take a long while)"
@@ -255,7 +271,7 @@ class GuildLogs:
f"Reading new history...\n{total_msg:,} messages in {total_chan:,}/{max_chan:,} channels ({round(queried_msg/deltas(t0)):,}m/s)\n{warning_msg}{remaining_msg}",
)
logging.info(
f"log {self.guild.id} > queried in {delta(t0):,}ms -> {queried_msg / deltas(t0):,.3f} m/s"
f"log {self.guild.id} > queried {queried_msg} in {delta(t0):,}ms -> {queried_msg / deltas(t0):,.3f} m/s"
)
# write logs
real_total_msg = sum(