only fetch history of given time
This commit is contained in:
+59
-25
@@ -1,6 +1,7 @@
|
|||||||
from typing import Union, Tuple, Any
|
from typing import Union, Tuple, Any
|
||||||
import discord
|
import discord
|
||||||
from discord import message
|
from discord import message
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
from . import MessageLog
|
from . import MessageLog
|
||||||
from utils import FakeMessage
|
from utils import FakeMessage
|
||||||
@@ -18,6 +19,7 @@ class ChannelLogs:
|
|||||||
self.id = channel.id
|
self.id = channel.id
|
||||||
self.name = channel.name
|
self.name = channel.name
|
||||||
self.last_message_id = None
|
self.last_message_id = None
|
||||||
|
self.first_message_id = None
|
||||||
self.format = FORMAT
|
self.format = FORMAT
|
||||||
self.messages = []
|
self.messages = []
|
||||||
self.start_date = None
|
self.start_date = None
|
||||||
@@ -32,6 +34,12 @@ class ChannelLogs:
|
|||||||
if channel["last_message_id"] is not None
|
if channel["last_message_id"] is not None
|
||||||
else None
|
else None
|
||||||
)
|
)
|
||||||
|
self.first_message_id = (
|
||||||
|
int(channel["first_message_id"])
|
||||||
|
if "first_message_id" in channel
|
||||||
|
and channel["first_message_id"] is not None
|
||||||
|
else None
|
||||||
|
)
|
||||||
self.messages = [
|
self.messages = [
|
||||||
MessageLog(message, self) for message in channel["messages"]
|
MessageLog(message, self) for message in channel["messages"]
|
||||||
]
|
]
|
||||||
@@ -42,48 +50,74 @@ class ChannelLogs:
|
|||||||
def is_format(self):
|
def is_format(self):
|
||||||
return self.format == FORMAT
|
return self.format == FORMAT
|
||||||
|
|
||||||
async def load(self, channel: discord.TextChannel) -> Tuple[int, int]:
|
async def load(
|
||||||
|
self, channel: discord.TextChannel, start_date: datetime, stop_date: datetime
|
||||||
|
) -> Tuple[int, int]:
|
||||||
self.name = channel.name
|
self.name = channel.name
|
||||||
self.channel = channel
|
self.channel = channel
|
||||||
|
is_empty = self.last_message_id is None
|
||||||
try:
|
try:
|
||||||
if self.last_message_id is not None: # append
|
if is_empty:
|
||||||
|
sanity_check = len(await channel.history(limit=1).flatten())
|
||||||
|
if sanity_check != 1:
|
||||||
|
yield len(self.messages), True
|
||||||
|
return
|
||||||
|
# load backward
|
||||||
|
if is_empty or (
|
||||||
|
start_date is not None
|
||||||
|
and self.start_date > start_date
|
||||||
|
and self.first_message_id is not None
|
||||||
|
):
|
||||||
|
first_message_id = self.first_message_id
|
||||||
|
first_message_date = None
|
||||||
|
tmp_message_id = 0
|
||||||
|
done = 0
|
||||||
|
while (
|
||||||
|
done >= CHUNK_SIZE
|
||||||
|
or first_message_id is None
|
||||||
|
or (first_message_date is None or first_message_date >= start_date)
|
||||||
|
and start_date is not None
|
||||||
|
) and tmp_message_id != first_message_id:
|
||||||
|
tmp_message_id = first_message_id
|
||||||
|
done = 0
|
||||||
|
async for message in channel.history(
|
||||||
|
limit=CHUNK_SIZE,
|
||||||
|
before=FakeMessage(first_message_id)
|
||||||
|
if first_message_id is not None
|
||||||
|
else None,
|
||||||
|
oldest_first=False,
|
||||||
|
):
|
||||||
|
done += 1
|
||||||
|
first_message_id = message.id
|
||||||
|
first_message_date = message.created_at
|
||||||
|
m = MessageLog(message, self)
|
||||||
|
await m.load(message)
|
||||||
|
self.messages += [m]
|
||||||
|
yield len(self.messages), False
|
||||||
|
if done >= CHUNK_SIZE and first_message_date < start_date:
|
||||||
|
# date was limiting here, store first message id
|
||||||
|
self.first_message_id = first_message_id
|
||||||
|
self.last_message_id = channel.last_message_id
|
||||||
|
# load forward
|
||||||
|
if not is_empty:
|
||||||
tmp_message_id = None
|
tmp_message_id = None
|
||||||
|
last_message_date = self.messages[0].created_at
|
||||||
while (
|
while (
|
||||||
self.last_message_id != channel.last_message_id
|
self.last_message_id != channel.last_message_id
|
||||||
and self.last_message_id != tmp_message_id
|
or (stop_date is not None and last_message_date <= stop_date)
|
||||||
):
|
) and self.last_message_id != tmp_message_id:
|
||||||
tmp_message_id = self.last_message_id
|
tmp_message_id = self.last_message_id
|
||||||
async for message in channel.history(
|
async for message in channel.history(
|
||||||
limit=CHUNK_SIZE,
|
limit=CHUNK_SIZE,
|
||||||
after=FakeMessage(self.last_message_id),
|
after=FakeMessage(self.last_message_id),
|
||||||
oldest_first=True,
|
oldest_first=True,
|
||||||
):
|
):
|
||||||
|
last_message_date = message.created_at
|
||||||
self.last_message_id = message.id
|
self.last_message_id = message.id
|
||||||
m = MessageLog(message, self)
|
m = MessageLog(message, self)
|
||||||
await m.load(message)
|
await m.load(message)
|
||||||
self.messages.insert(0, m)
|
self.messages.insert(0, m)
|
||||||
yield len(self.messages), False
|
yield len(self.messages), False
|
||||||
else: # first load
|
|
||||||
last_message_id = None
|
|
||||||
done = 0
|
|
||||||
sanity_check = len(await channel.history(limit=1).flatten())
|
|
||||||
if sanity_check == 1:
|
|
||||||
while done >= CHUNK_SIZE or last_message_id is None:
|
|
||||||
done = 0
|
|
||||||
async for message in channel.history(
|
|
||||||
limit=CHUNK_SIZE,
|
|
||||||
before=FakeMessage(last_message_id)
|
|
||||||
if last_message_id is not None
|
|
||||||
else None,
|
|
||||||
oldest_first=False,
|
|
||||||
):
|
|
||||||
done += 1
|
|
||||||
last_message_id = message.id
|
|
||||||
m = MessageLog(message, self)
|
|
||||||
await m.load(message)
|
|
||||||
self.messages += [m]
|
|
||||||
yield len(self.messages), False
|
|
||||||
self.last_message_id = channel.last_message_id
|
|
||||||
except discord.errors.HTTPException:
|
except discord.errors.HTTPException:
|
||||||
yield -1, True
|
yield -1, True
|
||||||
return # When an exception occurs (like Forbidden)
|
return # When an exception occurs (like Forbidden)
|
||||||
|
|||||||
+21
-5
@@ -32,7 +32,13 @@ MAX_MODIFICATION_TIME = 365 * 24 * 60 * 60
|
|||||||
|
|
||||||
|
|
||||||
class Worker:
|
class Worker:
|
||||||
def __init__(self, channel_log: ChannelLogs, channel: discord.TextChannel):
|
def __init__(
|
||||||
|
self,
|
||||||
|
channel_log: ChannelLogs,
|
||||||
|
channel: discord.TextChannel,
|
||||||
|
start_date: datetime,
|
||||||
|
stop_date: datetime,
|
||||||
|
):
|
||||||
self.channel_log = channel_log
|
self.channel_log = channel_log
|
||||||
self.channel = channel
|
self.channel = channel
|
||||||
self.start_msg = len(channel_log.messages)
|
self.start_msg = len(channel_log.messages)
|
||||||
@@ -41,12 +47,16 @@ class Worker:
|
|||||||
self.done = False
|
self.done = False
|
||||||
self.cancelled = False
|
self.cancelled = False
|
||||||
self.loop = asyncio.get_event_loop()
|
self.loop = asyncio.get_event_loop()
|
||||||
|
self.start_date = start_date
|
||||||
|
self.stop_date = stop_date
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
asyncio.run_coroutine_threadsafe(self.process(), self.loop)
|
asyncio.run_coroutine_threadsafe(self.process(), self.loop)
|
||||||
|
|
||||||
async def process(self):
|
async def process(self):
|
||||||
async for count, done in self.channel_log.load(self.channel):
|
async for count, done in self.channel_log.load(
|
||||||
|
self.channel, self.start_date, self.stop_date
|
||||||
|
):
|
||||||
if count > 0:
|
if count > 0:
|
||||||
self.queried_msg = count - self.start_msg
|
self.queried_msg = count - self.start_msg
|
||||||
self.total_msg = count
|
self.total_msg = count
|
||||||
@@ -98,7 +108,9 @@ class GuildLogs:
|
|||||||
async def load(
|
async def load(
|
||||||
self,
|
self,
|
||||||
progress: discord.Message,
|
progress: discord.Message,
|
||||||
target_channels: List[discord.TextChannel] = [],
|
target_channels: List[discord.TextChannel],
|
||||||
|
start_date: datetime,
|
||||||
|
stop_date: datetime,
|
||||||
*,
|
*,
|
||||||
fast: bool,
|
fast: bool,
|
||||||
fresh: bool,
|
fresh: bool,
|
||||||
@@ -173,6 +185,8 @@ class GuildLogs:
|
|||||||
if (
|
if (
|
||||||
not fast
|
not fast
|
||||||
and not fresh
|
and not fresh
|
||||||
|
and start_date is None
|
||||||
|
and stop_date is None
|
||||||
and last_time is not None
|
and last_time is not None
|
||||||
and (time.time() - last_time) < MIN_MODIFICATION_TIME
|
and (time.time() - last_time) < MIN_MODIFICATION_TIME
|
||||||
):
|
):
|
||||||
@@ -214,7 +228,9 @@ class GuildLogs:
|
|||||||
if channel.id not in self.channels or fresh:
|
if channel.id not in self.channels or fresh:
|
||||||
loading_new += 1
|
loading_new += 1
|
||||||
self.channels[channel.id] = ChannelLogs(channel, self)
|
self.channels[channel.id] = ChannelLogs(channel, self)
|
||||||
workers += [Worker(self.channels[channel.id], channel)]
|
workers += [
|
||||||
|
Worker(self.channels[channel.id], channel, start_date, stop_date)
|
||||||
|
]
|
||||||
warning_msg = "(this might take a while)"
|
warning_msg = "(this might take a while)"
|
||||||
if len(target_channels) > 5 and loading_new > 5:
|
if len(target_channels) > 5 and loading_new > 5:
|
||||||
warning_msg = "(most channels are new, this will take a long while)"
|
warning_msg = "(most channels are new, this will take a long while)"
|
||||||
@@ -255,7 +271,7 @@ class GuildLogs:
|
|||||||
f"Reading new history...\n{total_msg:,} messages in {total_chan:,}/{max_chan:,} channels ({round(queried_msg/deltas(t0)):,}m/s)\n{warning_msg}{remaining_msg}",
|
f"Reading new history...\n{total_msg:,} messages in {total_chan:,}/{max_chan:,} channels ({round(queried_msg/deltas(t0)):,}m/s)\n{warning_msg}{remaining_msg}",
|
||||||
)
|
)
|
||||||
logging.info(
|
logging.info(
|
||||||
f"log {self.guild.id} > queried in {delta(t0):,}ms -> {queried_msg / deltas(t0):,.3f} m/s"
|
f"log {self.guild.id} > queried {queried_msg} in {delta(t0):,}ms -> {queried_msg / deltas(t0):,.3f} m/s"
|
||||||
)
|
)
|
||||||
# write logs
|
# write logs
|
||||||
real_total_msg = sum(
|
real_total_msg = sum(
|
||||||
|
|||||||
+30
-22
@@ -90,10 +90,10 @@ class Scanner(ABC):
|
|||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
self.start_datetime = None if len(dates) < 1 else min(dates)
|
self.start_date = None if len(dates) < 1 else min(dates)
|
||||||
self.stop_datetime = datetime.now() if len(dates) < 2 else max(dates)
|
self.stop_date = None if len(dates) < 2 else max(dates)
|
||||||
|
|
||||||
if self.start_datetime is not None and self.start_datetime > datetime.now():
|
if self.start_date is not None and self.start_date > datetime.now():
|
||||||
await message.channel.send(
|
await message.channel.send(
|
||||||
f"Start date is after today", reference=message
|
f"Start date is after today", reference=message
|
||||||
)
|
)
|
||||||
@@ -130,20 +130,13 @@ class Scanner(ABC):
|
|||||||
allowed_mentions=discord.AllowedMentions.none(),
|
allowed_mentions=discord.AllowedMentions.none(),
|
||||||
)
|
)
|
||||||
total_msg, total_chan = await logs.load(
|
total_msg, total_chan = await logs.load(
|
||||||
progress, self.channels, fast="fast" in args, fresh="fresh" in args
|
progress,
|
||||||
|
self.channels,
|
||||||
|
self.start_date,
|
||||||
|
self.stop_date,
|
||||||
|
fast="fast" in args,
|
||||||
|
fresh="fresh" in args,
|
||||||
)
|
)
|
||||||
if self.start_datetime is not None:
|
|
||||||
self.start_datetime = max(
|
|
||||||
self.start_datetime,
|
|
||||||
min(
|
|
||||||
[
|
|
||||||
logs.channels[channel.id].start_date
|
|
||||||
for channel in self.channels
|
|
||||||
if channel.id in logs.channels
|
|
||||||
and logs.channels[channel.id].start_date is not None
|
|
||||||
]
|
|
||||||
),
|
|
||||||
)
|
|
||||||
if total_msg == CANCELLED:
|
if total_msg == CANCELLED:
|
||||||
await message.channel.send(
|
await message.channel.send(
|
||||||
"Operation cancelled by user",
|
"Operation cancelled by user",
|
||||||
@@ -157,6 +150,21 @@ class Scanner(ABC):
|
|||||||
elif total_msg == NO_FILE:
|
elif total_msg == NO_FILE:
|
||||||
await message.channel.send(gdpr.TEXT)
|
await message.channel.send(gdpr.TEXT)
|
||||||
else:
|
else:
|
||||||
|
if self.start_date is not None:
|
||||||
|
self.start_date = max(
|
||||||
|
self.start_date,
|
||||||
|
min(
|
||||||
|
[
|
||||||
|
logs.channels[channel.id].start_date
|
||||||
|
for channel in self.channels
|
||||||
|
if channel.id in logs.channels
|
||||||
|
and logs.channels[channel.id].start_date is not None
|
||||||
|
]
|
||||||
|
),
|
||||||
|
)
|
||||||
|
if self.stop_date is None:
|
||||||
|
self.stop_date = datetime.utcnow()
|
||||||
|
|
||||||
self.msg_count = 0
|
self.msg_count = 0
|
||||||
self.total_msg = 0
|
self.total_msg = 0
|
||||||
self.chan_count = 0
|
self.chan_count = 0
|
||||||
@@ -169,12 +177,12 @@ class Scanner(ABC):
|
|||||||
self.compute_message(channel_logs, message_log)
|
self.compute_message(channel_logs, message_log)
|
||||||
for message_log in channel_logs.messages
|
for message_log in channel_logs.messages
|
||||||
if (
|
if (
|
||||||
self.start_datetime is None
|
self.start_date is None
|
||||||
or message_log.created_at >= self.start_datetime
|
or message_log.created_at >= self.start_date
|
||||||
)
|
)
|
||||||
and (
|
and (
|
||||||
self.stop_datetime is None
|
self.stop_date is None
|
||||||
or message_log.created_at <= self.stop_datetime
|
or message_log.created_at <= self.stop_date
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
@@ -199,8 +207,8 @@ class Scanner(ABC):
|
|||||||
self.members,
|
self.members,
|
||||||
self.msg_count,
|
self.msg_count,
|
||||||
self.chan_count,
|
self.chan_count,
|
||||||
self.start_datetime,
|
self.start_date,
|
||||||
self.stop_datetime,
|
self.stop_date,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
logging.info(f"scan {guild.id} > results in {delta(t0):,}ms")
|
logging.info(f"scan {guild.id} > results in {delta(t0):,}ms")
|
||||||
|
|||||||
+1
-3
@@ -180,9 +180,7 @@ def parse_iso_datetime(str_date: str) -> datetime:
|
|||||||
return dateutil.parser.parse(str_date)
|
return dateutil.parser.parse(str_date)
|
||||||
|
|
||||||
|
|
||||||
RELATIVE_REGEX = (
|
RELATIVE_REGEX = r"(yesterday|today|\d*h(ours?)?|\d*d(ays?)?|\d*w(eeks?)?|\d*m(onths?)?|\d*y(ears?)?)"
|
||||||
r"(yesterday|today|\d*h(ours?)?|\d*d(ays?)?|\d*w(eeks?)?|\d*m(onths?)?|\d*y(ears?))"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def parse_relative_time(src: str) -> datetime:
|
def parse_relative_time(src: str) -> datetime:
|
||||||
|
|||||||
Reference in New Issue
Block a user