meme-otron/meme_otron/utils.py

import re
import select
import sys
from urllib.request import urlopen
from urllib.error import URLError
from urllib.parse import urlparse
import os.path as path
from typing import List, Optional, Union, Tuple, BinaryIO
from Levenshtein import distance


# region path utils


def relative_path(file: str, *args: str) -> str:
    return path.realpath(path.join(path.dirname(path.realpath(file)), *args))


# endregion

# region dict utils


def read_key_safe(d: dict, k: str, default=None, *,
                  types: Optional[List[type]] = None,
                  is_list: bool = False,
                  is_list_size: Optional[int] = None):
    try:
        return read_key(d, k, default, types=types, is_list=is_list, is_list_size=is_list_size)
    except KeyError:
        return default


def read_key(d: dict, k: str, default=None, *,
             types: Optional[List[type]] = None,
             is_list: bool = False,
             is_list_size: Optional[int] = None):
    if k in d:
        v = d[k]
        if types is not None:
            try:
                check_type(v, types, is_list, is_list_size)
            except TypeError as e:
                raise TypeError(f"'{k}' is {e}")
        return v
    elif default is not None:
        return default
    else:
        raise KeyError(k)


# endregion

# region type utils


def check_type(obj, types: List[type], is_list: bool = False, is_list_size: Optional[int] = None):
    if is_list:
        if not is_list_of(obj, types, is_list_size):
            if is_list_size is not None:
                raise TypeError(f"not a list of {is_list_size} {types[0].__name__}")
            else:
                raise TypeError(f"not a list of {types[0].__name__}")
    else:
        if not is_list_of([obj], types):
            raise TypeError(f"not a {types[0].__name__}")


def is_list_of(obj, types: List[type], length: Optional[int] = None) -> bool:
    if not (isinstance(obj, list)):
        return False
    for item in obj:
        found = False
        for t in types:
            if isinstance(item, t):
                found = True
                break
        if not found:
            return False
    if length is not None and len(obj) != length:
        return False
    return True


# endregion

# region args utils


args_regex = re.compile('"([^"]*)"|\'([^\']*)\'|([^ ]+)')


def parse_arguments(src: str) -> List[str]:
    def get_found_match(m: list) -> str:
        f = [g for g in m if len(g) > 0]
        if len(f) > 0:
            return f[0]
        return ""

    return [get_found_match(m) for m in args_regex.findall(src)]


def read_argument(args: List[str], *names: str, valued: bool = False, delete: bool = False):
    for i, arg in enumerate(args):
        if arg.lower() in names:
            if delete:
                del args[i]
                i -= 1
            if not valued:
                return True
            else:
                v = None
                if i + 1 < len(args):
                    v = args[i + 1]
                    if delete:
                        del args[i + 1]
                return v
    if valued:
        return None
    else:
        return False


def split_arguments(args: Union[List[str], Tuple[str]], separator: str) -> List[List[str]]:
    output = [[]]
    for argument in args:
        if argument == separator:
            output += [[]]
        else:
            output[-1] += [argument]
    return [part for part in output if len(part) > 0]


# endregion

# region lang utils


def find_nearest(word: str, wlist: List[str], threshold: int = 5) -> Optional[str]:
    distances = [
        (distance(word, w),  # distance
         abs(len(w) - len(word)),  # length diff
         w)
        for w in wlist]
    distances.sort(key=lambda v: v[1])  # sort by length diff to get the closest (in length) first
    found = min(distances, key=lambda v: v[0] - v[1])  # get the closest in lev. distance
    if found[0] - found[1] > threshold:  # distance is too much
        return None
    return found[2]


def sanitize_input(src: str) -> str:
    return re.sub(r'[^A-Za-z0-9 _]', "", src.lower().strip())


# endregion

# region format utils

def justify_text(src: str, n_lines: int) -> Optional[str]:
    spaces_indexes = find_all(src, " ")
    if n_lines - 1 > len(spaces_indexes):
        return None  # impossible
    if n_lines - 1 == len(spaces_indexes):
        return replace_at(src, "\n", spaces_indexes, 1)
    breaks_positions = [k * (len(src) - 1) / n_lines for k in range(1, n_lines)]
    break_indexes = place_line_breaks(breaks_positions, spaces_indexes)
    return replace_at(src, "\n", break_indexes, 1)


def place_line_breaks(breaks_positions: List[float], spaces_indexes: List[int]) -> List[int]:
    breaks_positions = breaks_positions[:]
    breaks_indexes = []
    dist = sys.maxsize
    for i, value in enumerate(spaces_indexes):
        if not len(breaks_positions):
            break
        if dist < abs(value - breaks_positions[0]):
            breaks_indexes += [spaces_indexes[i - 1]]
            breaks_positions.pop(0)
        else:
            dist = abs(value - breaks_positions[0])
    if len(breaks_positions):
        breaks_indexes += [spaces_indexes[-1]]
    return breaks_indexes


# endregion

# region string utils


def find_all(src: str, pattern: str) -> List[int]:
    indexes = []
    i = safe_index(src, pattern)
    while i is not None:
        indexes += [i]
        i = safe_index(src, pattern, i + 1)
    return indexes


def replace_at(src: str, pattern: str, indexes: List[int], remove: int) -> str:
    output = ""
    start_index = 0
    for i in indexes:
        output += src[start_index:i] + pattern
        start_index = i + remove
    output += src[start_index:]
    return output


def safe_index(src: Union[str, list], pattern, start: int = 0):
    try:
        return src.index(pattern, start)
    except ValueError:
        return None


# endregion

# region bytes utils


def is_stdin_ready() -> bool:
    """
    https://stackoverflow.com/questions/3762881/how-do-i-check-if-stdin-has-some-data
    """
    return sys.stdin.isatty() and select.select([sys.stdin, ], [], [], 0.0)[0]


def read_stream(stream: BinaryIO) -> bytes:
    output_data = bytearray()
    for line in stream:
        output_data += line
    return output_data


def read_web(url: str, timeout: int = 5) -> Optional[bytes]:
    try:
        with urlopen(url, None, timeout) as web_file:
            return web_file.read()
    except URLError:
        return None


# endregion


# region URL utils


def validate_url(url: str) -> bool:
    parsed = urlparse(url)
    return parsed.scheme != "" and parsed.netloc != ""

# endregion