import re
import select
import sys
from urllib.request import urlopen
from urllib.error import URLError, HTTPError
from urllib.parse import urlparse
import os.path as path
from typing import List, Optional, Union, Tuple, BinaryIO

try:
    from Levenshtein import distance
except ModuleNotFoundError:
    # Optional dependency: find_nearest() degrades to "no suggestion" without it.
    distance = None


# region path utils

def relative_path(file: str, *args: str) -> str:
    """Return the real path obtained by joining *args* onto the directory containing *file*."""
    return path.realpath(path.join(path.dirname(path.realpath(file)), *args))

# endregion


# region dict utils

def read_key_safe(d: dict, k: str, default=None, *, types: Optional[List[type]] = None,
                  is_list: bool = False, is_list_size: Optional[int] = None):
    """
    Same as read_key(), but return *default* instead of raising KeyError when the key is missing.

    Only KeyError is swallowed: a value of the wrong type still raises TypeError.
    """
    try:
        return read_key(d, k, default, types=types, is_list=is_list, is_list_size=is_list_size)
    except KeyError:
        return default


def read_key(d: dict, k: str, default=None, *, types: Optional[List[type]] = None,
             is_list: bool = False, is_list_size: Optional[int] = None):
    """
    Read d[k], optionally validating the type of the value.

    :param d: dictionary to read from
    :param k: key to read
    :param default: value returned when the key is missing; None means the key is required
    :param types: accepted types for the value (or for each item when is_list is set)
    :param is_list: whether the value must be a list of items of the accepted types
    :param is_list_size: exact list length required, if any
    :raises KeyError: when the key is missing and default is None
    :raises TypeError: when the value does not match the requested types
    """
    if k in d:
        v = d[k]
        if types is not None:
            try:
                check_type(v, types, is_list, is_list_size)
            except TypeError as e:
                # Prefix the message with the offending key, keeping the original cause attached.
                raise TypeError(f"'{k}' is {e}") from e
        return v
    elif default is not None:
        return default
    else:
        raise KeyError(k)

# endregion


# region type utils

def check_type(obj, types: List[type], is_list: bool = False, is_list_size: Optional[int] = None):
    """
    Raise TypeError unless *obj* matches one of *types*.

    With is_list set, *obj* must be a list whose items all match one of *types*
    (and whose length is is_list_size when given).
    """
    # Name every accepted type in the message, not only the first one.
    expected = " or ".join(t.__name__ for t in types)
    if is_list:
        if not is_list_of(obj, types, is_list_size):
            if is_list_size is not None:
                raise TypeError(f"not a list of {is_list_size} {expected}")
            else:
                raise TypeError(f"not a list of {expected}")
    else:
        if not is_list_of([obj], types):
            raise TypeError(f"not a {expected}")


def is_list_of(obj, types: List[type], length: Optional[int] = None) -> bool:
    """
    Tell whether *obj* is a list whose items are all instances of at least one of *types*.

    When *length* is given, the list must also have exactly that many items.
    """
    if not isinstance(obj, list):
        return False
    accepted = tuple(types)
    if not all(isinstance(item, accepted) for item in obj):
        return False
    return length is None or len(obj) == length

# endregion


# region args utils

args_regex = re.compile('"([^"]*)"|\'([^\']*)\'|([^ ]+)')


def parse_arguments(src: str) -> List[str]:
    """Split *src* on spaces, keeping single- or double-quoted sections as one argument (quotes removed)."""
    def get_found_match(m: Tuple[str, ...]) -> str:
        # Exactly one alternative of the regex matched; the other groups are empty strings.
        return next((g for g in m if g), "")

    return [get_found_match(m) for m in args_regex.findall(src)]


def read_argument(args: List[str], *names: str, valued: bool = False, delete: bool = False):
    """
    Look for the first argument matching one of *names* (case-insensitively).

    :param args: argument list to search; modified in place when delete is set
    :param names: accepted spellings of the argument
    :param valued: whether the argument is followed by a value
    :param delete: whether to remove the argument (and its value) from *args*
    :return: for a flag, True if present and False otherwise; for a valued argument,
             the following item, or None when the argument or its value is missing
    """
    # Arguments are compared lower-cased, so the names must be normalised the same way.
    wanted = {name.lower() for name in names}
    for i, arg in enumerate(args):
        if arg.lower() not in wanted:
            continue
        if delete:
            del args[i]
        if not valued:
            return True
        # Once the flag has been removed, its value has shifted into the flag's slot.
        value_index = i if delete else i + 1
        if value_index >= len(args):
            return None
        value = args[value_index]
        if delete:
            del args[value_index]
        return value
    return None if valued else False


def split_arguments(args: Union[List[str], Tuple[str, ...]], separator: str) -> List[List[str]]:
    """Split *args* into groups around each *separator* item, dropping empty groups."""
    output = [[]]
    for argument in args:
        if argument == separator:
            output.append([])
        else:
            output[-1].append(argument)
    return [part for part in output if part]

# endregion


# region lang utils

def find_nearest(word: str, wlist: List[str], threshold: int = 5) -> Optional[str]:
    """
    Return the entry of *wlist* closest to *word*, or None when nothing is close enough.

    Closeness is the Levenshtein distance minus the length difference, so that a candidate
    is not penalised merely for being longer or shorter than *word*.
    Returns None when the Levenshtein module is unavailable or *wlist* is empty.
    """
    if distance is None or not wlist:
        return None
    distances = [
        (distance(word, w),  # distance
         abs(len(w) - len(word)),  # length diff
         w)
        for w in wlist]
    distances.sort(key=lambda v: v[1])  # sort by length diff to get the closest (in length) first
    found = min(distances, key=lambda v: v[0] - v[1])  # get the closest in lev. distance
    if found[0] - found[1] > threshold:  # distance is too much
        return None
    return found[2]


def sanitize_input(src: str) -> str:
    """Lower-case and strip *src*, then drop every character that is not an ASCII letter, digit, space or underscore."""
    return re.sub(r'[^A-Za-z0-9 _]', "", src.lower().strip())

# endregion


# region format utils

def justify_text(src: str, n_lines: int) -> Optional[str]:
    """
    Break *src* into *n_lines* lines of roughly equal length by turning spaces into newlines.

    :return: the text with line breaks, or None when there are not enough spaces to break on
    """
    spaces_indexes = find_all(src, " ")
    if n_lines - 1 > len(spaces_indexes):
        return None  # impossible
    if n_lines - 1 == len(spaces_indexes):
        return replace_at(src, "\n", spaces_indexes, 1)
    # Ideal (fractional) break positions, evenly spread over the text.
    breaks_positions = [k * (len(src) - 1) / n_lines for k in range(1, n_lines)]
    break_indexes = place_line_breaks(breaks_positions, spaces_indexes)
    return replace_at(src, "\n", break_indexes, 1)


def place_line_breaks(breaks_positions: List[float], spaces_indexes: List[int]) -> List[int]:
    """
    Choose, for each ideal break position, the space it should be placed on.

    Both lists are expected in ascending order. Spaces are scanned once; a break is placed
    on the previous space as soon as the distance to its target starts growing.

    :param breaks_positions: ideal positions of the breaks (not modified)
    :param spaces_indexes: indexes of the spaces available to break on
    :return: the selected space indexes, in ascending order
    """
    breaks_indexes = []
    if not spaces_indexes:
        return breaks_indexes
    targets = iter(breaks_positions)
    target = next(targets, None)
    dist = sys.maxsize
    for i, value in enumerate(spaces_indexes):
        if target is None:
            break
        current = abs(value - target)
        if dist < current:
            # Moving away from the target: the previous space was the closest one.
            breaks_indexes.append(spaces_indexes[i - 1])
            target = next(targets, None)
            if target is not None:
                # Restart the measurement against the new target; the current space is
                # its first candidate (a stale distance would make it settle too early).
                dist = abs(value - target)
        else:
            dist = current
    if target is not None:
        # Ran out of spaces while still approaching a target: use the last space.
        breaks_indexes.append(spaces_indexes[-1])
    return breaks_indexes

# endregion


# region string utils

def find_all(src: str, pattern: str) -> List[int]:
    """Return the index of every occurrence of *pattern* in *src* (overlapping ones included)."""
    indexes = []
    i = safe_index(src, pattern)
    while i is not None:
        indexes.append(i)
        i = safe_index(src, pattern, i + 1)
    return indexes


def replace_at(src: str, pattern: str, indexes: List[int], remove: int) -> str:
    """
    Replace *remove* characters by *pattern* at each of *indexes*.

    *indexes* are positions in the original string and are processed in the given order.
    """
    pieces = []
    start_index = 0
    for i in indexes:
        pieces.append(src[start_index:i])
        pieces.append(pattern)
        start_index = i + remove
    pieces.append(src[start_index:])
    return "".join(pieces)


def safe_index(src: Union[str, list], pattern, start: int = 0):
    """Same as src.index(pattern, start), but return None instead of raising ValueError."""
    try:
        return src.index(pattern, start)
    except ValueError:
        return None

# endregion


# region stream utils

def is_stdin_ready() -> bool:
    """
    https://stackoverflow.com/questions/3762881/how-do-i-check-if-stdin-has-some-data

    True when stdin is a terminal and select() reports it readable without waiting.
    """
    # NOTE(review): the isatty() condition is kept exactly as it was; confirm that
    # piped (non-tty) input is really meant to be reported as "not ready".
    return bool(sys.stdin.isatty() and select.select([sys.stdin, ], [], [], 0.0)[0])


def read_stream(stream: BinaryIO) -> bytes:
    """Read *stream* to its end by iterating over it and return the whole content as bytes."""
    return b"".join(stream)

# endregion


# region web utils

def _read_limited(stream, limit: int) -> Optional[bytes]:
    """Read *stream* to its end; return None as soon as more than *limit* bytes have been received."""
    chunks = []
    total = 0
    while total <= limit:
        # Never request more than one byte past the limit.
        chunk = stream.read(min(65536, limit + 1 - total))
        if not chunk:
            break
        chunks.append(chunk)
        total += len(chunk)
    if total > limit:
        return None
    return b"".join(chunks)


def read_web_file(url: str, *, timeout: float = 5, max_file_size: Optional[int] = None) -> Tuple[
        Optional[bytes], Optional[str]]:
    """
    Download the content found at *url*.

    :param url: address to fetch; must pass validate_url()
    :param timeout: timeout in seconds given to urlopen()
    :param max_file_size: maximum accepted size in bytes, or None for no limit
    :return: (data, None) on success, (None, error message) on failure
    """
    if not validate_url(url):
        return None, 'Invalid URL'
    # NOTE(review): validate_url() accepts any scheme that comes with a host
    # (not only http/https); restrict it if *url* can come from untrusted users.
    try:
        with urlopen(url, None, timeout) as web_file:
            if max_file_size is None:
                return web_file.read(), None
            # The header may be missing (e.g. chunked responses) or malformed:
            # use it only as an early rejection, never as the sole check.
            declared_size = web_file.info()['Content-Length']
            if declared_size is not None:
                try:
                    if int(declared_size) > max_file_size:
                        return None, 'File too big'
                except ValueError:
                    pass  # unusable header, rely on the bounded read below
            data = _read_limited(web_file, max_file_size)
            if data is None:
                return None, 'File too big'
            return data, None
    except HTTPError as e:
        return None, f'Could not connect: {e}'
    except URLError:
        return None, 'Could not connect to server'


def validate_url(url: str) -> bool:
    """Tell whether *url* parses with both a scheme and a network location."""
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse() rejects some malformed hosts (e.g. unbalanced IPv6 brackets).
        return False
    return parsed.scheme != "" and parsed.netloc != ""

# endregion