From f44e32fbf8ef9f49262f66deafe4eff32691e0d7 Mon Sep 17 00:00:00 2001
From: klemek
Date: Wed, 29 Apr 2020 12:49:35 +0200
Subject: [PATCH] new url validate util + web file reading

---
 meme_otron/utils.py                 | 44 ++++++++++++++++++++++++++++-
 tests/unit/meme_otron/test_utils.py | 20 +++++++++++++
 2 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/meme_otron/utils.py b/meme_otron/utils.py
index db99a67..291344a 100644
--- a/meme_otron/utils.py
+++ b/meme_otron/utils.py
@@ -1,5 +1,8 @@
 import re
 import sys
+from urllib.request import urlopen
+from urllib.error import URLError
+from urllib.parse import urlparse
 import os.path as path
 from typing import List, Optional, Union, Tuple, BinaryIO
 from Levenshtein import distance
@@ -214,7 +217,7 @@ def safe_index(src: Union[str, list], pattern, start: int = 0):
 # endregion
 
 
-# region stream utils
+# region bytes utils
 
 
 def read_stream(stream: BinaryIO) -> bytes:
@@ -223,4 +226,43 @@ def read_stream(stream: BinaryIO) -> bytes:
         output_data += line
     return output_data
 
+
+def read_web(url: str, timeout: int = 5) -> Optional[bytes]:
+    """
+    Fetch the raw content served at a URL.
+
+    :param url: address handed as-is to urlopen
+    :param timeout: seconds before urlopen gives up
+    :return: the response body, or None when it cannot be fetched
+    """
+    try:
+        with urlopen(url, None, timeout) as web_file:
+            return web_file.read()
+    except (URLError, OSError, ValueError):
+        # URLError: unreachable host or HTTP error status
+        # OSError: socket failure (e.g. timeout) while reading the body
+        # ValueError: urlopen refuses malformed URLs such as "google.com"
+        return None
+
+
+# endregion
+
+
+# region URL utils
+
+
+def validate_url(url: str) -> bool:
+    """
+    Tell whether a string has both a scheme and a network location.
+
+    :param url: candidate string, any scheme is accepted
+    :return: False when either part is missing or the string cannot be parsed
+    """
+    try:
+        parsed = urlparse(url)
+    except ValueError:
+        # urlparse raises on malformed authorities such as "http://[invalid"
+        return False
+    return parsed.scheme != "" and parsed.netloc != ""
+
 # endregion
diff --git a/tests/unit/meme_otron/test_utils.py b/tests/unit/meme_otron/test_utils.py
index cf2982b..36af9f6 100644
--- a/tests/unit/meme_otron/test_utils.py
+++ b/tests/unit/meme_otron/test_utils.py
@@ -165,3 +165,23 @@ class TestUtilsFormat(TestCase):
         self.assertEqual([5, 9, 15], utils.place_line_breaks([5.2, 14.3, 15.2], [3, 5, 9, 15, 18]))
         self.assertEqual([5, 9, 15, 18], utils.place_line_breaks([5.2, 14.3, 14.5, 15.2], [3, 5, 9, 15, 18]))
         self.assertEqual([5, 9, 15, 18], utils.place_line_breaks([5.2, 14.3, 14.5, 15.2], [3, 5, 9, 15, 18, 20]))
+
+
+class TestUtilsStream(TestCase):
+    def test_read_stream(self):
+        pass  # TODO
+
+    def test_read_web(self):
+        # TODO: cover a successful download
+        # malformed URLs are rejected by urlopen before any network access
+        self.assertIsNone(utils.read_web(""))
+        self.assertIsNone(utils.read_web("google.com"))
+
+
+class TestUtilsUrl(TestCase):
+    def test_validate_url(self):
+        self.assertTrue(utils.validate_url("https://google.com/page#anchor?key=value&query"))
+        self.assertFalse(utils.validate_url("https:google.com/page#anchor?key=value&query"))
+        self.assertFalse(utils.validate_url(""))
+        self.assertFalse(utils.validate_url("google.com"))
+        self.assertFalse(utils.validate_url("http://[invalid"))