new url validate util + web file reading

This commit is contained in:
klemek
2020-04-29 12:49:35 +02:00
parent 9f46e8b8f9
commit f44e32fbf8
2 changed files with 39 additions and 1 deletions
+23 -1
View File
@@ -1,5 +1,8 @@
import re
import sys
from urllib.request import urlopen
from urllib.error import URLError
from urllib.parse import urlparse
import os.path as path
from typing import List, Optional, Union, Tuple, BinaryIO
from Levenshtein import distance
@@ -214,7 +217,7 @@ def safe_index(src: Union[str, list], pattern, start: int = 0):
# endregion
# region stream utils
# region bytes utils
def read_stream(stream: BinaryIO) -> bytes:
@@ -223,4 +226,23 @@ def read_stream(stream: BinaryIO) -> bytes:
output_data += line
return output_data
def read_web(url: str, timeout: int = 5) -> Optional[bytes]:
    """Fetch the resource at ``url`` and return its raw content.

    Args:
        url: Address handed to ``urlopen``.
        timeout: Timeout, in seconds, handed to ``urlopen``.

    Returns:
        The bytes read from the response, or ``None`` when the resource
        could not be fetched (malformed URL, unreachable or missing
        resource, or an I/O failure while reading).
    """
    try:
        with urlopen(url, None, timeout) as web_file:
            return web_file.read()
    except (URLError, OSError, ValueError):
        # URLError (an OSError subclass) is raised when opening fails; plain
        # OSError covers socket timeouts/resets raised while reading the body;
        # ValueError is what urlopen raises for a URL it cannot interpret,
        # e.g. "google.com" (no scheme) or an empty string.
        return None
# endregion
# region URL utils
def validate_url(url: str) -> bool:
    """Tell whether ``url`` has both a scheme and a network location.

    Args:
        url: The text to check.

    Returns:
        ``True`` when ``urlparse`` finds a non-empty scheme and a non-empty
        netloc in ``url``; ``False`` otherwise, including when ``urlparse``
        rejects the input altogether.
    """
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse refuses some malformed inputs outright (for instance an
        # unbalanced IPv6 bracket such as "http://[::1"); that is simply an
        # invalid URL for a validator, not an error for the caller.
        return False
    return bool(parsed.scheme and parsed.netloc)
# endregion
+16
View File
@@ -165,3 +165,19 @@ class TestUtilsFormat(TestCase):
self.assertEqual([5, 9, 15], utils.place_line_breaks([5.2, 14.3, 15.2], [3, 5, 9, 15, 18]))
self.assertEqual([5, 9, 15, 18], utils.place_line_breaks([5.2, 14.3, 14.5, 15.2], [3, 5, 9, 15, 18]))
self.assertEqual([5, 9, 15, 18], utils.place_line_breaks([5.2, 14.3, 14.5, 15.2], [3, 5, 9, 15, 18, 20]))
class TestUtilsStream(TestCase):
    """Placeholder test case; neither test asserts anything yet."""

    def test_read_stream(self):
        """Not implemented yet, so this passes trivially."""
        # TODO

    def test_read_web(self):
        """Not implemented yet, so this passes trivially."""
        # TODO
class TestUtilsUrl(TestCase):
    """Checks ``utils.validate_url`` against accepted and rejected inputs."""

    def test_validate_url(self):
        """A full https URL is accepted; the three other samples are rejected."""
        accepted = ("https://google.com/page#anchor?key=value&query",)
        rejected = (
            "https:google.com/page#anchor?key=value&query",  # no "//" after the scheme
            "",
            "google.com",  # no scheme
        )
        for candidate in accepted:
            self.assertTrue(utils.validate_url(candidate))
        for candidate in rejected:
            self.assertFalse(utils.validate_url(candidate))