new url validate util + web file reading
This commit is contained in:
+23
-1
@@ -1,5 +1,8 @@
|
||||
import re
|
||||
import sys
|
||||
from urllib.request import urlopen
|
||||
from urllib.error import URLError
|
||||
from urllib.parse import urlparse
|
||||
import os.path as path
|
||||
from typing import List, Optional, Union, Tuple, BinaryIO
|
||||
from Levenshtein import distance
|
||||
@@ -214,7 +217,7 @@ def safe_index(src: Union[str, list], pattern, start: int = 0):
|
||||
|
||||
# endregion
|
||||
|
||||
# region stream utils
|
||||
# region bytes utils
|
||||
|
||||
|
||||
def read_stream(stream: BinaryIO) -> bytes:
|
||||
@@ -223,4 +226,23 @@ def read_stream(stream: BinaryIO) -> bytes:
|
||||
output_data += line
|
||||
return output_data
|
||||
|
||||
|
||||
def read_web(url: str, timeout: int = 5) -> Optional[bytes]:
    """Download the resource at *url* and return its raw body.

    :param url: address to open; passed unchanged to ``urlopen``.
    :param timeout: timeout in seconds handed to ``urlopen``.
    :return: the complete body as ``bytes``, or ``None`` when the resource
        could not be fetched.
    """
    try:
        with urlopen(url, None, timeout) as web_file:
            return web_file.read()
    except (OSError, ValueError):
        # URLError (and HTTPError) derive from OSError, as do socket timeouts
        # and connection resets raised while reading the body. urlopen raises
        # ValueError for strings it cannot interpret as a URL (for example a
        # missing scheme). All of these mean "nothing could be read", which
        # this function reports as None rather than letting them escape.
        return None
|
||||
|
||||
|
||||
# endregion
|
||||
|
||||
|
||||
# region URL utils
|
||||
|
||||
|
||||
def validate_url(url: str) -> bool:
    """Tell whether *url* carries both a scheme and a network location.

    The check is purely syntactic: the string is split with ``urlparse`` and
    accepted only if neither the scheme nor the netloc component is empty.
    """
    scheme, netloc = urlparse(url)[:2]
    return "" not in (scheme, netloc)
|
||||
|
||||
# endregion
|
||||
|
||||
@@ -165,3 +165,19 @@ class TestUtilsFormat(TestCase):
|
||||
self.assertEqual([5, 9, 15], utils.place_line_breaks([5.2, 14.3, 15.2], [3, 5, 9, 15, 18]))
|
||||
self.assertEqual([5, 9, 15, 18], utils.place_line_breaks([5.2, 14.3, 14.5, 15.2], [3, 5, 9, 15, 18]))
|
||||
self.assertEqual([5, 9, 15, 18], utils.place_line_breaks([5.2, 14.3, 14.5, 15.2], [3, 5, 9, 15, 18, 20]))
|
||||
|
||||
|
||||
# Placeholder test case for the stream helpers (read_stream / read_web).
# Neither test asserts anything yet, so both currently pass vacuously.
class TestUtilsStream(TestCase):
    def test_read_stream(self):
        # No assertions yet; coverage for utils.read_stream is still to be written.
        pass  # TODO

    def test_read_web(self):
        # No assertions yet; coverage for utils.read_web is still to be written.
        pass  # TODO
|
||||
|
||||
|
||||
# Checks for the URL helpers in utils.
class TestUtilsUrl(TestCase):
    def test_validate_url(self):
        # A URL with an explicit scheme and host is accepted.
        self.assertTrue(utils.validate_url("https://google.com/page#anchor?key=value&query"))

        # Inputs missing the "//" authority marker, the scheme, or everything
        # are rejected; checked in the same order as before.
        rejected = (
            "https:google.com/page#anchor?key=value&query",
            "",
            "google.com",
        )
        for candidate in rejected:
            self.assertFalse(utils.validate_url(candidate))
|
||||
|
||||
Reference in New Issue
Block a user