first commit

This commit is contained in:
Klemek
2023-06-03 20:17:51 +02:00
parent b2cb57840d
commit 3f3f43769c
5 changed files with 170 additions and 89 deletions
+21
View File
@@ -0,0 +1,21 @@
import json
import re
import sys

import requests
BASE_URL = "http://dicosemiopsy.asso-aesp.fr"
INDEX_PATH = "/index.php/lexique.html"
OUTPUT_FILE = "data.json"
# Seconds. Without an explicit timeout requests.get() can block indefinitely.
REQUEST_TIMEOUT = 30

# Links written as <li><a href="...">...</a></li>. "." does not match a
# newline here, so every match is confined to a single line of the page.
LINK_PATTERN = re.compile(r"<li><a href=\"(.*)\">.*</a></li>")

# <h1> title, then the text that precedes the closing tags of the content div.
# NOTE: re.MULTILINE is kept from the original call; the pattern has no ^/$
# anchors, so the flag does not change what is matched.
ENTRY_PATTERN = re.compile(
    r"<h1>(.+)</h1>\s+(.+)</div>\s+</div><!-- /content -->",
    re.MULTILINE,
)


def fetch_html(url: str) -> str:
    """Download *url* and return its body decoded as UTF-8.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly instead of trying to parse an error page.
    response.raise_for_status()
    return response.content.decode("utf-8")


def extract_paths(html: str):
    """Return the list of href values of every ``<li><a>`` link in *html*."""
    return LINK_PATTERN.findall(html)


def parse_entry(html: str):
    """Extract one entry from the HTML of an entry page.

    Returns a ``{"name": ..., "description": ...}`` dict built from the first
    match of ``ENTRY_PATTERN``, or ``None`` when the page does not match.
    """
    # The only HTML entity that is decoded is the apostrophe.
    match = ENTRY_PATTERN.search(html.replace("&#039;", "'"))
    if match is None:
        return None
    name, description = match.groups()
    return {"name": name, "description": description}


def main() -> None:
    """Scrape every linked page of the index and dump the entries as JSON."""
    data = []
    for path in extract_paths(fetch_html(BASE_URL + INDEX_PATH)):
        entry = parse_entry(fetch_html(BASE_URL + path))
        if entry is None:
            # Previously this case crashed with IndexError on results[0];
            # report the page and keep going so one odd page does not lose
            # everything scraped so far.
            print(f"warning: no entry found at {path}, skipped", file=sys.stderr)
            continue
        data.append(entry)

    with open(OUTPUT_FILE, mode="w", encoding="utf-8") as f:
        json.dump(data, f)


if __name__ == "__main__":
    main()