Refactor JSON file retrieval to use regex for improved accuracy in matching file links and streamline file name extraction.
This commit is contained in:
19
main.py
19
main.py
@@ -7,7 +7,7 @@
|
||||
import sys
|
||||
import galdPl
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import os
|
||||
|
||||
def get_json_files_from_folder(folder):
|
||||
@@ -15,15 +15,16 @@ def get_json_files_from_folder(folder):
|
||||
url = base_url + folder
|
||||
r = requests.get(url, timeout=10)
|
||||
r.raise_for_status()
|
||||
soup = BeautifulSoup(r.text, "html.parser")
|
||||
|
||||
# Hledáme JSON soubory pomocí regex
|
||||
json_pattern = r'href="(/gald/galdistream/src/branch/main/resources/[^"]*\.json)"'
|
||||
matches = re.findall(json_pattern, r.text)
|
||||
|
||||
files = []
|
||||
# Hledáme odkazy s .json v href
|
||||
for a in soup.find_all("a", href=lambda x: x and x.endswith('.json')):
|
||||
href = a.get("href", "")
|
||||
if href.startswith("/gald/galdistream/src/branch/main/resources/"):
|
||||
# Extrahujeme pouze název souboru
|
||||
file_name = href.split("/")[-1]
|
||||
files.append(file_name)
|
||||
for match in matches:
|
||||
# Extrahujeme pouze název souboru
|
||||
file_name = match.split("/")[-1]
|
||||
files.append(file_name)
|
||||
return files
|
||||
|
||||
def update_json_db():
|
||||
|
||||
Reference in New Issue
Block a user