# bundle_parser.py
|
|
|
|
import requests
|
|
import json
|
|
from bs4 import BeautifulSoup
|
|
import logging
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
class BundleParser:
    """Fetch a bundle page and extract its items from the embedded JSON.

    The page is expected to contain a
    ``<script id="webpack-bundle-page-data" type="application/json">``
    element whose text is a JSON object. Items are read from the ``items``
    list under that object's ``bundleData`` key.
    """

    def __init__(self, url, category=None, timeout=30):
        """Store the request settings; no network access happens here.

        Args:
            url: Address of the bundle page to download.
            category: Fallback category for items that carry no
                ``"category"`` key. A falsy value falls back to
                ``"Unbekannt"``.
            timeout: Seconds handed to ``requests.get`` so that a stalled
                server cannot block the caller indefinitely.
        """
        self.url = url
        self.category = category
        self.timeout = timeout

    def fetch_data(self):
        """Download the page and return the decoded embedded JSON block.

        Returns:
            The object decoded from the ``webpack-bundle-page-data`` script
            tag.

        Raises:
            requests.RequestException: On connection failure, timeout, or an
                HTTP error status (via ``raise_for_status``).
            ValueError: If the script tag is missing, has no single text
                payload, or the payload is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        logger.info("Rufe Bundle-Daten von %s ab...", self.url)
        response = requests.get(self.url, timeout=self.timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")
        script_tag = soup.find(
            "script",
            {"id": "webpack-bundle-page-data", "type": "application/json"},
        )
        if script_tag is None:
            logger.error("Kein JSON-Datenblock 'webpack-bundle-page-data' gefunden!")
            raise ValueError("Kein JSON-Datenblock gefunden!")

        # ``.string`` is None when the tag is empty or has several child
        # nodes; fail with the same exception type as a missing tag instead
        # of letting ``json.loads(None)`` raise a TypeError.
        payload = script_tag.string
        if not payload:
            logger.error("JSON-Datenblock 'webpack-bundle-page-data' ist leer!")
            raise ValueError("JSON-Datenblock ist leer!")

        data = json.loads(payload)
        # Stringifying the whole payload is wasted work unless DEBUG is on.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Erhaltener JSON-Block (gekürzt): %s ...", str(data)[:200])
        return data

    def get_relevant_bundle_data(self):
        """Return the ``bundleData`` mapping of the fetched page.

        Returns:
            The value stored under ``"bundleData"``, or an empty dict when
            the key is absent or holds a falsy value such as JSON ``null``.
        """
        return self.fetch_data().get("bundleData") or {}

    def parse_items(self):
        """Fetch the page and return one summary dict per bundle item.

        Returns:
            A list of dicts with the keys ``"title"`` (item title or
            ``"Unbekannt"``), ``"category"`` (item category, else the
            parser's fallback category, else ``"Unbekannt"``) and
            ``"details"`` (the whole item serialized as JSON with sorted
            keys and non-ASCII characters preserved). The list is empty when
            the page carries no items.
        """
        # ``or []`` also covers an explicit JSON ``null`` for "items".
        items = self.get_relevant_bundle_data().get("items") or []
        fallback_category = self.category or "Unbekannt"
        return [
            {
                "title": item.get("title", "Unbekannt"),
                "category": item.get("category", fallback_category),
                "details": json.dumps(item, sort_keys=True, ensure_ascii=False),
            }
            for item in items
        ]
|