# bundle_parser.py
import json
import logging

import requests
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)


class BundleParser:
    """Fetch a bundle page and extract the item data embedded in it.

    The page is expected to contain a
    ``<script id="webpack-bundle-page-data" type="application/json">`` tag
    whose content is a JSON document. Items are read from the ``items`` list
    under that document's ``bundleData`` key.
    """

    # Seconds to wait for the server before giving up. Without a timeout,
    # ``requests`` may block indefinitely on a stalled connection.
    DEFAULT_TIMEOUT = 30

    def __init__(self, url, category=None, timeout=DEFAULT_TIMEOUT):
        """Store the page URL and the optional fallback category.

        Args:
            url: Address of the bundle page to download.
            category: Category assigned to items that carry no ``category``
                key of their own. ``None`` (or any falsy value) means
                ``"Unbekannt"`` is used instead.
            timeout: Timeout in seconds passed to ``requests.get``. Pass
                ``None`` to wait without limit.
        """
        self.url = url
        self.category = category
        self.timeout = timeout

    def fetch_data(self):
        """Download the page and return the decoded embedded JSON document.

        Returns:
            The object decoded from the ``webpack-bundle-page-data`` script
            tag.

        Raises:
            requests.RequestException: If the request fails, times out, or
                the server answers with an HTTP error status.
            ValueError: If the script tag is missing, has no usable text
                content, or does not contain valid JSON
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        logger.info("Rufe Bundle-Daten von %s ab...", self.url)
        response = requests.get(self.url, timeout=self.timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")
        script_tag = soup.find(
            "script",
            {"id": "webpack-bundle-page-data", "type": "application/json"},
        )
        if script_tag is None:
            logger.error("Kein JSON-Datenblock 'webpack-bundle-page-data' gefunden!")
            raise ValueError("Kein JSON-Datenblock gefunden!")

        # ``.string`` is None when the tag is empty or has several children;
        # passing that to json.loads would raise TypeError, so report it with
        # the same exception type as a missing data block.
        raw_json = script_tag.string
        if not raw_json or not raw_json.strip():
            logger.error("JSON-Datenblock 'webpack-bundle-page-data' ist leer!")
            raise ValueError("JSON-Datenblock ist leer!")

        data = json.loads(raw_json)

        # Stringifying the whole payload is costly; only do it when the
        # message will actually be emitted.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Erhaltener JSON-Block (gekürzt): %s ...", str(data)[:200])
        return data

    def get_relevant_bundle_data(self):
        """Return the ``bundleData`` section of the fetched document.

        Returns:
            The value stored under ``bundleData``, or an empty dict when the
            key is absent or its value is empty/null.
        """
        data = self.fetch_data()
        return data.get("bundleData") or {}

    def parse_items(self):
        """Fetch the bundle and return one summary dict per item.

        Returns:
            A list of dicts with the keys ``title`` (the item's ``title`` or
            ``"Unbekannt"``), ``category`` (the item's ``category``, else the
            parser's fallback category, else ``"Unbekannt"``) and ``details``
            (the full item serialized as JSON with sorted keys and non-ASCII
            characters kept as-is).
        """
        bundle_data = self.get_relevant_bundle_data()
        # Treat a null/empty ``items`` entry like a missing one.
        items = bundle_data.get("items") or []
        # The fallback does not depend on the item, so compute it once.
        fallback_category = self.category if self.category else "Unbekannt"

        return [
            {
                "title": item.get("title", "Unbekannt"),
                "category": item.get("category", fallback_category),
                "details": json.dumps(item, sort_keys=True, ensure_ascii=False),
            }
            for item in items
        ]