Files
html-scraper/app/routes/scrape_routes.py

41 lines
1.1 KiB
Python

from flask import Blueprint, request, jsonify
from app.scraper.html_scraper import HtmlScraper
import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Blueprint named 'scrape'; every route registered on it is served under the
# '/api' URL prefix.
scrape_bp = Blueprint('scrape', __name__, url_prefix='/api')
@scrape_bp.route('/scrape', methods=['POST'])
def scrape_html():
    """Return the HTML content of a given page.

    Expected JSON payload::

        {
            "url": "https://example.com"
        }

    Returns:
        A Flask JSON response, optionally paired with a status code:

        * 200 -- ``{"success": True, "html": <page html>}``
        * 400 -- ``{"success": False, "error": <message>}`` when the body is
          not a JSON object, has no ``url`` key, or ``url`` is not a
          non-empty string.
        * 500 -- ``{"success": False, "error": <message>}`` when creating the
          scraper, fetching the page, or closing the scraper fails.
    """
    # silent=True makes get_json() return None for a missing, malformed or
    # wrongly-typed body instead of raising, so client mistakes are answered
    # with 400 rather than being swallowed by the 500 handler below.
    data = request.get_json(silent=True)

    # A JSON body may legally be a list/string/number; only an object can
    # carry the "url" key.
    if not isinstance(data, dict) or 'url' not in data:
        logger.warning("Missing url in request")
        return jsonify({"success": False, "error": "Missing url in request"}), 400

    target_url = data['url']
    if not isinstance(target_url, str) or not target_url.strip():
        logger.warning("Invalid url in request")
        return jsonify({"success": False, "error": "Invalid url in request"}), 400

    # NOTE(review): target_url is caller-controlled and fetched server-side
    # (SSRF surface). Consider restricting schemes/hosts if this endpoint is
    # reachable by untrusted clients.
    logger.info("Scraping HTML for URL: %s", target_url)

    try:
        scraper = HtmlScraper()
        try:
            html = scraper.get_page_html(target_url)
            return jsonify({"success": True, "html": html})
        finally:
            # Always release the scraper, even when the fetch fails.
            scraper.close()
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback.
        logger.exception("Error scraping page: %s", e)
        # NOTE(review): str(e) is returned to the client as before; confirm
        # that exposing internal error text is acceptable.
        return jsonify({"success": False, "error": str(e)}), 500