41 lines
1.1 KiB
Python
41 lines
1.1 KiB
Python
from flask import Blueprint, request, jsonify
|
|
from app.scraper.html_scraper import HtmlScraper
|
|
import logging
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
# Blueprint named 'scrape'; routes registered on it are mounted under the
# '/api' URL prefix.
scrape_bp = Blueprint('scrape', __name__, url_prefix='/api')
|
|
|
|
|
|
@scrape_bp.route('/scrape', methods=['POST'])
def scrape_html():
    """
    Return the HTML content of a given page.

    Expected JSON payload:
    {
        "url": "https://example.com"
    }

    Responses (all JSON):
        200: {"success": True, "html": <value returned by
             HtmlScraper.get_page_html>}
        400: {"success": False, "error": <message>} when the body is not a
             JSON object, has no "url" key, or "url" is not a non-empty
             string.
        500: {"success": False, "error": <exception text>} when creating
             the scraper or fetching the page raises.
    """
    # silent=True makes a malformed or non-JSON body come back as None
    # instead of raising, so client mistakes are answered with 400 below
    # rather than being reported as a 500 server error.
    data = request.get_json(silent=True)

    # Require a JSON *object*: for a JSON string or list, the `in` test
    # would do substring/element matching and data['url'] would then raise.
    if not isinstance(data, dict) or 'url' not in data:
        logger.warning("Missing url in request")
        return jsonify({"success": False, "error": "Missing url in request"}), 400

    target_url = data['url']
    if not isinstance(target_url, str) or not target_url.strip():
        logger.warning("Invalid url in request")
        return jsonify({"success": False, "error": "Invalid url in request"}), 400

    # NOTE(review): target_url is caller-controlled and is fetched
    # server-side; consider restricting schemes/hosts if this endpoint is
    # reachable by untrusted clients (SSRF).
    try:
        logger.info("Scraping HTML for URL: %s", target_url)
        scraper = HtmlScraper()
        try:
            html = scraper.get_page_html(target_url)
            return jsonify({"success": True, "html": html})
        finally:
            # Always close the scraper, even when the fetch raises.
            scraper.close()
    except Exception as e:
        # Top-level boundary for the endpoint: log with traceback and
        # report the failure to the client as JSON.
        logger.exception("Error scraping page: %s", e)
        return jsonify({"success": False, "error": str(e)}), 500
|