feat: initialize HTML scraper API with Flask and SeleniumBase
This commit is contained in:
0
app/routes/__init__.py
Normal file
0
app/routes/__init__.py
Normal file
12
app/routes/health_routes.py
Normal file
12
app/routes/health_routes.py
Normal file
@@ -0,0 +1,12 @@
|
||||
from flask import Blueprint, jsonify
|
||||
import logging
|
||||
|
||||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||||
|
||||
# Blueprint named 'health'; routes registered on it are prefixed with /api.
health_bp = Blueprint('health', __name__, url_prefix='/api')
|
||||
|
||||
|
||||
@health_bp.route('/health', methods=['GET'])
def health_check():
    """Report that the service is up.

    Logs the request at INFO level and responds with the JSON body
    ``{"status": "ok"}`` using Flask's default status code.
    """
    logger.info("Health check requested")
    payload = {"status": "ok"}
    return jsonify(payload)
|
||||
40
app/routes/scrape_routes.py
Normal file
40
app/routes/scrape_routes.py
Normal file
@@ -0,0 +1,40 @@
|
||||
from flask import Blueprint, request, jsonify
|
||||
from app.scraper.html_scraper import HtmlScraper
|
||||
import logging
|
||||
|
||||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||||
|
||||
# Blueprint named 'scrape'; routes registered on it are prefixed with /api.
scrape_bp = Blueprint('scrape', __name__, url_prefix='/api')
|
||||
|
||||
|
||||
def _is_http_url(value):
    """Return True when *value* is a string holding an absolute http(s) URL."""
    # Local import so this block does not depend on the file's import header.
    from urllib.parse import urlparse

    if not isinstance(value, str):
        return False
    try:
        parsed = urlparse(value.strip())
    except ValueError:
        # urlparse raises on some malformed inputs (e.g. a broken IPv6 host).
        return False
    return parsed.scheme in ("http", "https") and bool(parsed.netloc)


@scrape_bp.route('/scrape', methods=['POST'])
def scrape_html():
    """
    Return the HTML content of a given page.

    Expected JSON payload:
    {
        "url": "https://example.com"
    }

    Responses:
        200: {"success": True, "html": <page html>}
        400: {"success": False, "error": <reason>} when the body is not a
             JSON object, "url" is missing, or "url" is not an absolute
             http(s) URL.
        500: {"success": False, "error": <message>} when scraping fails.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so client mistakes are answered with 400 rather than being
    # swallowed by the generic handler below and reported as 500.
    data = request.get_json(silent=True)

    # The isinstance check rejects JSON arrays/strings/numbers, for which
    # "'url' in data" could succeed (or raise) and data['url'] would blow up.
    if not isinstance(data, dict) or 'url' not in data:
        logger.warning("Missing url in request")
        return jsonify({"success": False, "error": "Missing url in request"}), 400

    target_url = data['url']

    # The URL is untrusted and is handed to a browser-backed scraper: only
    # allow http(s) so schemes such as file:// cannot be used to read local
    # resources.
    if not _is_http_url(target_url):
        logger.warning("Invalid url in request")
        return jsonify({"success": False, "error": "Invalid url in request"}), 400

    logger.info("Scraping HTML for URL: %s", target_url)

    try:
        scraper = HtmlScraper()
        try:
            html = scraper.get_page_html(target_url)
        finally:
            # Always release the scraper, even when the page load fails.
            scraper.close()
    except Exception as e:
        # Top-level boundary for the endpoint: log with traceback and
        # translate the failure into a JSON error response.
        logger.exception("Error scraping page: %s", e)
        return jsonify({"success": False, "error": str(e)}), 500

    return jsonify({"success": True, "html": html})
|
||||
Reference in New Issue
Block a user