feat: initialize HTML scraper API with Flask and SeleniumBase

This commit is contained in:
2026-02-13 16:03:35 +01:00
commit 9659382d62
16 changed files with 1707 additions and 0 deletions

48
Dockerfile Normal file
View File

@@ -0,0 +1,48 @@
# syntax=docker/dockerfile:1

# Base image is pinned to the Debian bookworm variant on purpose: the package
# names installed below (notably libasound2) are the bookworm names. On newer
# Debian releases libasound2 is only a virtual package with several providers,
# and apt-get refuses to choose one, which breaks the build when the floating
# "3.12-slim" tag moves to a newer distribution.
FROM python:3.12-slim-bookworm

# Copy the uv and uvx binaries out of the published uv image.
# NOTE(review): ":latest" is a moving tag; pin a specific uv release (ideally
# by digest) so rebuilds are reproducible.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

# Install system dependencies for Chrome / SeleniumBase.
# The package index is fetched and removed in the same layer so it never ends
# up in the image. Packages are listed alphabetically to keep diffs small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    fonts-liberation \
    gnupg2 \
    libasound2 \
    libatk-bridge2.0-0 \
    libatk1.0-0 \
    libcups2 \
    libdbus-1-3 \
    libdrm2 \
    libgbm1 \
    libgtk-3-0 \
    libnspr4 \
    libnss3 \
    libxcomposite1 \
    libxdamage1 \
    libxrandr2 \
    unzip \
    wget \
    xdg-utils \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /app

# UV_NO_DEV=1     - uv leaves the dev dependency group out of the environment.
# UV_LINK_MODE=copy - the uv cache below is a BuildKit cache mount, i.e. a
#                   different filesystem from /app/.venv, so hard links from
#                   the cache cannot be created. Asking for copies up front
#                   avoids the failed-hardlink fallback warning on every sync.
ENV UV_NO_DEV=1 \
    UV_LINK_MODE=copy

# Install dependencies only (cached layer). The lockfile and pyproject.toml are
# bind-mounted rather than copied, so this layer is rebuilt only when one of
# those two files changes, not on every source edit.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv sync --locked --no-install-project

# Copy the build context and install the project itself on top of the
# already-synced dependencies.
# NOTE(review): this copies the entire build context; confirm a .dockerignore
# excludes .git, local .venv directories and any .env files.
COPY . /app
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --locked
# Runtime environment for the container process.
# HEADLESS_BROWSER / PORT are passed through unchanged (presumably consumed by
# the application code - confirm against run.py).
# PATH is extended with the project virtual environment that `uv sync` created
# under the /app working directory, so its console scripts (gunicorn) can be
# executed directly.
ENV HEADLESS_BROWSER=True \
    PORT=4001 \
    PATH="/app/.venv/bin:$PATH"

# Documentation only: the port gunicorn binds to below.
EXPOSE 4001

# Start gunicorn straight from the virtual environment instead of through
# `uv run`: the environment was fully synced at build time, so container start
# needs no resolver/sync step, no network access and no writable venv, and
# gunicorn receives stop signals without an intermediate wrapper process.
# The bind address is a literal (exec form performs no variable expansion), so
# overriding PORT at runtime does not move gunicorn's listener.
# NOTE(review): no USER instruction is set, so the server runs as root;
# consider a dedicated non-root user once Chrome sandbox requirements and
# driver download paths have been confirmed.
CMD ["gunicorn", "--bind", "0.0.0.0:4001", "--timeout", "120", "--workers", "2", "run:app"]