Files
html-scraper/Dockerfile

55 lines
1.4 KiB
Docker

# Base image: slim Python 3.12, forced to linux/amd64.
# NOTE(review): the platform pin is presumably required because the Chrome
# package fetched later in this file is the amd64 .deb - confirm before
# attempting a multi-arch build.
FROM --platform=linux/amd64 python:3.12-slim
# Copy the uv and uvx binaries out of the upstream uv image into /bin.
# NOTE(review): the `latest` tag floats, so a rebuild can silently pick up a
# different uv release; consider pinning a version tag or digest.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# OS-level packages plus Google Chrome, all in one layer.
# Chrome is fetched as the current stable amd64 .deb and installed through
# apt-get so apt resolves whatever dependencies the package declares. The
# downloaded .deb and the apt package lists are removed in the same RUN so
# they never end up in the committed layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        fonts-liberation \
        gnupg2 \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libcups2 \
        libdbus-1-3 \
        libdrm2 \
        libgbm1 \
        libgtk-3-0 \
        libnspr4 \
        libnss3 \
        libxcomposite1 \
        libxdamage1 \
        libxrandr2 \
        unzip \
        wget \
        xdg-utils \
    && wget --quiet --output-document=/tmp/google-chrome.deb \
        https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb \
    && apt-get install -y --no-install-recommends /tmp/google-chrome.deb \
    && rm /tmp/google-chrome.deb \
    && rm -rf /var/lib/apt/lists/*
# The project lives in /app; WORKDIR creates the directory if it is missing.
WORKDIR /app
# uv settings, kept in ENV because the container command also goes through uv:
#   UV_NO_DEV=1        skip the dev dependency group when syncing.
#   UV_LINK_MODE=copy  copy packages out of the uv cache instead of hard-linking.
#                      The cache is a BuildKit cache mount on a different
#                      filesystem than /app/.venv, so hard links cannot work and
#                      uv would otherwise warn and fall back on every build.
ENV UV_NO_DEV=1 \
    UV_LINK_MODE=copy
# Install dependencies (cached layer).
# Only uv.lock and pyproject.toml are bind-mounted here, so this layer is
# rebuilt when those two files change, not on every source edit.
# --no-install-project leaves the project itself for the later sync.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv sync --locked --no-install-project
# Bring in the application source.
# NOTE(review): this copies the entire build context; make sure a .dockerignore
# excludes .venv, .git and local secrets - none is visible from this file.
COPY . /app
# Second sync installs the project itself on top of the cached dependencies.
# --locked makes the build fail if uv.lock is out of date with pyproject.toml.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --locked
# Pre-download chromedriver matching installed Chrome
# Launches a headless SeleniumBase Driver with uc=True once at build time and
# quits it immediately. Presumably this makes SeleniumBase fetch its driver
# binary during the build so containers do not have to download it on first
# use - TODO confirm where SeleniumBase stores the driver.
RUN uv run python -c "from seleniumbase import Driver; d = Driver(uc=True, headless=True); d.quit()"
# Runtime environment baked into the image.
ENV HEADLESS_BROWSER=True \
    PORT=4001
# Documents the listening port; EXPOSE does not publish it by itself.
EXPOSE 4001
# Start gunicorn through uv, serving `app` from the `run` module on all
# interfaces, port 4001, with 2 workers and a 120 second worker timeout.
# NOTE(review): the bind address is hard-coded, so overriding PORT at run time
# does not move the listener - confirm whether PORT is read elsewhere.
CMD ["uv", "run", "gunicorn", \
     "--bind", "0.0.0.0:4001", \
     "--timeout", "120", \
     "--workers", "2", \
     "run:app"]