diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..f4a83ea --- /dev/null +++ b/.dockerignore @@ -0,0 +1,9 @@ +__pycache__/ +*.pyc +.git/ +uploads/ +results/ +annotated/ +tests/ +*.md +.github/ diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index 0dc53be..74707aa 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -2,74 +2,79 @@ name: Build Docker Images on: push: - branches: [ main ] + branches: [ main, docker_beauty ] # TODO: remove docker_beauty before merging workflow_dispatch: env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} + IMAGE: breedinginsight/nemaquant # Docker Hub org/image jobs: build: runs-on: ubuntu-latest - permissions: - contents: read - packages: write + strategy: + fail-fast: false + matrix: + include: + - variant: cpu + dockerfile: Dockerfile + tag_suffix: "" # cpu is the default, no suffix on latest/version tags + - variant: gpu + dockerfile: Dockerfile.gpu + tag_suffix: "-gpu" steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 + + - name: Extract version from CHANGELOG.md + id: version + run: | + VERSION=$(grep -m1 '^## \[' CHANGELOG.md | sed 's/.*\[\(.*\)\].*/\1/') + if [ -z "$VERSION" ]; then + echo "ERROR: Could not extract version from CHANGELOG.md" >&2 + exit 1 + fi + echo "version=$VERSION" >> $GITHUB_OUTPUT + echo "Detected version: $VERSION" - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Log in to GitHub Container Registry + - name: Log in to Docker Hub uses: docker/login-action@v3 with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - # - name: Log in to Docker Hub - # if: ${{ secrets.DOCKERHUB_USERNAME }} - # uses: docker/login-action@v3 - # with: - # username: ${{ secrets.DOCKERHUB_USERNAME }} - # password: ${{ secrets.DOCKERHUB_TOKEN }} + username: ${{ secrets.DOCKERHUB_USERNAME }} + 
password: ${{ secrets.DOCKERHUB_PASSWORD }} - name: Extract metadata id: meta uses: docker/metadata-action@v5 with: - images: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - # ${{ secrets.DOCKERHUB_USERNAME }}/nemaquant + images: ${{ env.IMAGE }} tags: | - type=raw,value=latest - type=sha,prefix=main- + # version from CHANGELOG.md: produce 1.2.3 / 1.2.3-gpu + type=raw,value=${{ steps.version.outputs.version }}${{ matrix.tag_suffix }} + # on main branch: produce latest / latest-gpu + type=raw,value=latest,suffix=${{ matrix.tag_suffix }} - - name: Build and push Docker image - uses: docker/build-push-action@v5 + - name: Build and push + uses: docker/build-push-action@v6 with: context: . + file: ${{ matrix.dockerfile }} platforms: linux/amd64 + provenance: false + sbom: false push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max + cache-from: type=gha,scope=${{ matrix.variant }} + cache-to: type=gha,mode=max,scope=${{ matrix.variant }} - name: Summary run: | - echo "## Build Summary" >> $GITHUB_STEP_SUMMARY - echo "- **GitHub Container Registry**: \`${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest\`" >> $GITHUB_STEP_SUMMARY - echo "- **Platform**: linux/amd64 (optimized for Docker Desktop)" >> $GITHUB_STEP_SUMMARY - # if [ -n "${{ secrets.DOCKERHUB_USERNAME }}" ]; then - # echo "- **Docker Hub**: \`${{ secrets.DOCKERHUB_USERNAME }}/nemaquant:latest\`" >> $GITHUB_STEP_SUMMARY - # fi - echo "" >> $GITHUB_STEP_SUMMARY - echo "### Pull the image:" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`bash" >> $GITHUB_STEP_SUMMARY - echo "docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest" >> $GITHUB_STEP_SUMMARY + echo "## ${{ matrix.variant }} image" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`" >> $GITHUB_STEP_SUMMARY + echo "${{ steps.meta.outputs.tags }}" >> $GITHUB_STEP_SUMMARY echo "\`\`\`" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/deploy-to-hf.yml 
b/.github/workflows/deploy-to-hf.yml index 63ada9f..2ba4328 100644 --- a/.github/workflows/deploy-to-hf.yml +++ b/.github/workflows/deploy-to-hf.yml @@ -2,7 +2,7 @@ name: Deploy to Hugging Face Spaces on: push: - branches: [ main ] + branches: [ main, docker_beauty ] # TODO: remove docker_beauty before merging workflow_dispatch: jobs: @@ -11,94 +11,96 @@ jobs: permissions: contents: read + env: + HF_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }} + HF_SPACE_REPO: ${{ secrets.HUGGINGFACE_SPACE_REPO }} + steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: lfs: true - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.12' + - name: Validate secrets + run: | + if [ -z "$HF_TOKEN" ]; then + echo "ERROR: HUGGINGFACE_TOKEN secret is not set." >&2 + exit 1 + fi + if [ -z "$HF_SPACE_REPO" ]; then + echo "ERROR: HUGGINGFACE_SPACE_REPO secret is not set." >&2 + exit 1 + fi - - name: Install dependencies + - name: Configure git run: | - pip install huggingface_hub - # Install Git LFS - sudo apt-get update - sudo apt-get install -y git-lfs + git config --global user.name "GitHub Actions" + git config --global user.email "actions@github.com" + git config --global credential.helper store + # HF accepts any username when authenticating via token + printf 'https://user:%s@huggingface.co\n' "$HF_TOKEN" > ~/.git-credentials git lfs install - - name: Push to Hugging Face Space - env: - HF_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }} + - name: Clone existing HF Space run: | - # Check if HUGGINGFACE_SPACE_REPO is set - if [ -z "${{ secrets.HUGGINGFACE_SPACE_REPO }}" ]; then - echo "HUGGINGFACE_SPACE_REPO secret not set. 
Please set it to your space repository name (e.g., 'username/nemaquant')" - exit 1 - fi - - # Configure git credentials for Hugging Face - git config --global credential.helper store - echo "https://user:${HF_TOKEN}@huggingface.co" > ~/.git-credentials - - # Create a temporary directory for the space - mkdir -p space_repo + git clone "https://huggingface.co/spaces/$HF_SPACE_REPO" space_repo cd space_repo - - # Initialize git repository with LFS - git init -b main git lfs install - git remote add origin https://huggingface.co/spaces/${{ secrets.HUGGINGFACE_SPACE_REPO }} - - # Create .gitattributes for LFS BEFORE copying files - echo "*.pt filter=lfs diff=lfs merge=lfs -text" > .gitattributes - echo "*.pth filter=lfs diff=lfs merge=lfs -text" >> .gitattributes - echo "*.bin filter=lfs diff=lfs merge=lfs -text" >> .gitattributes - echo "*.h5 filter=lfs diff=lfs merge=lfs -text" >> .gitattributes - echo "*.onnx filter=lfs diff=lfs merge=lfs -text" >> .gitattributes - - # Copy necessary files - cp ../README.md . + + - name: Sync files into Space + run: | + cd space_repo + + # Ensure LFS tracks model file types + cat > .gitattributes <<'EOF' + *.pt filter=lfs diff=lfs merge=lfs -text + *.pth filter=lfs diff=lfs merge=lfs -text + *.bin filter=lfs diff=lfs merge=lfs -text + *.h5 filter=lfs diff=lfs merge=lfs -text + *.onnx filter=lfs diff=lfs merge=lfs -text + EOF + + # Sync application files cp ../app.py . cp ../requirements.txt . cp ../Dockerfile . cp ../yolo_utils.py . + cp ../README.md . cp -r ../templates . cp -r ../static . - - # Copy all .pt and .onnx files from the root directory - echo "Checking for model files..." - find .. -maxdepth 1 -type f -name "*.pt" -exec cp {} . \; - find .. -maxdepth 1 -type f -name "*.onnx" -exec cp {} . \; - - # List what we're about to commit + + # Sync model weight files + find .. -maxdepth 1 -type f \( -name "*.pt" -o -name "*.onnx" \) -exec cp {} . 
\; + echo "Files to be committed:" ls -lh - - # Add and commit - git add .gitattributes + + - name: Commit and push + run: | + cd space_repo + echo "--- Remote URL ---" + git remote -v + echo "--- Current HEAD ---" + git log --oneline -3 git add . - git config user.name "GitHub Actions" - git config user.email "actions@github.com" - git commit -m "Update NemaQuant app from GitHub Actions - ${{ github.sha }}" - - # Push to Hugging Face Space (force push to main branch) - git push --force origin main + if git diff --cached --quiet; then + echo "No changes to deploy." + else + git commit -m "deploy: sync from GitHub ${{ github.sha }}" + git push -v origin main + fi - name: Summary run: | echo "## Deployment Summary" >> $GITHUB_STEP_SUMMARY - echo "- **Hugging Face Space**: \`https://huggingface.co/spaces/${{ secrets.HUGGINGFACE_SPACE_REPO }}\`" >> $GITHUB_STEP_SUMMARY + echo "- **Space**: [spaces/$HF_SPACE_REPO](https://huggingface.co/spaces/$HF_SPACE_REPO)" >> $GITHUB_STEP_SUMMARY echo "- **Commit**: \`${{ github.sha }}\`" >> $GITHUB_STEP_SUMMARY - echo "- **Status**: Successfully deployed to HF Spaces" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY - echo "### Model Files:" >> $GITHUB_STEP_SUMMARY + echo "### Model files deployed:" >> $GITHUB_STEP_SUMMARY cd space_repo for file in *.pt *.onnx; do - if [ -f "$file" ]; then - echo "- ✅ $file ($(ls -lh "$file" | awk '{print $5}'))" >> $GITHUB_STEP_SUMMARY - fi + [ -f "$file" ] && echo "- \`$file\` ($(ls -lh "$file" | awk '{print $5}'))" >> $GITHUB_STEP_SUMMARY done diff --git a/.github/workflows/update-dockerhub-meta.yml b/.github/workflows/update-dockerhub-meta.yml new file mode 100644 index 0000000..eddd675 --- /dev/null +++ b/.github/workflows/update-dockerhub-meta.yml @@ -0,0 +1,58 @@ +name: Update Docker Hub Metadata + +on: + push: + branches: [ main ] + paths: + - 
README.md # re-run when README changes + - .github/workflows/update-dockerhub-meta.yml + workflow_dispatch: + +env: + DOCKERHUB_REPO: breedinginsight/nemaquant + +jobs: + update-meta: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v5 + + - name: Update Docker Hub description + uses: peter-evans/dockerhub-description@v4 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + repository: ${{ env.DOCKERHUB_REPO }} + short-description: "YOLO-based nematode egg detection with real-time processing" + readme-filepath: ./README.md # used as the full (long) description on Docker Hub + + - name: Set Docker Hub category via API + run: | + # Authenticate and get JWT token + TOKEN=$(curl -s -X POST "https://hub.docker.com/v2/users/login" \ + -H "Content-Type: application/json" \ + -d "{\"username\": \"${{ secrets.DOCKERHUB_USERNAME }}\", \"password\": \"${{ secrets.DOCKERHUB_PASSWORD }}\"}" \ + | jq -r '.token') + + if [ -z "$TOKEN" ] || [ "$TOKEN" = "null" ]; then + echo "ERROR: Failed to authenticate with Docker Hub" >&2 + exit 1 + fi + + # Set repository category (Machine Learning) + # Full list: https://hub.docker.com/search?categories= + curl -s -X PATCH "https://hub.docker.com/v2/repositories/${{ env.DOCKERHUB_REPO }}/" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"categories": [{"name": "Machine Learning"}]}' \ + | jq . 
+ + - name: Summary + run: | + echo "## Docker Hub Metadata Updated" >> $GITHUB_STEP_SUMMARY + echo "- **Repository**: [hub.docker.com/r/${{ env.DOCKERHUB_REPO }}](https://hub.docker.com/r/${{ env.DOCKERHUB_REPO }})" >> $GITHUB_STEP_SUMMARY + echo "- **Short description**: YOLO-based nematode egg detection with real-time processing" >> $GITHUB_STEP_SUMMARY + echo "- **Full description**: synced from \`README.md\`" >> $GITHUB_STEP_SUMMARY + echo "- **Category**: Machine Learning" >> $GITHUB_STEP_SUMMARY diff --git a/.gitignore b/.gitignore index d3e4fde..562f4a7 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ __pycache__/ uploads/ results/ annotated/ +tests/ \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..e9237a3 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,63 @@ +Docker image is versioned based on this file. +Please follow the exact format to track versions and updates. +Add new versions on top of older ones. + +## [0.0.2] - 2026-03-19 + +### Fixed — Hugging Face Spaces deployment + +HF Spaces runs the container behind an HTTPS reverse proxy. Flask's default session cookie settings and the 4 KB cookie size limit caused every route after `/uploads` to silently operate on a different session, making processing and preview fail. + +**Root cause:** the HTTPS proxy requires `SameSite=None; Secure` cookies to forward them cross-origin, but even with correct cookie settings the client-side cookie can be dropped when it exceeds ~4 KB (large batches) or when gunicorn assigns a different worker. The real fix was making the session ID travel explicitly in the request body rather than relying solely on the cookie. + +**Changes:** +- `FLASK_SECRET_KEY` read from environment variable — required so the signed cookie is consistent across gunicorn workers and restarts. 
Falls back to a random key with a warning for local dev +- `SESSION_COOKIE_SECURE=True`, `SESSION_COOKIE_SAMESITE='None'` set automatically when HF Spaces env vars (`SPACE_HOST`, `SPACE_ID`) are detected +- All `fetch()` calls in `static/script.js` include `credentials: 'include'` +- `/uploads` returns `session_id` in its JSON response; JS stores it as `uploadSessionId` +- Every subsequent request sends `uploadSessionId` back explicitly: as a form field (`/process`), query param (`/progress`), or JSON body field (`/preview`, `/annotate`, `/export_csv`, `/export_images`) +- All server routes use `client_session_id or session['id']` — client-supplied id is authoritative since it came directly from the `/uploads` response +- `filename_map` and `uuid_map_to_uuid_imgname` persisted to `/tmp/nemaquant/sessions//meta.json` at upload time and loaded from disk in all routes as fallback when the cookie data is missing or truncated + +### Fixed — GPU: "Cannot re-initialize CUDA in forked subprocess" + +- `multiprocessing.Pool` (which uses `fork` by default) copies the parent's CUDA context into child processes, causing a crash when CUDA was already initialized at startup +- GPU path now uses `concurrent.futures.ThreadPoolExecutor` — threads share the parent's CUDA context without re-initializing it +- GPU model (`_gpu_model`) loaded once at startup; CPU model loaded per-worker via `init_worker()` as before + +### Fixed — Apptainer / Singularity compatibility + +- `CMD` uses absolute path `/home/user/app/app.py` — Apptainer ignores `WORKDIR` and uses the host's cwd, causing "No such file or directory" at startup +- `PYTHONPATH=/home/user/.local/lib/python3.12/site-packages` baked into the image — `--cleanenv` resets `HOME` so pip user packages were not found (`ModuleNotFoundError: No module named 'cv2'`) +- `YOLO_CONFIG_DIR` moved to `/tmp/nemaquant/.yolo_config` — the SIF image is read-only, ultralytics could not write its cache to the image layer + +### Fixed — Docker image 
layer format (Windows compatibility) + +- Removed `compression=zstd,oci-mediatypes=true` from the CI build — zstd layers with OCI media types cause `failed to register layer: invalid tar header` on Windows Docker Desktop and Apptainer regardless of engine version. Reverted to default gzip (Docker schema v2) + +### Fixed — Drag-and-drop file upload not working + +- Drag-and-drop called `handleFiles()` (which set the valid file list) but never called `/uploads`, so no files were on the server when "Start Processing" was clicked. The `fileInput.files = files` assignment at the end of `handleFiles()` was a silent no-op — `fileInput.files` is read-only +- Extracted the `/uploads` fetch into a shared `uploadFilesToServer()` function; both the file-picker `change` event and the `drop` event now call it + +### Changed — Runtime data directories + +- `uploads/`, `results/`, `annotated/`, `.yolo_config/` moved from `/home/user/app/` (baked into the image layer) to `/tmp/nemaquant/` — avoids overlay filesystem write errors on HF Spaces and read-only filesystem errors on Apptainer. 
All directories created at app startup + +## [0.0.1] - 2026-03-19 + +- Docker images split by CPU or GPU usage + - CPU image (`Dockerfile`): ~2.3 GB, based on `python:3.12-slim` + - GPU image (`Dockerfile.gpu`): ~10 GB, based on `nvidia/cuda:12.8.1-base-ubuntu24.04` +- CPU image uses CPU-only torch wheel (~250 MB vs ~2.5 GB CUDA wheel) +- GPU image uses CUDA 12.8 torch wheel; cuDNN bundled inside torch, no `cudnn-runtime` base needed +- Switched from `opencv-python` to `opencv-python-headless` (server environment, no display needed) +- Force-reinstall `opencv-python-headless` after `ultralytics` to prevent full opencv being pulled as transitive dependency +- Removed unnecessary apt packages (`libgl1`, `libsm6`, `libxrender1`, `libxext6`) — only needed by full opencv +- Fixed `YOLO_CONFIG_DIR` to use absolute path +- Fixed `PATH` to correctly point to `/home/user/.local/bin` +- Updated CI/CD GitHub Actions workflows: + - `build-images.yml`: builds and pushes both CPU and GPU images to Docker Hub on push to `main`, tagged with version from `CHANGELOG.md` and `latest` + - `deploy-to-hf.yml`: syncs app files and model weights to Hugging Face Space on push to `main`, using Git LFS for weight files +- Added `update-dockerhub-meta.yml` to make a pretty Dockerhub description based on the `README.md` +- Fixed `uploads/`, `results/`, `annotated/` directories not being created at runtime on HF Spaces — re-enabled `mkdir` calls in `app.py` at startup (HF container filesystem can overlay image build dirs) diff --git a/Dockerfile b/Dockerfile index 7d1eb65..b7a2745 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,10 @@ -# Use an official Python runtime as a parent image -FROM python:3.12 -# FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 +# CPU image - use Dockerfile.gpu for GPU support +FROM python:3.12.13-slim-trixie +# Cache bust: 2026-03-19 # run updates before switching over to non-root user RUN apt-get update && apt-get install -y --no-install-recommends \ - libgl1 \ 
libglib2.0-0 \ - libsm6 \ - libxrender1 \ - libxext6 \ && rm -rf /var/lib/apt/lists/* # add new user with ID 1000 to avoid permission issues on HF spaces @@ -16,8 +12,11 @@ RUN useradd -m -u 1000 user USER user # Set home to user's home dir and add local bin to PATH +# PYTHONPATH is set explicitly so packages are found even when HOME is overridden +# (e.g. by Apptainer --cleanenv, which resets HOME to the host user's home) ENV HOME=/home/user \ - PATH=/user/user/.local/bin:$PATH + PATH=/home/user/.local/bin:$PATH \ + PYTHONPATH=/home/user/.local/lib/python3.12/site-packages # Set the working directory in the container WORKDIR $HOME/app @@ -25,7 +24,10 @@ WORKDIR $HOME/app # Try and run pip command after setting the user with `USER user` to avoid permission issues with Python # NOTE - this is from the HF Spaces docs, not sure if necessary COPY --chown=user ./requirements.txt . -RUN pip install --no-cache-dir --upgrade -r requirements.txt +RUN pip install --no-cache-dir torch==2.7.1 torchvision --index-url https://download.pytorch.org/whl/cpu +RUN pip install --no-cache-dir --only-binary :all: -r requirements.txt +# Force headless opencv after ultralytics (which pulls in full opencv-python as a dependency) +RUN pip install --no-cache-dir --force-reinstall opencv-python-headless==4.13.0.92 # Copy the current directory contents into the container at $HOME/app setting the owner to the user COPY --chown=user . $HOME/app @@ -39,8 +41,10 @@ COPY --chown=user . $HOME/app # we should not need to chown, since we are using USER user above RUN mkdir -p uploads results annotated .yolo_config -# set the env var for YOLO user config directory -ENV YOLO_CONFIG_DIR=.yolo_config +# Point YOLO config to /tmp so it is writable under Apptainer (read-only SIF) +# and HF Spaces. /home/user/app/.yolo_config is kept in the image but only used +# as a fallback when the container filesystem is writable (plain Docker). 
+ENV YOLO_CONFIG_DIR=/tmp/nemaquant/.yolo_config # Copy the rest of the application code into the container at /app # This includes app.py, nemaquant.py, templates/, static/, etc. @@ -58,4 +62,4 @@ EXPOSE 7860 # Use gunicorn for production deployment if preferred over Flask's development server # CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"] # For simplicity during development and typical HF Spaces use: -CMD ["python", "app.py"] \ No newline at end of file +CMD ["python", "/home/user/app/app.py"] \ No newline at end of file diff --git a/Dockerfile.gpu b/Dockerfile.gpu new file mode 100644 index 0000000..9b85d8f --- /dev/null +++ b/Dockerfile.gpu @@ -0,0 +1,53 @@ +# GPU image following HF Spaces guidelines: https://huggingface.co/docs/hub/spaces-sdks-docker +# Use: docker build -f Dockerfile.gpu -t cristaniguti/nemaquant:gpu . +# Requires nvidia-container-toolkit on the host (provided by HF Spaces GPU instances). +# PyTorch CUDA wheels bundle their own cuDNN/CUDA libs, so base variant is sufficient. +FROM nvidia/cuda:12.8.1-base-ubuntu24.04 + +# Install Python 3.12 and pip +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3.12 \ + python3.12-venv \ + python3-pip \ + libglib2.0-0 \ + && rm -rf /var/lib/apt/lists/* + +# Make python3.12 the default python +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \ + && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 + +# add new user with ID 1000 to avoid permission issues on HF spaces +# Rename the existing UID 1000 user ('ubuntu') to 'user' for HF Spaces compatibility +RUN usermod -l user ubuntu && usermod -d /home/user -m user +USER user + +# Set home to user's home dir and add local bin to PATH +# PYTHONPATH is set explicitly so packages are found even when HOME is overridden +# (e.g. 
by Apptainer --cleanenv, which resets HOME to the host user's home) +ENV HOME=/home/user \ + PATH=/home/user/.local/bin:$PATH \ + PYTHONPATH=/home/user/.local/lib/python3.12/site-packages + +# Set the working directory in the container +WORKDIR $HOME/app + +COPY --chown=user ./requirements.txt . +RUN pip install --no-cache-dir --break-system-packages torch==2.7.1 torchvision --index-url https://download.pytorch.org/whl/cu128 +RUN pip install --no-cache-dir --break-system-packages --only-binary :all: -r requirements.txt +# Force headless opencv after ultralytics (which pulls in full opencv-python as a dependency) +RUN pip install --no-cache-dir --break-system-packages --force-reinstall opencv-python-headless==4.13.0.92 + +# Copy the current directory contents into the container +COPY --chown=user . $HOME/app + +# Create the necessary dirs +RUN mkdir -p uploads results annotated .yolo_config + +# Point YOLO config to /tmp so it is writable under Apptainer (read-only SIF) +# and HF Spaces. /home/user/app/.yolo_config is kept in the image but only used +# as a fallback when the container filesystem is writable (plain Docker). +ENV YOLO_CONFIG_DIR=/tmp/nemaquant/.yolo_config + +EXPOSE 7860 + +CMD ["python", "/home/user/app/app.py"] diff --git a/README.md b/README.md index d426d0f..38b32bb 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ emoji: 🔬 colorFrom: indigo colorTo: blue sdk: docker +dockerfile: Dockerfile.gpu license: apache-2.0 short_description: "YOLO-based nematode egg detection with real-time processing" tags: @@ -104,16 +105,49 @@ Process 500 images for: ``` The application will be available at `http://localhost:7860` -### Docker Deployment +### Container Deployment 1. **Build the Container**: + +Default image `breedinginsight/nemaquant` is exclusive for **CPU usage**. +For **GPU usage** replace the image by: `breedinginsight/nemaquant:latest-gpu` + +- With Docker + ```bash - docker build -t nemaquant-flask . 
+ docker pull breedinginsight/nemaquant + ``` + +- With Apptainer/Singularity + Slurm from a server: + + ```bash + # 1) On the login node: pull the image once (creates a .sif file) + apptainer pull nemaquant_latest.sif docker://breedinginsight/nemaquant:latest + + # 2) Request an interactive compute allocation (adjust for your cluster and analysis) + salloc -c 4 --mem=16G --time=02:00:00 + + # 3) On the compute node shell that opens, run the app on port 7860 + export PORT=7860 + apptainer run --cleanenv --env PORT=$PORT nemaquant_latest.sif ``` +For **GPU usage** replace the image by: `breedinginsight/nemaquant:latest-gpu` and the option `--nv` for apptainer/singularity run. + 2. **Run the Container**: + +- With Docker + ```bash - docker run -p 7860:7860 -v $(pwd)/results:/app/results nemaquant-flask + docker run -p 7860:7860 breedinginsight/nemaquant + ``` + +- With Apptainer/Singularity + Slurm from our local computer (after running the above commands on server): + + ```bash + # Replace user and host with your cluster login node. + # If your cluster requires a direct tunnel to the compute node, adapt accordingly. + ssh -L 7860:localhost:7860 [userID]@[yourcluster.address] ``` ### Hugging Face Spaces Deployment @@ -190,7 +224,3 @@ Process 500 images for: - Time of day (free tier performance varies with overall platform usage) For most users, the free tier is sufficient for small to medium batches (< 200 images), while the CPU upgrade offers a good balance of cost and performance for larger datasets. GPU options are recommended only for time-sensitive processing of large batches or when processing thousands of images. 
- -## License - -[Specify your license here] diff --git a/app.py b/app.py index 133b81b..de62300 100644 --- a/app.py +++ b/app.py @@ -1,5 +1,6 @@ import os import uuid +import json import traceback import sys import time @@ -11,13 +12,12 @@ import shutil import logging from ultralytics import YOLO -# from ultralytics.utils import ThreadingLocked import numpy as np import pandas as pd from torch import cuda from flask import Flask, Response, render_template, request, jsonify, send_file, session from multiprocessing.pool import Pool -from multiprocessing import set_start_method +from concurrent.futures import ThreadPoolExecutor from pathlib import Path from PIL import Image from datetime import datetime @@ -25,17 +25,36 @@ from yolo_utils import detect_in_image app = Flask(__name__) -app.secret_key = os.environ.get('FLASK_SECRET_KEY', str(uuid.uuid4())) # For session security - -# disable werkzeug logging - too noisy +_secret_key = os.environ.get('FLASK_SECRET_KEY') +if not _secret_key: + # Fallback for local dev only — sessions won't persist across restarts. + # On HF Spaces, set FLASK_SECRET_KEY as a Space secret to avoid session loss between workers. + _secret_key = str(uuid.uuid4()) + print("WARNING: FLASK_SECRET_KEY not set — using random key. Sessions will break across workers/restarts.") +else: + print(f"INFO: FLASK_SECRET_KEY is set (length={len(_secret_key)})") +app.secret_key = _secret_key + +# HF Spaces serves over HTTPS via a reverse proxy and may embed the app in an iframe. +# SameSite=None;Secure is required so cookies are sent in cross-site/iframe POST requests. +# HF sets SPACE_HOST env var; fall back to checking SPACE_ID or SPACE_AUTHOR_NAME. 
+_on_https = any(os.environ.get(v) for v in ('SPACE_HOST', 'SPACE_ID', 'SPACE_AUTHOR_NAME')) +app.config['SESSION_COOKIE_SECURE'] = _on_https +app.config['SESSION_COOKIE_SAMESITE'] = 'None' if _on_https else 'Lax' +app.config['SESSION_COOKIE_HTTPONLY'] = True +print(f"INFO: SESSION_COOKIE_SECURE={_on_https}, SAMESITE={'None' if _on_https else 'Lax'}") # comment out these lines if you want to see full logs log = logging.getLogger('werkzeug') log.setLevel(logging.ERROR) APP_ROOT = Path(__file__).parent -UPLOAD_FOLDER = APP_ROOT / 'uploads' -RESULTS_FOLDER = APP_ROOT / 'results' -ANNOT_FOLDER = APP_ROOT / 'annotated' +# Use /tmp for runtime data — works reliably on all container platforms. +# /tmp is RAM-backed tmpfs, always writable, avoids overlay filesystem issues on HF Spaces. +# Note: /tmp is cleared on container restart (uploads/results are transient by design). +UPLOAD_FOLDER = Path('/tmp/nemaquant/uploads') +RESULTS_FOLDER = Path('/tmp/nemaquant/results') +ANNOT_FOLDER = Path('/tmp/nemaquant/annotated') +SESSION_META_FOLDER = Path('/tmp/nemaquant/sessions') WEIGHTS_FILE = APP_ROOT / 'weights.pt' app.config['UPLOAD_FOLDER'] = str(UPLOAD_FOLDER) app.config['RESULTS_FOLDER'] = str(RESULTS_FOLDER) @@ -43,17 +62,61 @@ app.config['WEIGHTS_FILE'] = str(WEIGHTS_FILE) app.config['ALLOWED_EXTENSIONS'] = {'png', 'jpg', 'jpeg', 'tif', 'tiff'} -# skip these -- created dirs in dockerfile -# UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True) -# RESULTS_FOLDER.mkdir(parents=True, exist_ok=True) -# ANNOT_FOLDER.mkdir(parents=True, exist_ok=True) +# Create dirs at startup +UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True) +RESULTS_FOLDER.mkdir(parents=True, exist_ok=True) +ANNOT_FOLDER.mkdir(parents=True, exist_ok=True) +SESSION_META_FOLDER.mkdir(parents=True, exist_ok=True) +# YOLO_CONFIG_DIR points to /tmp/nemaquant/.yolo_config (set in Dockerfile ENV). +# Create it here so ultralytics can write its cache on read-only container filesystems +# (e.g. Apptainer SIF images). 
+Path(os.environ.get('YOLO_CONFIG_DIR', '/tmp/nemaquant/.yolo_config')).mkdir(parents=True, exist_ok=True) +print(f"Data root: /tmp/nemaquant | Weights: {WEIGHTS_FILE}") + +# --------------------------------------------------------------------------- +# Session metadata helpers +# Flask's client-side cookie is limited to ~4KB. When many images are +# uploaded, filename_map / uuid_map_to_uuid_imgname can overflow. +# We persist them to disk so every route can recover them even when the +# cookie is absent or truncated (e.g. large batches, Apptainer --cleanenv, +# multi-worker gunicorn). +# --------------------------------------------------------------------------- +def _save_session_meta(session_id, filename_map, uuid_map): + meta_dir = SESSION_META_FOLDER / session_id + meta_dir.mkdir(parents=True, exist_ok=True) + with open(meta_dir / 'meta.json', 'w') as fh: + json.dump({'filename_map': filename_map, 'uuid_map_to_uuid_imgname': uuid_map}, fh) + +def _load_session_meta(session_id): + meta_path = SESSION_META_FOLDER / session_id / 'meta.json' + if meta_path.exists(): + with open(meta_path) as fh: + return json.load(fh) + return {} # Load model once at startup, use CUDA if available MODEL_DEVICE = 'cuda' if cuda.is_available() else 'cpu' -# need a global dict to hold async results objects -# so you can check the progress of an abr -# maybe there's a better way around this? +# For GPU: load the model globally at startup so threads can reuse it without +# re-initialising CUDA (forked Pool workers cannot re-init CUDA in the child). +# For CPU: model is loaded per-worker in init_worker() instead. +_gpu_model = None +if MODEL_DEVICE == 'cuda': + _gpu_model = YOLO(str(WEIGHTS_FILE)) + _gpu_model.to('cuda') + print(f'GPU model loaded at startup on {MODEL_DEVICE}') + +# Wrapper so GPU futures (concurrent.futures.Future) expose the same +# .ready() interface as multiprocessing AsyncResult. 
+class _FutureWrapper: + def __init__(self, future): + self._f = future + def ready(self): + return self._f.done() + def get(self): + return self._f.result() + +# Global dict mapping session_id -> async result (Pool AsyncResult or _FutureWrapper) async_results = {} @app.errorhandler(Exception) @@ -62,9 +125,6 @@ def handle_exception(e): print(traceback.format_exc()) return jsonify({"error": "Server error", "log": str(e)}), 500 -# def allowed_file(filename): -# return '.' in filename and filename.rsplit('.', 1)[1].lower() in app.config['ALLOWED_EXTENSIONS'] - @app.route('/') def index(): return render_template('index.html') @@ -93,7 +153,9 @@ def upload_files(): uuid_map_to_uuid_imgname[uuid_base] = uuid_name session['filename_map'] = filename_map session['uuid_map_to_uuid_imgname'] = uuid_map_to_uuid_imgname - return jsonify({'filename_map': filename_map, 'status': 'uploaded'}) + # Persist to disk — cookie may be silently dropped if it exceeds ~4KB + _save_session_meta(session_id, filename_map, uuid_map_to_uuid_imgname) + return jsonify({'filename_map': filename_map, 'session_id': session_id, 'status': 'uploaded'}) # /preview route for serving original uploaded image @app.route('/preview', methods=['POST']) @@ -101,8 +163,10 @@ def preview_image(): try: data = request.get_json() uuid = data.get('uuid') - session_id = session['id'] - uuid_map_to_uuid_imgname = session.get('uuid_map_to_uuid_imgname', {}) + # Prefer client-supplied session_id (cookie may differ on HF Spaces HTTPS proxy) + session_id = data.get('session_id') or session['id'] + _meta = _load_session_meta(session_id) + uuid_map_to_uuid_imgname = session.get('uuid_map_to_uuid_imgname') or _meta.get('uuid_map_to_uuid_imgname', {}) img_name = uuid_map_to_uuid_imgname.get(uuid) if not img_name: print(f"/preview: No img_name found for uuid {uuid}") @@ -132,15 +196,12 @@ def preview_image(): return jsonify({'error': str(e)}), 500 # initializer for Pool to load model in each process -# each worker will have its 
own model instance +# each worker will have its own model instance (CPU only) def init_worker(model_path): global model model = YOLO(model_path) - if MODEL_DEVICE == 'cuda': - model.to('cuda') -# not sure if we need this decorator anymore? -#@ThreadingLocked() +# CPU pool worker — uses per-worker model loaded by init_worker() def process_single_image(img_path, results_dir): global model uuid_base = img_path.stem @@ -150,9 +211,31 @@ def process_single_image(img_path, results_dir): pickle.dump(results, pf) return uuid_base +# GPU thread worker — reuses the global _gpu_model loaded at startup +def process_single_image_thread(img_path, results_dir): + global _gpu_model + uuid_base = img_path.stem + pickle_path = results_dir / f"{uuid_base}.pkl" + results = detect_in_image(_gpu_model, str(img_path)) + with open(pickle_path, 'wb') as pf: + pickle.dump(results, pf) + return uuid_base + @app.route('/process', methods=['POST']) def start_processing(): session_id = session['id'] + # The client echoes back the session_id it received from /uploads. + # On HF Spaces the session cookie can be missing on subsequent requests + # (HTTPS proxy / SameSite), so we fall back to the client-supplied id + # when the cookie-based id doesn't have an upload directory. + client_session_id = request.form.get('session_id', '') + if client_session_id: + # Prefer the client-supplied id when its upload dir exists, or when the + # cookie-based id has no upload dir — the check below enforces both conditions.
+ client_dir = Path(app.config['UPLOAD_FOLDER']) / client_session_id + if client_dir.exists() or not (Path(app.config['UPLOAD_FOLDER']) / session_id).exists(): + session_id = client_session_id + session['id'] = session_id job_state = { "status": "starting", "progress": 0, @@ -161,31 +244,43 @@ def start_processing(): session['job_state'] = job_state upload_dir = Path(app.config['UPLOAD_FOLDER']) / session_id results_dir = Path(app.config['RESULTS_FOLDER']) / session_id - # clean out old results if needed - if results_dir.exists(): - shutil.rmtree(results_dir) - results_dir.mkdir(parents=True) - - # set up iterable of uploaded files to process - arg_list = [(x,results_dir) for x in list(upload_dir.iterdir())] try: + # Fail fast with a clear message if the upload directory is missing + if not upload_dir.exists(): + available = [d.name for d in Path(app.config['UPLOAD_FOLDER']).iterdir()] \ + if Path(app.config['UPLOAD_FOLDER']).exists() else [] + msg = (f"Upload directory not found: {upload_dir}. " + f"cookie_session={session['id']}, client_session={request.form.get('session_id','')}, " + f"available={available}") + print(f"ERROR /process: {msg}") + return jsonify({'error': msg}), 500 + + # clean out old results if needed + if results_dir.exists(): + shutil.rmtree(results_dir) + results_dir.mkdir(parents=True) + + # set up iterable of uploaded files to process + arg_list = [(x, results_dir) for x in list(upload_dir.iterdir())] + if MODEL_DEVICE == 'cuda': - n_proc = 1 + # GPU: run in a single thread so CUDA is never re-initialised in a + # forked subprocess (Pool uses fork by default, which breaks CUDA). 
+ def _gpu_task(): + for img_path, res_dir in arg_list: + process_single_image_thread(img_path, res_dir) + executor = ThreadPoolExecutor(max_workers=1) + future = executor.submit(_gpu_task) + executor.shutdown(wait=False) + async_results[session_id] = _FutureWrapper(future) else: n_proc = os.cpu_count() - # Initialize job state - job_state = { - "status": "starting", - "progress": 0, - "started": True - } - session['job_state'] = job_state - pool = Pool(processes=n_proc, - initializer=init_worker, - initargs=(str(WEIGHTS_FILE),)) - async_results[session_id] = pool.starmap_async(process_single_image, arg_list) - pool.close() + pool = Pool(processes=n_proc, + initializer=init_worker, + initargs=(str(WEIGHTS_FILE),)) + async_results[session_id] = pool.starmap_async(process_single_image, arg_list) + pool.close() # Update job state after process launch job_state["status"] = "processing" @@ -203,8 +298,16 @@ def start_processing(): @app.route('/progress') def get_progress(): session_id = session['id'] + # Accept client-supplied session_id as fallback (cookie may be missing on HF Spaces) + client_session_id = request.args.get('session_id', '') + if client_session_id and session_id not in async_results and client_session_id in async_results: + session_id = client_session_id + session['id'] = session_id try: job_state = session.get('job_state') + # If the session lost job_state but an async_result exists, synthesize a minimal in-memory state + if not job_state and session_id in async_results: + job_state = {'status': 'processing', 'progress': 0, 'sessionId': session_id} if not job_state: print("/progress: No job_state found in session.") return jsonify({"status": "error", "error": "No job state"}), 404 @@ -221,10 +324,12 @@ def get_progress(): job_state['status'] = 'completed' job_state['progress'] = 100 session['job_state'] = job_state + _meta = _load_session_meta(session_id) + _filename_map = session.get('filename_map') or _meta.get('filename_map', {}) resp = { 'status': 'completed',
'progress': 100, - 'filename_map': session.get('filename_map', {}), + 'filename_map': _filename_map, 'session_id': job_state.get('sessionId'), 'error': job_state.get('error'), } @@ -277,10 +382,12 @@ def annotate_image(): data = request.get_json() uuid = data.get('uuid') confidence = float(data.get('confidence', 0.5)) - session_id = session['id'] - uuid_map_to_uuid_imgname = session.get('uuid_map_to_uuid_imgname', {}) + # Prefer client-supplied session_id (cookie may differ on HF Spaces HTTPS proxy) + session_id = data.get('session_id') or session['id'] + _meta = _load_session_meta(session_id) + uuid_map_to_uuid_imgname = session.get('uuid_map_to_uuid_imgname') or _meta.get('uuid_map_to_uuid_imgname', {}) img_name = uuid_map_to_uuid_imgname.get(uuid) - orig_img_name = session['filename_map'].get(uuid) + orig_img_name = (session.get('filename_map') or _meta.get('filename_map', {})).get(uuid) if not img_name: return jsonify({'error': 'File not found'}), 404 @@ -316,9 +423,10 @@ def export_images(): try: data = request.get_json() confidence = float(data.get('confidence', 0.5)) - session_id = session['id'] - filename_map = session.get('filename_map', {}) - uuid_map_to_uuid_imgname = session.get('uuid_map_to_uuid_imgname', {}) + session_id = data.get('session_id') or session['id'] + _meta = _load_session_meta(session_id) + filename_map = session.get('filename_map') or _meta.get('filename_map', {}) + uuid_map_to_uuid_imgname = session.get('uuid_map_to_uuid_imgname') or _meta.get('uuid_map_to_uuid_imgname', {}) # ensure there's a landing spot annot_dir = Path(app.config['ANNOT_FOLDER']) / session_id annot_dir.mkdir(parents=True, exist_ok=True) @@ -366,9 +474,10 @@ def export_images(): def export_csv(): try: data = request.json - session_id = session['id'] + session_id = data.get('session_id') or session['id'] job_state = session.get('job_state') - filename_map = session.get('filename_map') + _meta = _load_session_meta(session_id) + filename_map = 
session.get('filename_map') or _meta.get('filename_map', {}) threshold = float(data.get('confidence', 0.5)) if not job_state: return jsonify({'error': 'Job not found'}), 404 @@ -386,7 +495,7 @@ def export_csv(): rows = [] for uuid in all_results.keys(): count = sum(1 for d in all_results[uuid] if d['score'] >= threshold) - rows.append({'Filename': filename_map[uuid], 'EggsDetected': count, 'ConfidenceThreshold': threshold}) + rows.append({'Filename': filename_map.get(uuid, uuid), 'EggsDetected': count, 'ConfidenceThreshold': threshold}) rows = sorted(rows, key=lambda x: x['Filename'].lower()) # write the CSV out timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') @@ -411,10 +520,6 @@ def export_csv(): def ensure_session(): if 'id' not in session: session['id'] = uuid.uuid4().hex - print(f"New session started: {session['id']}") - else: - pass - # print(f"Existing session: {session['id']}") def print_startup_info(): @@ -441,16 +546,15 @@ def print_startup_info(): except AttributeError: print("User running process: UID/GID not available on this OS") - for path_str in ["/app/uploads", "/app/results"]: - path_obj = Path(path_str) + for path_obj in [UPLOAD_FOLDER, RESULTS_FOLDER, ANNOT_FOLDER]: if path_obj.exists(): stat_info = path_obj.stat() permissions = oct(stat_info.st_mode)[-3:] owner = f"{stat_info.st_uid}:{stat_info.st_gid}" - print(f"Permissions for {path_str}: {permissions}") - print(f"Owner for {path_str}: {owner}") + print(f"Permissions for {path_obj}: {permissions}") + print(f"Owner for {path_obj}: {owner}") else: - print(f"Directory {path_str} does not exist.") + print(f"Directory {path_obj} does not exist.") # some cleanup steps - not sure quite where to put these print('Running periodic cleanup of old sessions...') diff --git a/requirements.txt b/requirements.txt index adc25f6..5340189 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,9 @@ Flask==3.1.1 numpy==2.2.6 -opencv_python==4.12.0.88 +opencv_python-headless==4.13.0.92 pandas==2.3.1 
Pillow==11.3.0 torch==2.7.1 ultralytics==8.3.170 watchdog==6.0.0 -Werkzeug==3.1.3 -gunicorn==21.2.0 \ No newline at end of file +Werkzeug==3.1.3 \ No newline at end of file diff --git a/static/script.js b/static/script.js index d8353a6..ab8cfd5 100644 --- a/static/script.js +++ b/static/script.js @@ -33,6 +33,7 @@ document.addEventListener('DOMContentLoaded', () => { let currentJobId = null; let currentZoomLevel = 1; let filenameMap = {}; + let uploadSessionId = ''; // echoed back to /process, /progress and the other routes as cookie-independent fallback const MAX_ZOOM = 3; const MIN_ZOOM = 0.5; let progressInterval = null; // Interval timer for polling @@ -161,7 +162,6 @@ document.addEventListener('DOMContentLoaded', () => { filteredValidFiles = validFiles; const invalidFiles = Array.from(files).filter(file => !allowedTypes.includes(file.type)); - // Only print invalid file warnings if not in Keyence mode if (invalidFiles.length > 0 && inputMode.value !== 'keyence') { logStatus(`Warning: Skipped ${invalidFiles.length} invalid files.
Only PNG, JPG, and TIFF are supported.`); @@ -188,7 +188,6 @@ document.addEventListener('DOMContentLoaded', () => { `; fileList.appendChild(summaryDiv); - fileInput.files = files; updateUploadState(validFiles.length); } @@ -227,9 +226,10 @@ document.addEventListener('DOMContentLoaded', () => { dropZone.classList.remove('drag-over'); } - dropZone.addEventListener('drop', (e) => { + dropZone.addEventListener('drop', async (e) => { const dt = e.dataTransfer; handleFiles(dt.files); + await uploadFilesToServer(); }); // Click to upload @@ -237,47 +237,50 @@ document.addEventListener('DOMContentLoaded', () => { fileInput.click(); }); - fileInput.addEventListener('change', async () => { - handleFiles(fileInput.files); - if (filteredValidFiles && filteredValidFiles.length > 0) { - // Prepare FormData for upload - const formData = new FormData(); - filteredValidFiles.forEach(f => formData.append('files', f)); - try { - const response = await fetch('/uploads', { - method: 'POST', - body: formData + async function uploadFilesToServer() { + if (!filteredValidFiles || filteredValidFiles.length === 0) return; + const formData = new FormData(); + filteredValidFiles.forEach(f => formData.append('files', f)); + try { + const response = await fetch('/uploads', { + method: 'POST', + credentials: 'include', + body: formData + }); + if (response.ok) { + const data = await response.json(); + logStatus('Files uploaded successfully.'); + filenameMap = data.filename_map || {}; + uploadSessionId = data.session_id || ''; + + // Update results table with filenames and View buttons + resultsTableBody.innerHTML = ''; + Object.entries(filenameMap).forEach(([uuid, originalFilename], idx) => { + const row = resultsTableBody.insertRow(); + row.dataset.originalIndex = idx; + row.innerHTML = ` + ${originalFilename} + NA + + `; }); - if (response.ok) { - const data = await response.json(); - logStatus('Files uploaded successfully.'); - filenameMap = data.filename_map || {}; - - // Update results table 
with filenames and View buttons - resultsTableBody.innerHTML = ''; - Object.entries(filenameMap).forEach(([uuid, originalFilename], idx) => { - const row = resultsTableBody.insertRow(); - row.dataset.originalIndex = idx; - row.innerHTML = ` - ${originalFilename} - NA - - `; - }); - // Add click event for View buttons - resultsTableBody.querySelectorAll('.view-button').forEach(btn => { - btn.addEventListener('click', (e) => { - const idx = parseInt(btn.dataset.index, 10); - displayImage(idx); - }); + resultsTableBody.querySelectorAll('.view-button').forEach(btn => { + btn.addEventListener('click', (e) => { + const idx = parseInt(btn.dataset.index, 10); + displayImage(idx); }); - } else { - logStatus('File upload failed.'); - } - } catch (err) { - logStatus('Error uploading files: ' + err); + }); + } else { + logStatus('File upload failed.'); } + } catch (err) { + logStatus('Error uploading files: ' + err); } + } + + fileInput.addEventListener('change', async () => { + handleFiles(fileInput.files); + await uploadFilesToServer(); }); // Input mode change @@ -357,10 +360,14 @@ document.addEventListener('DOMContentLoaded', () => { } formData.append('input_mode', mode); formData.append('confidence_threshold', confidenceSlider.value); + // Send back the session_id from /uploads so the server can recover the + // correct upload directory when the session cookie is missing (HF Spaces). 
+ if (uploadSessionId) formData.append('session_id', uploadSessionId); try { const response = await fetch('/process', { method: 'POST', + credentials: 'include', body: formData, }); if (!response.ok) { @@ -482,14 +489,16 @@ document.addEventListener('DOMContentLoaded', () => { if (isCompleted) { response = await fetch('/annotate', { method: 'POST', + credentials: 'include', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ uuid: uuid, confidence }) + body: JSON.stringify({ uuid: uuid, confidence, session_id: uploadSessionId }) }); } else { response = await fetch('/preview', { method: 'POST', + credentials: 'include', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ uuid: uuid }) + body: JSON.stringify({ uuid: uuid, session_id: uploadSessionId }) }); } if (response.ok) { @@ -537,7 +546,7 @@ document.addEventListener('DOMContentLoaded', () => { progressInterval = setInterval(async () => { try { - const response = await fetch(`/progress`); + const response = await fetch(`/progress?session_id=${encodeURIComponent(uploadSessionId)}`, { credentials: 'include' }); if (!response.ok) { let errorText = `Progress check failed: ${response.status}`; try { @@ -1055,8 +1064,9 @@ document.addEventListener('DOMContentLoaded', () => { try { const resp = await fetch('/export_csv', { method: 'POST', + credentials: 'include', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ confidence: threshold }) + body: JSON.stringify({ confidence: threshold, session_id: uploadSessionId }) }); if (!resp.ok) throw new Error('Failed to export CSV'); const blob = await resp.blob(); @@ -1092,8 +1102,9 @@ document.addEventListener('DOMContentLoaded', () => { logStatus('Preparing annotated images for download...'); const resp = await fetch('/export_images', { method: 'POST', + credentials: 'include', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ confidence: threshold }) + body: JSON.stringify({ 
confidence: threshold, session_id: uploadSessionId }) }); if (!resp.ok) throw new Error('Failed to export images'); const blob = await resp.blob();