From 68d14d2bfc0d62a8ef8f0c51789685a3f40a9f3f Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 09:57:59 -0400 Subject: [PATCH 01/24] gpu docker --- .dockerignore | 9 +++++++++ .gitignore | 1 + Dockerfile | 18 ++++++++---------- Dockerfile.gpu | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 1 + requirements.txt | 5 ++--- 6 files changed, 69 insertions(+), 13 deletions(-) create mode 100644 .dockerignore create mode 100644 Dockerfile.gpu diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..f4a83ea --- /dev/null +++ b/.dockerignore @@ -0,0 +1,9 @@ +__pycache__/ +*.pyc +.git/ +uploads/ +results/ +annotated/ +tests/ +*.md +.github/ diff --git a/.gitignore b/.gitignore index d3e4fde..562f4a7 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ __pycache__/ uploads/ results/ annotated/ +tests/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 7d1eb65..08daaf2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,9 @@ -# Use an official Python runtime as a parent image -FROM python:3.12 -# FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 +# CPU image - use Dockerfile.gpu for GPU support +FROM python:3.12.13-slim-trixie # run updates before switching over to non-root user RUN apt-get update && apt-get install -y --no-install-recommends \ - libgl1 \ libglib2.0-0 \ - libsm6 \ - libxrender1 \ - libxext6 \ && rm -rf /var/lib/apt/lists/* # add new user with ID 1000 to avoid permission issues on HF spaces @@ -17,7 +12,7 @@ USER user # Set home to user's home dir and add local bin to PATH ENV HOME=/home/user \ - PATH=/user/user/.local/bin:$PATH + PATH=/home/user/.local/bin:$PATH # Set the working directory in the container WORKDIR $HOME/app @@ -25,7 +20,10 @@ WORKDIR $HOME/app # Try and run pip command after setting the user with `USER user` to avoid permission issues with Python # NOTE - this is from the HF Spaces docs, not sure if necessary COPY --chown=user ./requirements.txt . 
-RUN pip install --no-cache-dir --upgrade -r requirements.txt +RUN pip install --no-cache-dir torch==2.7.1 torchvision --index-url https://download.pytorch.org/whl/cpu +RUN pip install --no-cache-dir --only-binary :all: -r requirements.txt +# Force headless opencv after ultralytics (which pulls in full opencv-python as a dependency) +RUN pip install --no-cache-dir --force-reinstall opencv-python-headless==4.13.0.92 # Copy the current directory contents into the container at $HOME/app setting the owner to the user COPY --chown=user . $HOME/app @@ -40,7 +38,7 @@ COPY --chown=user . $HOME/app RUN mkdir -p uploads results annotated .yolo_config # set the env var for YOLO user config directory -ENV YOLO_CONFIG_DIR=.yolo_config +ENV YOLO_CONFIG_DIR=$HOME/app/.yolo_config # Copy the rest of the application code into the container at /app # This includes app.py, nemaquant.py, templates/, static/, etc. diff --git a/Dockerfile.gpu b/Dockerfile.gpu new file mode 100644 index 0000000..8230fdd --- /dev/null +++ b/Dockerfile.gpu @@ -0,0 +1,48 @@ +# GPU image following HF Spaces guidelines: https://huggingface.co/docs/hub/spaces-sdks-docker +# Use: docker build -f Dockerfile.gpu -t cristaniguti/nemaquant:gpu . +# Requires nvidia-container-toolkit on the host (provided by HF Spaces GPU instances). +# PyTorch CUDA wheels bundle their own cuDNN/CUDA libs, so base variant is sufficient. 
+FROM nvidia/cuda:12.8.1-base-ubuntu24.04 + +# Install Python 3.12 and pip +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3.12 \ + python3.12-venv \ + python3-pip \ + libglib2.0-0 \ + && rm -rf /var/lib/apt/lists/* + +# Make python3.12 the default python +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 \ + && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 + +# add new user with ID 1000 to avoid permission issues on HF spaces +# Rename the existing UID 1000 user ('ubuntu') to 'user' for HF Spaces compatibility +RUN usermod -l user ubuntu && usermod -d /home/user -m user +USER user + +# Set home to user's home dir and add local bin to PATH +ENV HOME=/home/user \ + PATH=/home/user/.local/bin:$PATH + +# Set the working directory in the container +WORKDIR $HOME/app + +COPY --chown=user ./requirements.txt . +RUN pip install --no-cache-dir --break-system-packages torch==2.7.1 torchvision --index-url https://download.pytorch.org/whl/cu128 +RUN pip install --no-cache-dir --break-system-packages --only-binary :all: -r requirements.txt +# Force headless opencv after ultralytics (which pulls in full opencv-python as a dependency) +RUN pip install --no-cache-dir --break-system-packages --force-reinstall opencv-python-headless==4.13.0.92 + +# Copy the current directory contents into the container +COPY --chown=user . 
$HOME/app + +# Create the necessary dirs +RUN mkdir -p uploads results annotated .yolo_config + +# set the env var for YOLO user config directory +ENV YOLO_CONFIG_DIR=$HOME/app/.yolo_config + +EXPOSE 7860 + +CMD ["python", "app.py"] diff --git a/README.md b/README.md index d426d0f..9fd5b1a 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ emoji: 🔬 colorFrom: indigo colorTo: blue sdk: docker +dockerfile: Dockerfile.gpu license: apache-2.0 short_description: "YOLO-based nematode egg detection with real-time processing" tags: diff --git a/requirements.txt b/requirements.txt index adc25f6..5340189 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,9 @@ Flask==3.1.1 numpy==2.2.6 -opencv_python==4.12.0.88 +opencv_python-headless==4.13.0.92 pandas==2.3.1 Pillow==11.3.0 torch==2.7.1 ultralytics==8.3.170 watchdog==6.0.0 -Werkzeug==3.1.3 -gunicorn==21.2.0 \ No newline at end of file +Werkzeug==3.1.3 \ No newline at end of file From 75dca7d8598f0ceaf2ed6df8ced10a326e0b178d Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 11:12:20 -0400 Subject: [PATCH 02/24] v0.0.1 --- .github/workflows/build-images.yml | 75 +++++++++---------- .github/workflows/deploy-to-hf.yml | 112 +++++++++++++---------------- CHANGELOG.md | 19 +++++ 3 files changed, 109 insertions(+), 97 deletions(-) create mode 100644 CHANGELOG.md diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index 0dc53be..eebb40d 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -2,74 +2,77 @@ name: Build Docker Images on: push: - branches: [ main ] + branches: [ main, docker_beauty ] # TODO: remove docker_beauty before merging workflow_dispatch: env: - REGISTRY: ghcr.io - IMAGE_NAME: ${{ github.repository }} + IMAGE: ${{ secrets.DOCKERHUB_USERNAME }}/nemaquant jobs: build: runs-on: ubuntu-latest - permissions: - contents: read - packages: write + strategy: + fail-fast: false + matrix: + include: + - variant: cpu 
+ dockerfile: Dockerfile + tag_suffix: "" # cpu is the default, no suffix on latest/version tags + - variant: gpu + dockerfile: Dockerfile.gpu + tag_suffix: "-gpu" steps: - name: Checkout repository uses: actions/checkout@v4 + - name: Extract version from CHANGELOG.md + id: version + run: | + VERSION=$(grep -m1 '^## \[' CHANGELOG.md | sed 's/.*\[\(.*\)\].*/\1/') + if [ -z "$VERSION" ]; then + echo "ERROR: Could not extract version from CHANGELOG.md" >&2 + exit 1 + fi + echo "version=$VERSION" >> $GITHUB_OUTPUT + echo "Detected version: $VERSION" + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Log in to GitHub Container Registry + - name: Log in to Docker Hub uses: docker/login-action@v3 with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - # - name: Log in to Docker Hub - # if: ${{ secrets.DOCKERHUB_USERNAME }} - # uses: docker/login-action@v3 - # with: - # username: ${{ secrets.DOCKERHUB_USERNAME }} - # password: ${{ secrets.DOCKERHUB_TOKEN }} + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} - name: Extract metadata id: meta uses: docker/metadata-action@v5 with: - images: | - ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - # ${{ secrets.DOCKERHUB_USERNAME }}/nemaquant + images: ${{ env.IMAGE }} tags: | - type=raw,value=latest - type=sha,prefix=main- + # version from CHANGELOG.md: produce 1.2.3 / 1.2.3-gpu + type=raw,value=${{ steps.version.outputs.version }}${{ matrix.tag_suffix }} + # on main branch: produce latest / latest-gpu + type=raw,value=latest,suffix=${{ matrix.tag_suffix }} - - name: Build and push Docker image - uses: docker/build-push-action@v5 + - name: Build and push + uses: docker/build-push-action@v6 with: context: . 
+ file: ${{ matrix.dockerfile }} platforms: linux/amd64 push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max + cache-from: type=gha,scope=${{ matrix.variant }} + cache-to: type=gha,mode=max,scope=${{ matrix.variant }} - name: Summary run: | - echo "## Build Summary" >> $GITHUB_STEP_SUMMARY - echo "- **GitHub Container Registry**: \`${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest\`" >> $GITHUB_STEP_SUMMARY - echo "- **Platform**: linux/amd64 (optimized for Docker Desktop)" >> $GITHUB_STEP_SUMMARY - # if [ -n "${{ secrets.DOCKERHUB_USERNAME }}" ]; then - # echo "- **Docker Hub**: \`${{ secrets.DOCKERHUB_USERNAME }}/nemaquant:latest\`" >> $GITHUB_STEP_SUMMARY - # fi - echo "" >> $GITHUB_STEP_SUMMARY - echo "### Pull the image:" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`bash" >> $GITHUB_STEP_SUMMARY - echo "docker pull ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest" >> $GITHUB_STEP_SUMMARY + echo "## ${{ matrix.variant }} image" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`" >> $GITHUB_STEP_SUMMARY + echo "${{ steps.meta.outputs.tags }}" >> $GITHUB_STEP_SUMMARY echo "\`\`\`" >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/deploy-to-hf.yml b/.github/workflows/deploy-to-hf.yml index 63ada9f..123d1e6 100644 --- a/.github/workflows/deploy-to-hf.yml +++ b/.github/workflows/deploy-to-hf.yml @@ -2,9 +2,12 @@ name: Deploy to Hugging Face Spaces on: push: - branches: [ main ] + branches: [ main, docker_beauty ] # TODO: remove docker_beauty before merging workflow_dispatch: +env: + HF_SPACE_REPO: breedinginsight/nemaquant # update if the Space name changes + jobs: deploy: runs-on: ubuntu-latest @@ -17,88 +20,75 @@ jobs: with: lfs: true - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.12' + - name: Validate secrets + run: | + if [ -z "${{ secrets.HUGGINGFACE_TOKEN }}" ]; then + echo "ERROR: HUGGINGFACE_TOKEN secret is not set." 
>&2 + exit 1 + fi - - name: Install dependencies + - name: Configure git run: | - pip install huggingface_hub - # Install Git LFS - sudo apt-get update - sudo apt-get install -y git-lfs + git config --global user.name "GitHub Actions" + git config --global user.email "actions@github.com" + git config --global credential.helper store + echo "https://user:${{ secrets.HUGGINGFACE_TOKEN }}@huggingface.co" > ~/.git-credentials git lfs install - - name: Push to Hugging Face Space - env: - HF_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }} + - name: Clone existing HF Space run: | - # Check if HUGGINGFACE_SPACE_REPO is set - if [ -z "${{ secrets.HUGGINGFACE_SPACE_REPO }}" ]; then - echo "HUGGINGFACE_SPACE_REPO secret not set. Please set it to your space repository name (e.g., 'username/nemaquant')" - exit 1 - fi - - # Configure git credentials for Hugging Face - git config --global credential.helper store - echo "https://user:${HF_TOKEN}@huggingface.co" > ~/.git-credentials - - # Create a temporary directory for the space - mkdir -p space_repo + git clone https://huggingface.co/spaces/${{ env.HF_SPACE_REPO }} space_repo cd space_repo - - # Initialize git repository with LFS - git init -b main git lfs install - git remote add origin https://huggingface.co/spaces/${{ secrets.HUGGINGFACE_SPACE_REPO }} - - # Create .gitattributes for LFS BEFORE copying files - echo "*.pt filter=lfs diff=lfs merge=lfs -text" > .gitattributes - echo "*.pth filter=lfs diff=lfs merge=lfs -text" >> .gitattributes - echo "*.bin filter=lfs diff=lfs merge=lfs -text" >> .gitattributes - echo "*.h5 filter=lfs diff=lfs merge=lfs -text" >> .gitattributes - echo "*.onnx filter=lfs diff=lfs merge=lfs -text" >> .gitattributes - - # Copy necessary files - cp ../README.md . 
+ + - name: Sync files into Space + run: | + cd space_repo + + # Ensure LFS tracks model file types + cat > .gitattributes <<'EOF' + *.pt filter=lfs diff=lfs merge=lfs -text + *.pth filter=lfs diff=lfs merge=lfs -text + *.bin filter=lfs diff=lfs merge=lfs -text + *.h5 filter=lfs diff=lfs merge=lfs -text + *.onnx filter=lfs diff=lfs merge=lfs -text + EOF + + # Sync application files cp ../app.py . cp ../requirements.txt . cp ../Dockerfile . cp ../yolo_utils.py . + cp ../README.md . cp -r ../templates . cp -r ../static . - - # Copy all .pt and .onnx files from the root directory - echo "Checking for model files..." - find .. -maxdepth 1 -type f -name "*.pt" -exec cp {} . \; - find .. -maxdepth 1 -type f -name "*.onnx" -exec cp {} . \; - - # List what we're about to commit + + # Sync model weight files + find .. -maxdepth 1 -type f \( -name "*.pt" -o -name "*.onnx" \) -exec cp {} . \; + echo "Files to be committed:" ls -lh - - # Add and commit - git add .gitattributes + + - name: Commit and push + run: | + cd space_repo git add . - git config user.name "GitHub Actions" - git config user.email "actions@github.com" - git commit -m "Update NemaQuant app from GitHub Actions - ${{ github.sha }}" - - # Push to Hugging Face Space (force push to main branch) - git push --force origin main + # Only commit if there are actual changes + if git diff --cached --quiet; then + echo "No changes to deploy." 
+ else + git commit -m "deploy: sync from GitHub ${{ github.sha }}" + git push origin main + fi - name: Summary run: | echo "## Deployment Summary" >> $GITHUB_STEP_SUMMARY - echo "- **Hugging Face Space**: \`https://huggingface.co/spaces/${{ secrets.HUGGINGFACE_SPACE_REPO }}\`" >> $GITHUB_STEP_SUMMARY + echo "- **Space**: [spaces/${{ env.HF_SPACE_REPO }}](https://huggingface.co/spaces/${{ env.HF_SPACE_REPO }})" >> $GITHUB_STEP_SUMMARY echo "- **Commit**: \`${{ github.sha }}\`" >> $GITHUB_STEP_SUMMARY - echo "- **Status**: Successfully deployed to HF Spaces" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY - echo "### Model Files:" >> $GITHUB_STEP_SUMMARY + echo "### Model files deployed:" >> $GITHUB_STEP_SUMMARY cd space_repo for file in *.pt *.onnx; do - if [ -f "$file" ]; then - echo "- ✅ $file ($(ls -lh "$file" | awk '{print $5}'))" >> $GITHUB_STEP_SUMMARY - fi + [ -f "$file" ] && echo "- \`$file\` ($(ls -lh "$file" | awk '{print $5}'))" >> $GITHUB_STEP_SUMMARY done diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..fbf8d06 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,19 @@ +Docker image is versioned based on this file. +Please, follow the exact format to track versions and updates. +Add new versions on top of olders. 
+ +## [0.0.1] - 2026-03-19 + +- Docker images split by CPU or GPU usage + - CPU image (`Dockerfile`): ~2.3 GB, based on `python:3.12-slim` + - GPU image (`Dockerfile.gpu`): ~10 GB, based on `nvidia/cuda:12.8.1-base-ubuntu24.04` +- CPU image uses CPU-only torch wheel (~250 MB vs ~2.5 GB CUDA wheel) +- GPU image uses CUDA 12.8 torch wheel; cuDNN bundled inside torch, no `cudnn-runtime` base needed +- Switched from `opencv-python` to `opencv-python-headless` (server environment, no display needed) +- Force-reinstall `opencv-python-headless` after `ultralytics` to prevent full opencv being pulled as transitive dependency +- Removed unnecessary apt packages (`libgl1`, `libsm6`, `libxrender1`, `libxext6`) — only needed by full opencv +- Fixed `YOLO_CONFIG_DIR` to use absolute path +- Fixed `PATH` to correctly point to `/home/user/.local/bin` +- Updated CI/CD GitHub Actions workflows: + - `build-images.yml`: builds and pushes both CPU and GPU images to Docker Hub on push to `main`, tagged with version from `CHANGELOG.md` and `latest` + - `deploy-to-hf.yml`: syncs app files and model weights to Hugging Face Space on push to `main`, using Git LFS for weight files \ No newline at end of file From 8aba29ca00a776f6a497e700939e9e0edeba9f8a Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 11:30:19 -0400 Subject: [PATCH 03/24] fix credentials --- .github/workflows/build-images.yml | 2 +- .github/workflows/deploy-to-hf.yml | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index eebb40d..0648600 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -6,7 +6,7 @@ on: workflow_dispatch: env: - IMAGE: ${{ secrets.DOCKERHUB_USERNAME }}/nemaquant + IMAGE: breedinginsight/nemaquant # Docker Hub org/image jobs: build: diff --git a/.github/workflows/deploy-to-hf.yml b/.github/workflows/deploy-to-hf.yml index 123d1e6..2927ae3 100644 --- 
a/.github/workflows/deploy-to-hf.yml +++ b/.github/workflows/deploy-to-hf.yml @@ -26,18 +26,22 @@ jobs: echo "ERROR: HUGGINGFACE_TOKEN secret is not set." >&2 exit 1 fi + if [ -z "${{ secrets.HUGGINGFACE_USERNAME }}" ]; then + echo "ERROR: HUGGINGFACE_USERNAME secret is not set." >&2 + exit 1 + fi - name: Configure git run: | git config --global user.name "GitHub Actions" git config --global user.email "actions@github.com" git config --global credential.helper store - echo "https://user:${{ secrets.HUGGINGFACE_TOKEN }}@huggingface.co" > ~/.git-credentials + echo "https://${{ secrets.HUGGINGFACE_USERNAME }}:${{ secrets.HUGGINGFACE_TOKEN }}@huggingface.co" > ~/.git-credentials git lfs install - name: Clone existing HF Space run: | - git clone https://huggingface.co/spaces/${{ env.HF_SPACE_REPO }} space_repo + git clone https://${{ secrets.HUGGINGFACE_USERNAME }}:${{ secrets.HUGGINGFACE_TOKEN }}@huggingface.co/spaces/${{ env.HF_SPACE_REPO }} space_repo cd space_repo git lfs install From 8973ad7e44914203bca669996c1ffee18cf63b55 Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 11:40:30 -0400 Subject: [PATCH 04/24] add dockerhub meta --- .github/workflows/build-images.yml | 4 +- .github/workflows/deploy-to-hf.yml | 2 +- .github/workflows/update-dockerhub-meta.yml | 58 +++++++++++++++++++++ CHANGELOG.md | 3 +- 4 files changed, 64 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/update-dockerhub-meta.yml diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index 0648600..6d275e1 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -24,7 +24,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Extract version from CHANGELOG.md id: version @@ -64,6 +64,8 @@ jobs: file: ${{ matrix.dockerfile }} platforms: linux/amd64 push: true + provenance: false # avoids 400 errors on Docker Hub caused by BuildKit 
attestation manifests + sbom: false tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} cache-from: type=gha,scope=${{ matrix.variant }} diff --git a/.github/workflows/deploy-to-hf.yml b/.github/workflows/deploy-to-hf.yml index 2927ae3..0540c1a 100644 --- a/.github/workflows/deploy-to-hf.yml +++ b/.github/workflows/deploy-to-hf.yml @@ -16,7 +16,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 with: lfs: true diff --git a/.github/workflows/update-dockerhub-meta.yml b/.github/workflows/update-dockerhub-meta.yml new file mode 100644 index 0000000..eddd675 --- /dev/null +++ b/.github/workflows/update-dockerhub-meta.yml @@ -0,0 +1,58 @@ +name: Update Docker Hub Metadata + +on: + push: + branches: [ main ] + paths: + - README.md # re-run when README changes + - .github/workflows/update-dockerhub-meta.yml + workflow_dispatch: + +env: + DOCKERHUB_REPO: breedinginsight/nemaquant + +jobs: + update-meta: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v5 + + - name: Update Docker Hub description + uses: peter-evans/dockerhub-description@v4 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + repository: ${{ env.DOCKERHUB_REPO }} + short-description: "YOLO-based nematode egg detection with real-time processing" + readme-filepath: ./README.md # used as the full (long) description on Docker Hub + + - name: Set Docker Hub category via API + run: | + # Authenticate and get JWT token + TOKEN=$(curl -s -X POST "https://hub.docker.com/v2/users/login" \ + -H "Content-Type: application/json" \ + -d "{\"username\": \"${{ secrets.DOCKERHUB_USERNAME }}\", \"password\": \"${{ secrets.DOCKERHUB_PASSWORD }}\"}" \ + | jq -r '.token') + + if [ -z "$TOKEN" ] || [ "$TOKEN" = "null" ]; then + echo "ERROR: Failed to authenticate with Docker Hub" >&2 + exit 1 + fi + + # Set repository category (Machine Learning) + # Full list: 
https://hub.docker.com/search?categories= + curl -s -X PATCH "https://hub.docker.com/v2/repositories/${{ env.DOCKERHUB_REPO }}/" \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"categories": [{"name": "Machine Learning"}]}' \ + | jq . + + - name: Summary + run: | + echo "## Docker Hub Metadata Updated" >> $GITHUB_STEP_SUMMARY + echo "- **Repository**: [hub.docker.com/r/${{ env.DOCKERHUB_REPO }}](https://hub.docker.com/r/${{ env.DOCKERHUB_REPO }})" >> $GITHUB_STEP_SUMMARY + echo "- **Short description**: YOLO-based nematode egg detection with real-time processing" >> $GITHUB_STEP_SUMMARY + echo "- **Full description**: synced from \`README.md\`" >> $GITHUB_STEP_SUMMARY + echo "- **Category**: Machine Learning" >> $GITHUB_STEP_SUMMARY diff --git a/CHANGELOG.md b/CHANGELOG.md index fbf8d06..a113f52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,4 +16,5 @@ Add new versions on top of olders. - Fixed `PATH` to correctly point to `/home/user/.local/bin` - Updated CI/CD GitHub Actions workflows: - `build-images.yml`: builds and pushes both CPU and GPU images to Docker Hub on push to `main`, tagged with version from `CHANGELOG.md` and `latest` - - `deploy-to-hf.yml`: syncs app files and model weights to Hugging Face Space on push to `main`, using Git LFS for weight files \ No newline at end of file + - `deploy-to-hf.yml`: syncs app files and model weights to Hugging Face Space on push to `main`, using Git LFS for weight files +- Added `update-dockerhub-meta.yml` to make a pretty Dockerhub description based on the `README.md` \ No newline at end of file From 607d9479768586cfebd7ee10f60fa8ae777045be Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 12:09:37 -0400 Subject: [PATCH 05/24] diagnostic --- .github/workflows/build-images.yml | 4 ++-- .github/workflows/deploy-to-hf.yml | 24 ++++++++++++--------- .github/workflows/update-dockerhub-meta.yml | 2 +- 3 files changed, 17 insertions(+), 13 deletions(-) diff 
--git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index 6d275e1..79e3219 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -63,9 +63,9 @@ jobs: context: . file: ${{ matrix.dockerfile }} platforms: linux/amd64 - push: true - provenance: false # avoids 400 errors on Docker Hub caused by BuildKit attestation manifests + provenance: false sbom: false + outputs: type=image,push=true,compression=zstd,compression-level=3,force-compression=true,oci-mediatypes=true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} cache-from: type=gha,scope=${{ matrix.variant }} diff --git a/.github/workflows/deploy-to-hf.yml b/.github/workflows/deploy-to-hf.yml index 0540c1a..4ec5d84 100644 --- a/.github/workflows/deploy-to-hf.yml +++ b/.github/workflows/deploy-to-hf.yml @@ -5,15 +5,16 @@ on: branches: [ main, docker_beauty ] # TODO: remove docker_beauty before merging workflow_dispatch: -env: - HF_SPACE_REPO: breedinginsight/nemaquant # update if the Space name changes - jobs: deploy: runs-on: ubuntu-latest permissions: contents: read + env: + HF_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }} + HF_SPACE_REPO: ${{ secrets.HUGGINGFACE_SPACE_REPO }} + steps: - name: Checkout repository uses: actions/checkout@v5 @@ -22,12 +23,12 @@ jobs: - name: Validate secrets run: | - if [ -z "${{ secrets.HUGGINGFACE_TOKEN }}" ]; then + if [ -z "$HF_TOKEN" ]; then echo "ERROR: HUGGINGFACE_TOKEN secret is not set." >&2 exit 1 fi - if [ -z "${{ secrets.HUGGINGFACE_USERNAME }}" ]; then - echo "ERROR: HUGGINGFACE_USERNAME secret is not set." >&2 + if [ -z "$HF_SPACE_REPO" ]; then + echo "ERROR: HUGGINGFACE_SPACE_REPO secret is not set." 
>&2 exit 1 fi @@ -36,12 +37,13 @@ jobs: git config --global user.name "GitHub Actions" git config --global user.email "actions@github.com" git config --global credential.helper store - echo "https://${{ secrets.HUGGINGFACE_USERNAME }}:${{ secrets.HUGGINGFACE_TOKEN }}@huggingface.co" > ~/.git-credentials + # HF accepts any username when authenticating via token + printf 'https://user:%s@huggingface.co\n' "$HF_TOKEN" > ~/.git-credentials git lfs install - name: Clone existing HF Space run: | - git clone https://${{ secrets.HUGGINGFACE_USERNAME }}:${{ secrets.HUGGINGFACE_TOKEN }}@huggingface.co/spaces/${{ env.HF_SPACE_REPO }} space_repo + git clone "https://huggingface.co/spaces/$HF_SPACE_REPO" space_repo cd space_repo git lfs install @@ -77,7 +79,6 @@ jobs: run: | cd space_repo git add . - # Only commit if there are actual changes if git diff --cached --quiet; then echo "No changes to deploy." else @@ -88,7 +89,7 @@ jobs: - name: Summary run: | echo "## Deployment Summary" >> $GITHUB_STEP_SUMMARY - echo "- **Space**: [spaces/${{ env.HF_SPACE_REPO }}](https://huggingface.co/spaces/${{ env.HF_SPACE_REPO }})" >> $GITHUB_STEP_SUMMARY + echo "- **Space**: [spaces/$HF_SPACE_REPO](https://huggingface.co/spaces/$HF_SPACE_REPO)" >> $GITHUB_STEP_SUMMARY echo "- **Commit**: \`${{ github.sha }}\`" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY echo "### Model files deployed:" >> $GITHUB_STEP_SUMMARY @@ -96,3 +97,6 @@ jobs: for file in *.pt *.onnx; do [ -f "$file" ] && echo "- \`$file\` ($(ls -lh "$file" | awk '{print $5}'))" >> $GITHUB_STEP_SUMMARY done + for file in *.pt *.onnx; do + [ -f "$file" ] && echo "- \`$file\` ($(ls -lh "$file" | awk '{print $5}'))" >> $GITHUB_STEP_SUMMARY + done diff --git a/.github/workflows/update-dockerhub-meta.yml b/.github/workflows/update-dockerhub-meta.yml index eddd675..d39340f 100644 --- a/.github/workflows/update-dockerhub-meta.yml +++ b/.github/workflows/update-dockerhub-meta.yml @@ -2,7 +2,7 @@ name: Update Docker Hub Metadata on: 
push: - branches: [ main ] + branches: [ main, docker_beauty ] paths: - README.md # re-run when README changes - .github/workflows/update-dockerhub-meta.yml From 595cad516f78b5793465f926fc7335758daa8f23 Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 12:54:49 -0400 Subject: [PATCH 06/24] fix paths for hf --- CHANGELOG.md | 3 ++- README.md | 18 ++++++++++++------ app.py | 8 ++++---- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a113f52..407e5fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,4 +17,5 @@ Add new versions on top of olders. - Updated CI/CD GitHub Actions workflows: - `build-images.yml`: builds and pushes both CPU and GPU images to Docker Hub on push to `main`, tagged with version from `CHANGELOG.md` and `latest` - `deploy-to-hf.yml`: syncs app files and model weights to Hugging Face Space on push to `main`, using Git LFS for weight files -- Added `update-dockerhub-meta.yml` to make a pretty Dockerhub description based on the `README.md` \ No newline at end of file +- Added `update-dockerhub-meta.yml` to make a pretty Dockerhub description based on the `README.md` +- Fixed `uploads/`, `results/`, `annotated/` directories not being created at runtime on HF Spaces — re-enabled `mkdir` calls in `app.py` at startup (HF container filesystem can overlay image build dirs) diff --git a/README.md b/README.md index 9fd5b1a..29c3d91 100644 --- a/README.md +++ b/README.md @@ -108,13 +108,23 @@ Process 500 images for: ### Docker Deployment 1. **Build the Container**: + +- For only CPU usage: + ```bash - docker build -t nemaquant-flask . + docker pull breedinginsight/nemaquant + ``` + +- For GPU usage: + + ```bash + docker pull breedinginsight/nemaquant:latest-gpu ``` 2. 
**Run the Container**: + ```bash - docker run -p 7860:7860 -v $(pwd)/results:/app/results nemaquant-flask + docker run -p 7860:7860 -v $(pwd)/results:/app/results breedinginsight/nemaquant ``` ### Hugging Face Spaces Deployment @@ -191,7 +201,3 @@ Process 500 images for: - Time of day (free tier performance varies with overall platform usage) For most users, the free tier is sufficient for small to medium batches (< 200 images), while the CPU upgrade offers a good balance of cost and performance for larger datasets. GPU options are recommended only for time-sensitive processing of large batches or when processing thousands of images. - -## License - -[Specify your license here] diff --git a/app.py b/app.py index 133b81b..b8a60c7 100644 --- a/app.py +++ b/app.py @@ -43,10 +43,10 @@ app.config['WEIGHTS_FILE'] = str(WEIGHTS_FILE) app.config['ALLOWED_EXTENSIONS'] = {'png', 'jpg', 'jpeg', 'tif', 'tiff'} -# skip these -- created dirs in dockerfile -# UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True) -# RESULTS_FOLDER.mkdir(parents=True, exist_ok=True) -# ANNOT_FOLDER.mkdir(parents=True, exist_ok=True) +# Create dirs at startup in case the container filesystem overlays the image build dirs +UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True) +RESULTS_FOLDER.mkdir(parents=True, exist_ok=True) +ANNOT_FOLDER.mkdir(parents=True, exist_ok=True) # Load model once at startup, use CUDA if available MODEL_DEVICE = 'cuda' if cuda.is_available() else 'cpu' From 4710d63a980234606d7beba5049320b361de63f3 Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 13:11:04 -0400 Subject: [PATCH 07/24] fix attempt --- app.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/app.py b/app.py index b8a60c7..0a4ed4f 100644 --- a/app.py +++ b/app.py @@ -33,9 +33,14 @@ log.setLevel(logging.ERROR) APP_ROOT = Path(__file__).parent -UPLOAD_FOLDER = APP_ROOT / 'uploads' -RESULTS_FOLDER = APP_ROOT / 'results' -ANNOT_FOLDER = APP_ROOT / 'annotated' +# On HF 
Spaces, the app filesystem overlay makes app-root dirs unreliable across requests. +# Use /tmp (RAM-backed tmpfs, always writable) when running on HF Spaces. +# Locally, use app root so files persist across container restarts. +_ON_HF_SPACES = os.environ.get('SPACE_ID') is not None +_data_root = Path('/tmp/nemaquant') if _ON_HF_SPACES else APP_ROOT +UPLOAD_FOLDER = _data_root / 'uploads' +RESULTS_FOLDER = _data_root / 'results' +ANNOT_FOLDER = _data_root / 'annotated' WEIGHTS_FILE = APP_ROOT / 'weights.pt' app.config['UPLOAD_FOLDER'] = str(UPLOAD_FOLDER) app.config['RESULTS_FOLDER'] = str(RESULTS_FOLDER) @@ -43,10 +48,11 @@ app.config['WEIGHTS_FILE'] = str(WEIGHTS_FILE) app.config['ALLOWED_EXTENSIONS'] = {'png', 'jpg', 'jpeg', 'tif', 'tiff'} -# Create dirs at startup in case the container filesystem overlays the image build dirs +# Create dirs at startup UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True) RESULTS_FOLDER.mkdir(parents=True, exist_ok=True) ANNOT_FOLDER.mkdir(parents=True, exist_ok=True) +print(f"Running on HF Spaces: {_ON_HF_SPACES} | Data root: {_data_root}") # Load model once at startup, use CUDA if available MODEL_DEVICE = 'cuda' if cuda.is_available() else 'cpu' From 618f20bc48b85000312050d45345eefcae49b3c6 Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 13:42:15 -0400 Subject: [PATCH 08/24] fix hf build --- .github/workflows/deploy-to-hf.yml | 6 +++++- Dockerfile | 1 + README.md | 32 ++++++++++++++++++++++++++---- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/.github/workflows/deploy-to-hf.yml b/.github/workflows/deploy-to-hf.yml index 4ec5d84..2ba4328 100644 --- a/.github/workflows/deploy-to-hf.yml +++ b/.github/workflows/deploy-to-hf.yml @@ -78,12 +78,16 @@ jobs: - name: Commit and push run: | cd space_repo + echo "--- Remote URL ---" + git remote -v + echo "--- Current HEAD ---" + git log --oneline -3 git add . if git diff --cached --quiet; then echo "No changes to deploy." 
else git commit -m "deploy: sync from GitHub ${{ github.sha }}" - git push origin main + git push -v origin main fi - name: Summary diff --git a/Dockerfile b/Dockerfile index 08daaf2..2941f06 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,6 @@ # CPU image - use Dockerfile.gpu for GPU support FROM python:3.12.13-slim-trixie +# Cache bust: 2026-03-19 # run updates before switching over to non-root user RUN apt-get update && apt-get install -y --no-install-recommends \ diff --git a/README.md b/README.md index 29c3d91..5d58870 100644 --- a/README.md +++ b/README.md @@ -105,28 +105,52 @@ Process 500 images for: ``` The application will be available at `http://localhost:7860` -### Docker Deployment +### Container Deployment 1. **Build the Container**: -- For only CPU usage: +Default image `breedinginsight/nemaquant` is exclusive for **CPU usage**. +For **GPU usage** replace the image by: `breedinginsight/nemaquant:latest-gpu` + +- With Docker ```bash docker pull breedinginsight/nemaquant ``` -- For GPU usage: + +- With Apptainer/Singularity + Slurm from a server: ```bash - docker pull breedinginsight/nemaquant:latest-gpu + # 1) On the login node: pull the image once (creates a .sif file) + apptainer pull nemaquant_latest.sif docker://breedinginsight/nemaquant:latest + + # 2) Request an interactive compute allocation (adjust for your cluster and analysis) + salloc -c 4 --mem=16G --time=02:00:00 + + # 3) On the compute node shell that opens, run the app on port 7860 + export PORT=7860 + apptainer run --cleanenv --env PORT=$PORT nemaquant_latest.sif ``` +For **GPU usage** replace the image by: `breedinginsight/nemaquant:latest-gpu` + 2. **Run the Container**: +- With Docker + ```bash docker run -p 7860:7860 -v $(pwd)/results:/app/results breedinginsight/nemaquant ``` +- With Apptainer/Singularity + Slurm from our local computer (after running the above commands on server): + + ```bash + # Replace user and host with your cluster login node.
+ # If your cluster requires a direct tunnel to the compute node, adapt accordingly. + ssh -L 7860:localhost:7860 [userID]@[yourcluster.address] + ``` + ### Hugging Face Spaces Deployment 1. Create a new Space on [Hugging Face](https://huggingface.co/new-space) From 79ac8fe19fc8dfed0d591248889355a9bd69f532 Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 14:32:34 -0400 Subject: [PATCH 09/24] use tmp --- app.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/app.py b/app.py index 0a4ed4f..41585ff 100644 --- a/app.py +++ b/app.py @@ -33,14 +33,12 @@ log.setLevel(logging.ERROR) APP_ROOT = Path(__file__).parent -# On HF Spaces, the app filesystem overlay makes app-root dirs unreliable across requests. -# Use /tmp (RAM-backed tmpfs, always writable) when running on HF Spaces. -# Locally, use app root so files persist across container restarts. -_ON_HF_SPACES = os.environ.get('SPACE_ID') is not None -_data_root = Path('/tmp/nemaquant') if _ON_HF_SPACES else APP_ROOT -UPLOAD_FOLDER = _data_root / 'uploads' -RESULTS_FOLDER = _data_root / 'results' -ANNOT_FOLDER = _data_root / 'annotated' +# Use /tmp for runtime data — works reliably on all container platforms. +# /tmp is RAM-backed tmpfs, always writable, avoids overlay filesystem issues on HF Spaces. +# Note: /tmp is cleared on container restart (uploads/results are transient by design). 
+UPLOAD_FOLDER = Path('/tmp/nemaquant/uploads') +RESULTS_FOLDER = Path('/tmp/nemaquant/results') +ANNOT_FOLDER = Path('/tmp/nemaquant/annotated') WEIGHTS_FILE = APP_ROOT / 'weights.pt' app.config['UPLOAD_FOLDER'] = str(UPLOAD_FOLDER) app.config['RESULTS_FOLDER'] = str(RESULTS_FOLDER) @@ -48,11 +46,11 @@ app.config['WEIGHTS_FILE'] = str(WEIGHTS_FILE) app.config['ALLOWED_EXTENSIONS'] = {'png', 'jpg', 'jpeg', 'tif', 'tiff'} -# Create dirs at startup +# Create dirs at startup UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True) RESULTS_FOLDER.mkdir(parents=True, exist_ok=True) ANNOT_FOLDER.mkdir(parents=True, exist_ok=True) -print(f"Running on HF Spaces: {_ON_HF_SPACES} | Data root: {_data_root}") +print(f"Data root: /tmp/nemaquant | Weights: {WEIGHTS_FILE}") # Load model once at startup, use CUDA if available MODEL_DEVICE = 'cuda' if cuda.is_available() else 'cpu' From 886fc2c4d371fb73c4dbfb4e1efa27883b60b4b4 Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 14:42:48 -0400 Subject: [PATCH 10/24] set key --- README.md | 1 - app.py | 8 +++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5d58870..1c8f4b9 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,6 @@ For **GPU usage** replace the image by: `breedinginsight/nemaquant:latest-gpu` docker pull breedinginsight/nemaquant ``` - - With Apptainer/Singularity + Slurm from a server: ```bash diff --git a/app.py b/app.py index 41585ff..188a70d 100644 --- a/app.py +++ b/app.py @@ -25,7 +25,13 @@ from yolo_utils import detect_in_image app = Flask(__name__) -app.secret_key = os.environ.get('FLASK_SECRET_KEY', str(uuid.uuid4())) # For session security +_secret_key = os.environ.get('FLASK_SECRET_KEY') +if not _secret_key: + # Fallback for local dev only — sessions won't persist across restarts. + # On HF Spaces, set FLASK_SECRET_KEY as a Space secret to avoid session loss between workers. 
+ _secret_key = str(uuid.uuid4()) + print("WARNING: FLASK_SECRET_KEY not set — using random key. Sessions will break across workers/restarts.") +app.secret_key = _secret_key # disable werkzeug logging - too noisy # comment out these lines if you want to see full logs From f3f09c3028794227ffb4ba8e2dd441deed630b99 Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 14:52:50 -0400 Subject: [PATCH 11/24] debug --- app.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app.py b/app.py index 188a70d..451c4fd 100644 --- a/app.py +++ b/app.py @@ -31,6 +31,8 @@ # On HF Spaces, set FLASK_SECRET_KEY as a Space secret to avoid session loss between workers. _secret_key = str(uuid.uuid4()) print("WARNING: FLASK_SECRET_KEY not set — using random key. Sessions will break across workers/restarts.") +else: + print(f"INFO: FLASK_SECRET_KEY is set (length={len(_secret_key)})") app.secret_key = _secret_key # disable werkzeug logging - too noisy From facd3886b22089e9158175b9656a8e49cd1b5879 Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 15:00:30 -0400 Subject: [PATCH 12/24] more debug --- app.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/app.py b/app.py index 451c4fd..5c12641 100644 --- a/app.py +++ b/app.py @@ -87,10 +87,12 @@ def upload_files(): session_id = session['id'] files = request.files.getlist('files') upload_dir = Path(app.config['UPLOAD_FOLDER']) / session_id + print(f"DEBUG /uploads: session_id={session_id}, upload_dir={upload_dir}") # clear out any existing files for the session if upload_dir.exists(): shutil.rmtree(upload_dir) upload_dir.mkdir(parents=True, exist_ok=True) + print(f"DEBUG /uploads: dir created, exists={upload_dir.exists()}") # generate new unique filenames via uuid, save the mapping dict of old:new to session filename_map = {} uuid_map_to_uuid_imgname = {} @@ -165,6 +167,9 @@ def process_single_image(img_path, results_dir): @app.route('/process', methods=['POST']) def start_processing(): 
session_id = session['id'] + upload_dir_check = Path(app.config['UPLOAD_FOLDER']) / session_id + print(f"DEBUG /process: session_id={session_id}, upload_dir={upload_dir_check}, exists={upload_dir_check.exists()}") + print(f"DEBUG /process: /tmp/nemaquant/uploads contents={list(Path(app.config['UPLOAD_FOLDER']).iterdir()) if Path(app.config['UPLOAD_FOLDER']).exists() else 'UPLOAD_FOLDER missing'}") job_state = { "status": "starting", "progress": 0, From d8d396e34cf4c78eeefb27a01656a263b54ee41e Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 15:09:24 -0400 Subject: [PATCH 13/24] again --- app.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/app.py b/app.py index 5c12641..395ed86 100644 --- a/app.py +++ b/app.py @@ -35,7 +35,14 @@ print(f"INFO: FLASK_SECRET_KEY is set (length={len(_secret_key)})") app.secret_key = _secret_key -# disable werkzeug logging - too noisy +# HF Spaces serves over HTTPS via a reverse proxy and may embed the app in an iframe. +# SameSite=None;Secure is required so cookies are sent in cross-site/iframe POST requests. +# HF sets SPACE_HOST env var; fall back to checking SPACE_ID or SPACE_AUTHOR_NAME. 
+_on_https = any(os.environ.get(v) for v in ('SPACE_HOST', 'SPACE_ID', 'SPACE_AUTHOR_NAME')) +app.config['SESSION_COOKIE_SECURE'] = _on_https +app.config['SESSION_COOKIE_SAMESITE'] = 'None' if _on_https else 'Lax' +app.config['SESSION_COOKIE_HTTPONLY'] = True +print(f"INFO: SESSION_COOKIE_SECURE={_on_https}, SAMESITE={'None' if _on_https else 'Lax'}") # comment out these lines if you want to see full logs log = logging.getLogger('werkzeug') log.setLevel(logging.ERROR) From 81f99f3625b24e09c0abda4dd946734bb7cafd94 Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 15:20:30 -0400 Subject: [PATCH 14/24] solve apptainer bug --- Dockerfile | 2 +- Dockerfile.gpu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2941f06..b270102 100644 --- a/Dockerfile +++ b/Dockerfile @@ -57,4 +57,4 @@ EXPOSE 7860 # Use gunicorn for production deployment if preferred over Flask's development server # CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"] # For simplicity during development and typical HF Spaces use: -CMD ["python", "app.py"] \ No newline at end of file +CMD ["python", "/home/user/app/app.py"] \ No newline at end of file diff --git a/Dockerfile.gpu b/Dockerfile.gpu index 8230fdd..bf97616 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -45,4 +45,4 @@ ENV YOLO_CONFIG_DIR=$HOME/app/.yolo_config EXPOSE 7860 -CMD ["python", "app.py"] +CMD ["python", "/home/user/app/app.py"] From 7482a2bc0541f885d455f63e1b2774305c3e20f5 Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 15:27:47 -0400 Subject: [PATCH 15/24] solve2 for apptainer --- Dockerfile | 5 ++++- Dockerfile.gpu | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index b270102..d556e4f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,8 +12,11 @@ RUN useradd -m -u 1000 user USER user # Set home to user's home dir and add local bin to PATH +# PYTHONPATH is set explicitly so packages are 
found even when HOME is overridden +# (e.g. by Apptainer --cleanenv, which resets HOME to the host user's home) ENV HOME=/home/user \ - PATH=/home/user/.local/bin:$PATH + PATH=/home/user/.local/bin:$PATH \ + PYTHONPATH=/home/user/.local/lib/python3.12/site-packages # Set the working directory in the container WORKDIR $HOME/app diff --git a/Dockerfile.gpu b/Dockerfile.gpu index bf97616..86c7186 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -22,8 +22,11 @@ RUN usermod -l user ubuntu && usermod -d /home/user -m user USER user # Set home to user's home dir and add local bin to PATH +# PYTHONPATH is set explicitly so packages are found even when HOME is overridden +# (e.g. by Apptainer --cleanenv, which resets HOME to the host user's home) ENV HOME=/home/user \ - PATH=/home/user/.local/bin:$PATH + PATH=/home/user/.local/bin:$PATH \ + PYTHONPATH=/home/user/.local/lib/python3.12/site-packages # Set the working directory in the container WORKDIR $HOME/app From fc8dfa83a03305c178bc306a3cbf1f4b6fe50ac7 Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 15:38:23 -0400 Subject: [PATCH 16/24] more apptainer errors --- Dockerfile | 6 ++++-- Dockerfile.gpu | 6 ++++-- app.py | 10 +++++++--- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index d556e4f..b7a2745 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,8 +41,10 @@ COPY --chown=user . $HOME/app # we should not need to chown, since we are using USER user above RUN mkdir -p uploads results annotated .yolo_config -# set the env var for YOLO user config directory -ENV YOLO_CONFIG_DIR=$HOME/app/.yolo_config +# Point YOLO config to /tmp so it is writable under Apptainer (read-only SIF) +# and HF Spaces. /home/user/app/.yolo_config is kept in the image but only used +# as a fallback when the container filesystem is writable (plain Docker). 
+ENV YOLO_CONFIG_DIR=/tmp/nemaquant/.yolo_config # Copy the rest of the application code into the container at /app # This includes app.py, nemaquant.py, templates/, static/, etc. diff --git a/Dockerfile.gpu b/Dockerfile.gpu index 86c7186..9b85d8f 100644 --- a/Dockerfile.gpu +++ b/Dockerfile.gpu @@ -43,8 +43,10 @@ COPY --chown=user . $HOME/app # Create the necessary dirs RUN mkdir -p uploads results annotated .yolo_config -# set the env var for YOLO user config directory -ENV YOLO_CONFIG_DIR=$HOME/app/.yolo_config +# Point YOLO config to /tmp so it is writable under Apptainer (read-only SIF) +# and HF Spaces. /home/user/app/.yolo_config is kept in the image but only used +# as a fallback when the container filesystem is writable (plain Docker). +ENV YOLO_CONFIG_DIR=/tmp/nemaquant/.yolo_config EXPOSE 7860 diff --git a/app.py b/app.py index 395ed86..b8531e1 100644 --- a/app.py +++ b/app.py @@ -65,6 +65,10 @@ UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True) RESULTS_FOLDER.mkdir(parents=True, exist_ok=True) ANNOT_FOLDER.mkdir(parents=True, exist_ok=True) +# YOLO_CONFIG_DIR points to /tmp/nemaquant/.yolo_config (set in Dockerfile ENV). +# Create it here so ultralytics can write its cache on read-only container filesystems +# (e.g. Apptainer SIF images). 
+Path(os.environ.get('YOLO_CONFIG_DIR', '/tmp/nemaquant/.yolo_config')).mkdir(parents=True, exist_ok=True) print(f"Data root: /tmp/nemaquant | Weights: {WEIGHTS_FILE}") # Load model once at startup, use CUDA if available @@ -304,7 +308,7 @@ def annotate_image(): session_id = session['id'] uuid_map_to_uuid_imgname = session.get('uuid_map_to_uuid_imgname', {}) img_name = uuid_map_to_uuid_imgname.get(uuid) - orig_img_name = session['filename_map'].get(uuid) + orig_img_name = session.get('filename_map', {}).get(uuid) if not img_name: return jsonify({'error': 'File not found'}), 404 @@ -392,7 +396,7 @@ def export_csv(): data = request.json session_id = session['id'] job_state = session.get('job_state') - filename_map = session.get('filename_map') + filename_map = session.get('filename_map') or {} threshold = float(data.get('confidence', 0.5)) if not job_state: return jsonify({'error': 'Job not found'}), 404 @@ -410,7 +414,7 @@ def export_csv(): rows = [] for uuid in all_results.keys(): count = sum(1 for d in all_results[uuid] if d['score'] >= threshold) - rows.append({'Filename': filename_map[uuid], 'EggsDetected': count, 'ConfidenceThreshold': threshold}) + rows.append({'Filename': filename_map.get(uuid, uuid), 'EggsDetected': count, 'ConfidenceThreshold': threshold}) rows = sorted(rows, key=lambda x: x['Filename'].lower()) # write the CSV out timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') From 06f56a8b8f69f34fecef5d84c6cb28fb4247d9be Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 15:53:57 -0400 Subject: [PATCH 17/24] again --- app.py | 46 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/app.py b/app.py index b8531e1..e7e0f5d 100644 --- a/app.py +++ b/app.py @@ -1,5 +1,6 @@ import os import uuid +import json import traceback import sys import time @@ -54,6 +55,7 @@ UPLOAD_FOLDER = Path('/tmp/nemaquant/uploads') RESULTS_FOLDER = Path('/tmp/nemaquant/results') ANNOT_FOLDER = 
Path('/tmp/nemaquant/annotated') +SESSION_META_FOLDER = Path('/tmp/nemaquant/sessions') WEIGHTS_FILE = APP_ROOT / 'weights.pt' app.config['UPLOAD_FOLDER'] = str(UPLOAD_FOLDER) app.config['RESULTS_FOLDER'] = str(RESULTS_FOLDER) @@ -65,12 +67,34 @@ UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True) RESULTS_FOLDER.mkdir(parents=True, exist_ok=True) ANNOT_FOLDER.mkdir(parents=True, exist_ok=True) +SESSION_META_FOLDER.mkdir(parents=True, exist_ok=True) # YOLO_CONFIG_DIR points to /tmp/nemaquant/.yolo_config (set in Dockerfile ENV). # Create it here so ultralytics can write its cache on read-only container filesystems # (e.g. Apptainer SIF images). Path(os.environ.get('YOLO_CONFIG_DIR', '/tmp/nemaquant/.yolo_config')).mkdir(parents=True, exist_ok=True) print(f"Data root: /tmp/nemaquant | Weights: {WEIGHTS_FILE}") +# --------------------------------------------------------------------------- +# Session metadata helpers +# Flask's client-side cookie is limited to ~4KB. When many images are +# uploaded, filename_map / uuid_map_to_uuid_imgname can overflow. +# We persist them to disk so every route can recover them even when the +# cookie is absent or truncated (e.g. large batches, Apptainer --cleanenv, +# multi-worker gunicorn). 
+# --------------------------------------------------------------------------- +def _save_session_meta(session_id, filename_map, uuid_map): + meta_dir = SESSION_META_FOLDER / session_id + meta_dir.mkdir(parents=True, exist_ok=True) + with open(meta_dir / 'meta.json', 'w') as fh: + json.dump({'filename_map': filename_map, 'uuid_map_to_uuid_imgname': uuid_map}, fh) + +def _load_session_meta(session_id): + meta_path = SESSION_META_FOLDER / session_id / 'meta.json' + if meta_path.exists(): + with open(meta_path) as fh: + return json.load(fh) + return {} + # Load model once at startup, use CUDA if available MODEL_DEVICE = 'cuda' if cuda.is_available() else 'cpu' @@ -118,6 +142,8 @@ def upload_files(): uuid_map_to_uuid_imgname[uuid_base] = uuid_name session['filename_map'] = filename_map session['uuid_map_to_uuid_imgname'] = uuid_map_to_uuid_imgname + # Persist to disk — cookie may be silently dropped if it exceeds ~4KB + _save_session_meta(session_id, filename_map, uuid_map_to_uuid_imgname) return jsonify({'filename_map': filename_map, 'status': 'uploaded'}) # /preview route for serving original uploaded image @@ -127,7 +153,8 @@ def preview_image(): data = request.get_json() uuid = data.get('uuid') session_id = session['id'] - uuid_map_to_uuid_imgname = session.get('uuid_map_to_uuid_imgname', {}) + _meta = _load_session_meta(session_id) + uuid_map_to_uuid_imgname = session.get('uuid_map_to_uuid_imgname') or _meta.get('uuid_map_to_uuid_imgname', {}) img_name = uuid_map_to_uuid_imgname.get(uuid) if not img_name: print(f"/preview: No img_name found for uuid {uuid}") @@ -249,10 +276,12 @@ def get_progress(): job_state['status'] = 'completed' job_state['progress'] = 100 session['job_state'] = job_state + _meta = _load_session_meta(session_id) + _filename_map = session.get('filename_map') or _meta.get('filename_map', {}) resp = { 'status': 'completed', 'progress': 100, - 'filename_map': session.get('filename_map', {}), + 'filename_map': _filename_map, 'session_id': 
job_state.get('sessionId'), 'error': job_state.get('error'), } @@ -306,9 +335,10 @@ def annotate_image(): uuid = data.get('uuid') confidence = float(data.get('confidence', 0.5)) session_id = session['id'] - uuid_map_to_uuid_imgname = session.get('uuid_map_to_uuid_imgname', {}) + _meta = _load_session_meta(session_id) + uuid_map_to_uuid_imgname = session.get('uuid_map_to_uuid_imgname') or _meta.get('uuid_map_to_uuid_imgname', {}) img_name = uuid_map_to_uuid_imgname.get(uuid) - orig_img_name = session.get('filename_map', {}).get(uuid) + orig_img_name = (session.get('filename_map') or _meta.get('filename_map', {})).get(uuid) if not img_name: return jsonify({'error': 'File not found'}), 404 @@ -345,8 +375,9 @@ def export_images(): data = request.get_json() confidence = float(data.get('confidence', 0.5)) session_id = session['id'] - filename_map = session.get('filename_map', {}) - uuid_map_to_uuid_imgname = session.get('uuid_map_to_uuid_imgname', {}) + _meta = _load_session_meta(session_id) + filename_map = session.get('filename_map') or _meta.get('filename_map', {}) + uuid_map_to_uuid_imgname = session.get('uuid_map_to_uuid_imgname') or _meta.get('uuid_map_to_uuid_imgname', {}) # ensure there's a landing spot annot_dir = Path(app.config['ANNOT_FOLDER']) / session_id annot_dir.mkdir(parents=True, exist_ok=True) @@ -396,7 +427,8 @@ def export_csv(): data = request.json session_id = session['id'] job_state = session.get('job_state') - filename_map = session.get('filename_map') or {} + _meta = _load_session_meta(session_id) + filename_map = session.get('filename_map') or _meta.get('filename_map', {}) threshold = float(data.get('confidence', 0.5)) if not job_state: return jsonify({'error': 'Job not found'}), 404 From 2dc014988551004fa26182cb4eec05a9f5dacdf1 Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 17:03:31 -0400 Subject: [PATCH 18/24] fix for windows --- .github/workflows/build-images.yml | 2 +- .github/workflows/update-dockerhub-meta.yml | 2 
+- CHANGELOG.md | 27 +++++++++++++++++++++ static/script.js | 8 +++++- 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index 79e3219..74707aa 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -65,7 +65,7 @@ jobs: platforms: linux/amd64 provenance: false sbom: false - outputs: type=image,push=true,compression=zstd,compression-level=3,force-compression=true,oci-mediatypes=true + push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} cache-from: type=gha,scope=${{ matrix.variant }} diff --git a/.github/workflows/update-dockerhub-meta.yml b/.github/workflows/update-dockerhub-meta.yml index d39340f..eddd675 100644 --- a/.github/workflows/update-dockerhub-meta.yml +++ b/.github/workflows/update-dockerhub-meta.yml @@ -2,7 +2,7 @@ name: Update Docker Hub Metadata on: push: - branches: [ main, docker_beauty ] + branches: [ main ] paths: - README.md # re-run when README changes - .github/workflows/update-dockerhub-meta.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 407e5fc..2c12218 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,33 @@ Docker image is versioned based on this file. Please, follow the exact format to track versions and updates. Add new versions on top of olders. 
+## [0.0.2] - 2026-03-19 + +### Fixed — Docker image layer compression (Windows compatibility) +- Switched CI build output from `compression=zstd,oci-mediatypes=true` back to default gzip — zstd-compressed layers with OCI media types cause `failed to register layer: invalid tar header` on Windows Docker Desktop and older Apptainer versions regardless of Docker Engine version + +### Fixed — Apptainer / Singularity compatibility +- `CMD` now uses absolute path `/home/user/app/app.py` instead of relative `app.py` — Apptainer ignores Docker's `WORKDIR` and used the host's cwd, causing "No such file or directory" on launch +- `PYTHONPATH` baked into image as `/home/user/.local/lib/python3.12/site-packages` — Apptainer `--cleanenv` resets `HOME`, so packages installed under `~/.local` were not found (e.g. `ModuleNotFoundError: No module named 'cv2'`) +- `YOLO_CONFIG_DIR` moved from `/home/user/app/.yolo_config` to `/tmp/nemaquant/.yolo_config` — the SIF container image is read-only under Apptainer, causing repeated "Read-only file system" errors when ultralytics tried to write its cache + +### Fixed — Session data lost for large image batches +- Flask client-side cookies are limited to ~4 KB; uploading many images caused `filename_map` and `uuid_map_to_uuid_imgname` to overflow and be silently dropped by the browser, breaking Image Preview and annotation after processing +- Added `_save_session_meta()` / `_load_session_meta()` helpers that persist both maps to `/tmp/nemaquant/sessions//meta.json` +- All routes (`/preview`, `/annotate`, `/export_images`, `/export_csv`, `/progress`) now fall back to disk if the cookie is empty or missing + +### Fixed — `KeyError: 'filename_map'` in `/annotate` and `/export_csv` +- `session['filename_map']` replaced with `session.get('filename_map', {})` throughout — avoids crash when session data is missing after container restart or cookie expiry + +### Fixed — Session / cookie issues on HF Spaces (HTTPS proxy) +- Added `FLASK_SECRET_KEY` 
support: app reads from environment variable so the key is stable across gunicorn workers and restarts; falls back to a random key with a warning +- `SESSION_COOKIE_SECURE=True`, `SESSION_COOKIE_SAMESITE='None'` applied automatically when running on HF Spaces (detected via `SPACE_HOST`/`SPACE_ID` env vars) +- All `fetch()` calls in `static/script.js` now include `credentials: 'include'` so session cookies are forwarded on the HTTPS proxy + +### Changed — Runtime directories on HF Spaces / Apptainer +- `uploads/`, `results/`, `annotated/`, `.yolo_config/` moved from the container image layer (`/home/user/app/`) to `/tmp/nemaquant/` — avoids overlay filesystem permission errors on HF Spaces and read-only filesystem errors on Apptainer +- All directories created at app startup with `mkdir(parents=True, exist_ok=True)` so no manual setup is needed + ## [0.0.1] - 2026-03-19 - Docker images split by CPU or GPU usage diff --git a/static/script.js b/static/script.js index d8353a6..73dfdce 100644 --- a/static/script.js +++ b/static/script.js @@ -246,6 +246,7 @@ document.addEventListener('DOMContentLoaded', () => { try { const response = await fetch('/uploads', { method: 'POST', + credentials: 'include', body: formData }); if (response.ok) { @@ -361,6 +362,7 @@ document.addEventListener('DOMContentLoaded', () => { try { const response = await fetch('/process', { method: 'POST', + credentials: 'include', body: formData, }); if (!response.ok) { @@ -482,12 +484,14 @@ document.addEventListener('DOMContentLoaded', () => { if (isCompleted) { response = await fetch('/annotate', { method: 'POST', + credentials: 'include', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ uuid: uuid, confidence }) }); } else { response = await fetch('/preview', { method: 'POST', + credentials: 'include', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ uuid: uuid }) }); @@ -537,7 +541,7 @@ document.addEventListener('DOMContentLoaded', () => { 
progressInterval = setInterval(async () => { try { - const response = await fetch(`/progress`); + const response = await fetch('/progress', { credentials: 'include' }); if (!response.ok) { let errorText = `Progress check failed: ${response.status}`; try { @@ -1055,6 +1059,7 @@ document.addEventListener('DOMContentLoaded', () => { try { const resp = await fetch('/export_csv', { method: 'POST', + credentials: 'include', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ confidence: threshold }) }); @@ -1092,6 +1097,7 @@ document.addEventListener('DOMContentLoaded', () => { logStatus('Preparing annotated images for download...'); const resp = await fetch('/export_images', { method: 'POST', + credentials: 'include', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ confidence: threshold }) }); From 1f858f1a675b6695c73931c6295d55f65bed68e9 Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 17:24:14 -0400 Subject: [PATCH 19/24] now fixing Mac --- README.md | 2 +- app.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 1c8f4b9..0cbf765 100644 --- a/README.md +++ b/README.md @@ -139,7 +139,7 @@ For **GPU usage** replace the image by: `breedinginsight/nemaquant:latest-gpu` - With Docker ```bash - docker run -p 7860:7860 -v $(pwd)/results:/app/results breedinginsight/nemaquant + docker run -p 7860:7860 breedinginsight/nemaquant ``` - With Apptainer/Singularity + Slurm from our local computer (after running the above commands on server): diff --git a/app.py b/app.py index e7e0f5d..55d88d8 100644 --- a/app.py +++ b/app.py @@ -501,16 +501,15 @@ def print_startup_info(): except AttributeError: print("User running process: UID/GID not available on this OS") - for path_str in ["/app/uploads", "/app/results"]: - path_obj = Path(path_str) + for path_obj in [UPLOAD_FOLDER, RESULTS_FOLDER, ANNOT_FOLDER]: if path_obj.exists(): stat_info = path_obj.stat() permissions = 
oct(stat_info.st_mode)[-3:] owner = f"{stat_info.st_uid}:{stat_info.st_gid}" - print(f"Permissions for {path_str}: {permissions}") - print(f"Owner for {path_str}: {owner}") + print(f"Permissions for {path_obj}: {permissions}") + print(f"Owner for {path_obj}: {owner}") else: - print(f"Directory {path_str} does not exist.") + print(f"Directory {path_obj} does not exist.") # some cleanup steps - not sure quite where to put these print('Running periodic cleanup of old sessions...') From 7c94ae86e6a2671641dc0f6220fd590a4c9b64fc Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 17:44:16 -0400 Subject: [PATCH 20/24] fix gpu container --- CHANGELOG.md | 14 ++++++++ app.py | 87 +++++++++++++++++++++++++++++++++++++----------- static/script.js | 7 +++- 3 files changed, 88 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c12218..8ff4cbc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,20 @@ Docker image is versioned based on this file. Please, follow the exact format to track versions and updates. Add new versions on top of olders. 
+## [0.0.3] - 2026-03-19 + +### Fixed — HF Spaces: "No such file or directory" after upload +- `/uploads` now returns `session_id` in its JSON response; the JS stores it and sends it back as a form field in `/process` and as a query param in `/progress` +- `/process` and `/progress` use the client-supplied `session_id` as a fallback when the session cookie is absent or points to a different session (common on HF Spaces HTTPS proxy where `SameSite=None` cookies are not always forwarded) + +### Fixed — GPU: "Cannot re-initialize CUDA in forked subprocess" +- Replaced `multiprocessing.Pool` with `concurrent.futures.ThreadPoolExecutor` for GPU inference — Pool uses `fork` by default, which copies the parent's CUDA context into workers causing a crash; threads share the parent context without re-initialising it +- GPU model (`_gpu_model`) is now loaded once at startup and reused by the thread worker `process_single_image_thread()`, avoiding redundant model loads +- CPU path unchanged: continues to use `Pool` with per-worker `init_worker()` for true parallelism + +### Fixed — Docker image: `Directory /app/uploads does not exist` warning +- Diagnostic startup check was hardcoded to old `/app/uploads` and `/app/results` paths; now uses `UPLOAD_FOLDER`, `RESULTS_FOLDER`, `ANNOT_FOLDER` constants (`/tmp/nemaquant/…`) + ## [0.0.2] - 2026-03-19 ### Fixed — Docker image layer compression (Windows compatibility) diff --git a/app.py b/app.py index 55d88d8..121fd64 100644 --- a/app.py +++ b/app.py @@ -19,6 +19,7 @@ from flask import Flask, Response, render_template, request, jsonify, send_file, session from multiprocessing.pool import Pool from multiprocessing import set_start_method +from concurrent.futures import ThreadPoolExecutor from pathlib import Path from PIL import Image from datetime import datetime @@ -98,6 +99,25 @@ def _load_session_meta(session_id): # Load model once at startup, use CUDA if available MODEL_DEVICE = 'cuda' if cuda.is_available() else 'cpu' +# For GPU: 
load the model globally at startup so threads can reuse it without +# re-initialising CUDA (forked Pool workers cannot re-init CUDA in the child). +# For CPU: model is loaded per-worker in init_worker() instead. +_gpu_model = None +if MODEL_DEVICE == 'cuda': + _gpu_model = YOLO(str(WEIGHTS_FILE)) + _gpu_model.to('cuda') + print(f'GPU model loaded at startup on {MODEL_DEVICE}') + +# Wrapper so GPU futures (concurrent.futures.Future) expose the same +# .ready() interface as multiprocessing AsyncResult. +class _FutureWrapper: + def __init__(self, future): + self._f = future + def ready(self): + return self._f.done() + def get(self): + return self._f.result() + # need a global dict to hold async results objects # so you can check the progress of an abr # maybe there's a better way around this? @@ -144,7 +164,7 @@ def upload_files(): session['uuid_map_to_uuid_imgname'] = uuid_map_to_uuid_imgname # Persist to disk — cookie may be silently dropped if it exceeds ~4KB _save_session_meta(session_id, filename_map, uuid_map_to_uuid_imgname) - return jsonify({'filename_map': filename_map, 'status': 'uploaded'}) + return jsonify({'filename_map': filename_map, 'session_id': session_id, 'status': 'uploaded'}) # /preview route for serving original uploaded image @app.route('/preview', methods=['POST']) @@ -184,15 +204,12 @@ def preview_image(): return jsonify({'error': str(e)}), 500 # initializer for Pool to load model in each process -# each worker will have its own model instance +# each worker will have its own model instance (CPU only) def init_worker(model_path): global model model = YOLO(model_path) - if MODEL_DEVICE == 'cuda': - model.to('cuda') -# not sure if we need this decorator anymore? 
-#@ThreadingLocked() +# CPU pool worker — uses per-worker model loaded by init_worker() def process_single_image(img_path, results_dir): global model uuid_base = img_path.stem @@ -202,10 +219,33 @@ def process_single_image(img_path, results_dir): pickle.dump(results, pf) return uuid_base +# GPU thread worker — reuses the global _gpu_model loaded at startup +def process_single_image_thread(img_path, results_dir): + global _gpu_model + uuid_base = img_path.stem + pickle_path = results_dir / f"{uuid_base}.pkl" + results = detect_in_image(_gpu_model, str(img_path)) + with open(pickle_path, 'wb') as pf: + pickle.dump(results, pf) + return uuid_base + @app.route('/process', methods=['POST']) def start_processing(): session_id = session['id'] + # The client echoes back the session_id it received from /uploads. + # On HF Spaces the session cookie can be missing on subsequent requests + # (HTTPS proxy / SameSite), so we fall back to the client-supplied id + # when the cookie-based id doesn't have an upload directory. 
+ client_session_id = request.form.get('session_id', '') upload_dir_check = Path(app.config['UPLOAD_FOLDER']) / session_id + if not upload_dir_check.exists() and client_session_id: + fallback_dir = Path(app.config['UPLOAD_FOLDER']) / client_session_id + if fallback_dir.exists(): + print(f"DEBUG /process: cookie session {session_id} has no upload dir; " + f"using client-supplied session {client_session_id}") + session_id = client_session_id + session['id'] = session_id + upload_dir_check = fallback_dir print(f"DEBUG /process: session_id={session_id}, upload_dir={upload_dir_check}, exists={upload_dir_check.exists()}") print(f"DEBUG /process: /tmp/nemaquant/uploads contents={list(Path(app.config['UPLOAD_FOLDER']).iterdir()) if Path(app.config['UPLOAD_FOLDER']).exists() else 'UPLOAD_FOLDER missing'}") job_state = { @@ -226,21 +266,22 @@ def start_processing(): try: if MODEL_DEVICE == 'cuda': - n_proc = 1 + # GPU: run in a single thread so CUDA is never re-initialised in a + # forked subprocess (Pool uses fork by default, which breaks CUDA). 
+ def _gpu_task(): + for img_path, res_dir in arg_list: + process_single_image_thread(img_path, res_dir) + executor = ThreadPoolExecutor(max_workers=1) + future = executor.submit(_gpu_task) + executor.shutdown(wait=False) + async_results[session_id] = _FutureWrapper(future) else: n_proc = os.cpu_count() - # Initialize job state - job_state = { - "status": "starting", - "progress": 0, - "started": True - } - session['job_state'] = job_state - pool = Pool(processes=n_proc, - initializer=init_worker, - initargs=(str(WEIGHTS_FILE),)) - async_results[session_id] = pool.starmap_async(process_single_image, arg_list) - pool.close() + pool = Pool(processes=n_proc, + initializer=init_worker, + initargs=(str(WEIGHTS_FILE),)) + async_results[session_id] = pool.starmap_async(process_single_image, arg_list) + pool.close() # Update job state after process launch job_state["status"] = "processing" @@ -258,8 +299,16 @@ def start_processing(): @app.route('/progress') def get_progress(): session_id = session['id'] + # Accept client-supplied session_id as fallback (cookie may be missing on HF Spaces) + client_session_id = request.args.get('session_id', '') + if client_session_id and session_id not in async_results and client_session_id in async_results: + session_id = client_session_id + session['id'] = session_id try: job_state = session.get('job_state') + # If session lost job_state but we have an async_result, reconstruct from disk + if not job_state and session_id in async_results: + job_state = {'status': 'processing', 'progress': 0, 'sessionId': session_id} if not job_state: print("/progress: No job_state found in session.") return jsonify({"status": "error", "error": "No job state"}), 404 diff --git a/static/script.js b/static/script.js index 73dfdce..d25aad4 100644 --- a/static/script.js +++ b/static/script.js @@ -33,6 +33,7 @@ document.addEventListener('DOMContentLoaded', () => { let currentJobId = null; let currentZoomLevel = 1; let filenameMap = {}; + let uploadSessionId = 
''; // echoed back to /process as cookie-independent fallback const MAX_ZOOM = 3; const MIN_ZOOM = 0.5; let progressInterval = null; // Interval timer for polling @@ -253,6 +254,7 @@ document.addEventListener('DOMContentLoaded', () => { const data = await response.json(); logStatus('Files uploaded successfully.'); filenameMap = data.filename_map || {}; + uploadSessionId = data.session_id || ''; // Update results table with filenames and View buttons resultsTableBody.innerHTML = ''; @@ -358,6 +360,9 @@ document.addEventListener('DOMContentLoaded', () => { } formData.append('input_mode', mode); formData.append('confidence_threshold', confidenceSlider.value); + // Send back the session_id from /uploads so the server can recover the + // correct upload directory when the session cookie is missing (HF Spaces). + if (uploadSessionId) formData.append('session_id', uploadSessionId); try { const response = await fetch('/process', { @@ -541,7 +546,7 @@ document.addEventListener('DOMContentLoaded', () => { progressInterval = setInterval(async () => { try { - const response = await fetch('/progress', { credentials: 'include' }); + const response = await fetch(`/progress?session_id=${encodeURIComponent(uploadSessionId)}`, { credentials: 'include' }); if (!response.ok) { let errorText = `Progress check failed: ${response.status}`; try { From 8d59aee7a4b0e9ca1d770c6f3ba0f5b467be1937 Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 18:23:35 -0400 Subject: [PATCH 21/24] still trying to fix hf --- README.md | 2 +- app.py | 40 ++++++++++++++++++++++++++-------------- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 0cbf765..38b32bb 100644 --- a/README.md +++ b/README.md @@ -132,7 +132,7 @@ For **GPU usage** replace the image by: `breedinginsight/nemaquant:latest-gpu` apptainer run --cleanenv --env PORT=$PORT nemaquant_latest.sif ``` -For **GPU usage** replace the image by: `breedinginsight/nemaquant:latest-gpu` +For 
**GPU usage** replace the image by: `breedinginsight/nemaquant:latest-gpu` and the option `--nv` for apptainer/singularity run. 2. **Run the Container**: diff --git a/app.py b/app.py index 121fd64..074acf9 100644 --- a/app.py +++ b/app.py @@ -237,15 +237,16 @@ def start_processing(): # (HTTPS proxy / SameSite), so we fall back to the client-supplied id # when the cookie-based id doesn't have an upload directory. client_session_id = request.form.get('session_id', '') - upload_dir_check = Path(app.config['UPLOAD_FOLDER']) / session_id - if not upload_dir_check.exists() and client_session_id: - fallback_dir = Path(app.config['UPLOAD_FOLDER']) / client_session_id - if fallback_dir.exists(): - print(f"DEBUG /process: cookie session {session_id} has no upload dir; " - f"using client-supplied session {client_session_id}") + if client_session_id: + # Prefer the client-supplied id unconditionally — it's the authoritative + # id from the /uploads call; the cookie may point to a different worker session. 
+ client_dir = Path(app.config['UPLOAD_FOLDER']) / client_session_id + if client_dir.exists() or not (Path(app.config['UPLOAD_FOLDER']) / session_id).exists(): + print(f"DEBUG /process: using client-supplied session {client_session_id} " + f"(cookie session was {session_id})") session_id = client_session_id session['id'] = session_id - upload_dir_check = fallback_dir + upload_dir_check = Path(app.config['UPLOAD_FOLDER']) / session_id print(f"DEBUG /process: session_id={session_id}, upload_dir={upload_dir_check}, exists={upload_dir_check.exists()}") print(f"DEBUG /process: /tmp/nemaquant/uploads contents={list(Path(app.config['UPLOAD_FOLDER']).iterdir()) if Path(app.config['UPLOAD_FOLDER']).exists() else 'UPLOAD_FOLDER missing'}") job_state = { @@ -256,15 +257,26 @@ def start_processing(): session['job_state'] = job_state upload_dir = Path(app.config['UPLOAD_FOLDER']) / session_id results_dir = Path(app.config['RESULTS_FOLDER']) / session_id - # clean out old results if needed - if results_dir.exists(): - shutil.rmtree(results_dir) - results_dir.mkdir(parents=True) - - # set up iterable of uploaded files to process - arg_list = [(x,results_dir) for x in list(upload_dir.iterdir())] try: + # Fail fast with a clear message if the upload directory is missing + if not upload_dir.exists(): + available = [d.name for d in Path(app.config['UPLOAD_FOLDER']).iterdir()] \ + if Path(app.config['UPLOAD_FOLDER']).exists() else [] + msg = (f"Upload directory not found: {upload_dir}. 
" + f"cookie_session={session['id']}, client_session={request.form.get('session_id','')}, " + f"available={available}") + print(f"ERROR /process: {msg}") + return jsonify({'error': msg}), 500 + + # clean out old results if needed + if results_dir.exists(): + shutil.rmtree(results_dir) + results_dir.mkdir(parents=True) + + # set up iterable of uploaded files to process + arg_list = [(x, results_dir) for x in list(upload_dir.iterdir())] + if MODEL_DEVICE == 'cuda': # GPU: run in a single thread so CUDA is never re-initialised in a # forked subprocess (Pool uses fork by default, which breaks CUDA). From ded10b2c8bf0361526be3cfd8375c5d664941035 Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 19:28:28 -0400 Subject: [PATCH 22/24] HF now works - fixing figure display --- app.py | 10 ++++++---- static/script.js | 8 ++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/app.py b/app.py index 074acf9..390db9c 100644 --- a/app.py +++ b/app.py @@ -172,7 +172,8 @@ def preview_image(): try: data = request.get_json() uuid = data.get('uuid') - session_id = session['id'] + # Prefer client-supplied session_id (cookie may differ on HF Spaces HTTPS proxy) + session_id = data.get('session_id') or session['id'] _meta = _load_session_meta(session_id) uuid_map_to_uuid_imgname = session.get('uuid_map_to_uuid_imgname') or _meta.get('uuid_map_to_uuid_imgname', {}) img_name = uuid_map_to_uuid_imgname.get(uuid) @@ -395,7 +396,8 @@ def annotate_image(): data = request.get_json() uuid = data.get('uuid') confidence = float(data.get('confidence', 0.5)) - session_id = session['id'] + # Prefer client-supplied session_id (cookie may differ on HF Spaces HTTPS proxy) + session_id = data.get('session_id') or session['id'] _meta = _load_session_meta(session_id) uuid_map_to_uuid_imgname = session.get('uuid_map_to_uuid_imgname') or _meta.get('uuid_map_to_uuid_imgname', {}) img_name = uuid_map_to_uuid_imgname.get(uuid) @@ -435,7 +437,7 @@ def export_images(): try: 
data = request.get_json() confidence = float(data.get('confidence', 0.5)) - session_id = session['id'] + session_id = data.get('session_id') or session['id'] _meta = _load_session_meta(session_id) filename_map = session.get('filename_map') or _meta.get('filename_map', {}) uuid_map_to_uuid_imgname = session.get('uuid_map_to_uuid_imgname') or _meta.get('uuid_map_to_uuid_imgname', {}) @@ -486,7 +488,7 @@ def export_images(): def export_csv(): try: data = request.json - session_id = session['id'] + session_id = data.get('session_id') or session['id'] job_state = session.get('job_state') _meta = _load_session_meta(session_id) filename_map = session.get('filename_map') or _meta.get('filename_map', {}) diff --git a/static/script.js b/static/script.js index d25aad4..ef99cfe 100644 --- a/static/script.js +++ b/static/script.js @@ -491,14 +491,14 @@ document.addEventListener('DOMContentLoaded', () => { method: 'POST', credentials: 'include', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ uuid: uuid, confidence }) + body: JSON.stringify({ uuid: uuid, confidence, session_id: uploadSessionId }) }); } else { response = await fetch('/preview', { method: 'POST', credentials: 'include', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ uuid: uuid }) + body: JSON.stringify({ uuid: uuid, session_id: uploadSessionId }) }); } if (response.ok) { @@ -1066,7 +1066,7 @@ document.addEventListener('DOMContentLoaded', () => { method: 'POST', credentials: 'include', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ confidence: threshold }) + body: JSON.stringify({ confidence: threshold, session_id: uploadSessionId }) }); if (!resp.ok) throw new Error('Failed to export CSV'); const blob = await resp.blob(); @@ -1104,7 +1104,7 @@ document.addEventListener('DOMContentLoaded', () => { method: 'POST', credentials: 'include', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ confidence: threshold }) + 
body: JSON.stringify({ confidence: threshold, session_id: uploadSessionId }) }); if (!resp.ok) throw new Error('Failed to export images'); const blob = await resp.blob(); From 62437f241d21dd95904e6792927a3801c801368d Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Thu, 19 Mar 2026 19:41:27 -0400 Subject: [PATCH 23/24] cleaning after fix --- CHANGELOG.md | 62 ++++++++++++++++++++++++---------------------------- app.py | 20 +---------------- 2 files changed, 30 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ff4cbc..09ee127 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,46 +2,42 @@ Docker image is versioned based on this file. Please, follow the exact format to track versions and updates. Add new versions on top of olders. -## [0.0.3] - 2026-03-19 +## [0.0.2] - 2026-03-19 -### Fixed — HF Spaces: "No such file or directory" after upload -- `/uploads` now returns `session_id` in its JSON response; the JS stores it and sends it back as a form field in `/process` and as a query param in `/progress` -- `/process` and `/progress` use the client-supplied `session_id` as a fallback when the session cookie is absent or points to a different session (common on HF Spaces HTTPS proxy where `SameSite=None` cookies are not always forwarded) +### Fixed — Hugging Face Spaces deployment -### Fixed — GPU: "Cannot re-initialize CUDA in forked subprocess" -- Replaced `multiprocessing.Pool` with `concurrent.futures.ThreadPoolExecutor` for GPU inference — Pool uses `fork` by default, which copies the parent's CUDA context into workers causing a crash; threads share the parent context without re-initialising it -- GPU model (`_gpu_model`) is now loaded once at startup and reused by the thread worker `process_single_image_thread()`, avoiding redundant model loads -- CPU path unchanged: continues to use `Pool` with per-worker `init_worker()` for true parallelism +HF Spaces runs the container behind an HTTPS reverse proxy. 
Flask's default session cookie settings and the 4 KB cookie size limit caused every route after `/uploads` to silently operate on a different session, making processing and preview fail.
 
-### Fixed — Docker image: `Directory /app/uploads does not exist` warning
-- Diagnostic startup check was hardcoded to old `/app/uploads` and `/app/results` paths; now uses `UPLOAD_FOLDER`, `RESULTS_FOLDER`, `ANNOT_FOLDER` constants (`/tmp/nemaquant/…`)
+**Root cause:** the HTTPS proxy requires `SameSite=None; Secure` cookies to forward them cross-origin, but even with correct cookie settings the client-side cookie can be dropped when it exceeds ~4 KB (large batches) or when gunicorn assigns a different worker. The real fix was making the session ID travel explicitly in the request body rather than relying solely on the cookie.
 
-## [0.0.2] - 2026-03-19
+**Changes:**
+- `FLASK_SECRET_KEY` read from environment variable — required so the signed cookie is consistent across gunicorn workers and restarts. Falls back to a random key with a warning for local dev
+- `SESSION_COOKIE_SECURE=True`, `SESSION_COOKIE_SAMESITE='None'` set automatically when HF Spaces env vars (`SPACE_HOST`, `SPACE_ID`) are detected
+- All `fetch()` calls in `static/script.js` include `credentials: 'include'`
+- `/uploads` returns `session_id` in its JSON response; JS stores it as `uploadSessionId`
+- Every subsequent request sends `uploadSessionId` back explicitly: as a form field (`/process`), query param (`/progress`), or JSON body field (`/preview`, `/annotate`, `/export_csv`, `/export_images`)
+- All server routes use `client_session_id or session['id']` — client-supplied id is authoritative since it came directly from the `/uploads` response
+- `filename_map` and `uuid_map_to_uuid_imgname` persisted to `/tmp/nemaquant/sessions/<session_id>/meta.json` at upload time and loaded from disk in all routes as fallback when the cookie data is missing or truncated
+
+### Fixed — GPU: "Cannot re-initialize CUDA in forked
subprocess" -### Fixed — Docker image layer compression (Windows compatibility) -- Switched CI build output from `compression=zstd,oci-mediatypes=true` back to default gzip — zstd-compressed layers with OCI media types cause `failed to register layer: invalid tar header` on Windows Docker Desktop and older Apptainer versions regardless of Docker Engine version +- `multiprocessing.Pool` (which uses `fork` by default) copies the parent's CUDA context into child processes, causing a crash when CUDA was already initialized at startup +- GPU path now uses `concurrent.futures.ThreadPoolExecutor` — threads share the parent's CUDA context without re-initializing it +- GPU model (`_gpu_model`) loaded once at startup; CPU model loaded per-worker via `init_worker()` as before ### Fixed — Apptainer / Singularity compatibility -- `CMD` now uses absolute path `/home/user/app/app.py` instead of relative `app.py` — Apptainer ignores Docker's `WORKDIR` and used the host's cwd, causing "No such file or directory" on launch -- `PYTHONPATH` baked into image as `/home/user/.local/lib/python3.12/site-packages` — Apptainer `--cleanenv` resets `HOME`, so packages installed under `~/.local` were not found (e.g. 
`ModuleNotFoundError: No module named 'cv2'`) -- `YOLO_CONFIG_DIR` moved from `/home/user/app/.yolo_config` to `/tmp/nemaquant/.yolo_config` — the SIF container image is read-only under Apptainer, causing repeated "Read-only file system" errors when ultralytics tried to write its cache - -### Fixed — Session data lost for large image batches -- Flask client-side cookies are limited to ~4 KB; uploading many images caused `filename_map` and `uuid_map_to_uuid_imgname` to overflow and be silently dropped by the browser, breaking Image Preview and annotation after processing -- Added `_save_session_meta()` / `_load_session_meta()` helpers that persist both maps to `/tmp/nemaquant/sessions//meta.json` -- All routes (`/preview`, `/annotate`, `/export_images`, `/export_csv`, `/progress`) now fall back to disk if the cookie is empty or missing - -### Fixed — `KeyError: 'filename_map'` in `/annotate` and `/export_csv` -- `session['filename_map']` replaced with `session.get('filename_map', {})` throughout — avoids crash when session data is missing after container restart or cookie expiry - -### Fixed — Session / cookie issues on HF Spaces (HTTPS proxy) -- Added `FLASK_SECRET_KEY` support: app reads from environment variable so the key is stable across gunicorn workers and restarts; falls back to a random key with a warning -- `SESSION_COOKIE_SECURE=True`, `SESSION_COOKIE_SAMESITE='None'` applied automatically when running on HF Spaces (detected via `SPACE_HOST`/`SPACE_ID` env vars) -- All `fetch()` calls in `static/script.js` now include `credentials: 'include'` so session cookies are forwarded on the HTTPS proxy - -### Changed — Runtime directories on HF Spaces / Apptainer -- `uploads/`, `results/`, `annotated/`, `.yolo_config/` moved from the container image layer (`/home/user/app/`) to `/tmp/nemaquant/` — avoids overlay filesystem permission errors on HF Spaces and read-only filesystem errors on Apptainer -- All directories created at app startup with `mkdir(parents=True, 
exist_ok=True)` so no manual setup is needed + +- `CMD` uses absolute path `/home/user/app/app.py` — Apptainer ignores `WORKDIR` and uses the host's cwd, causing "No such file or directory" at startup +- `PYTHONPATH=/home/user/.local/lib/python3.12/site-packages` baked into the image — `--cleanenv` resets `HOME` so pip user packages were not found (`ModuleNotFoundError: No module named 'cv2'`) +- `YOLO_CONFIG_DIR` moved to `/tmp/nemaquant/.yolo_config` — the SIF image is read-only, ultralytics could not write its cache to the image layer + +### Fixed — Docker image layer format (Windows compatibility) + +- Removed `compression=zstd,oci-mediatypes=true` from the CI build — zstd layers with OCI media types cause `failed to register layer: invalid tar header` on Windows Docker Desktop and Apptainer regardless of engine version. Reverted to default gzip (Docker schema v2) + +### Changed — Runtime data directories + +- `uploads/`, `results/`, `annotated/`, `.yolo_config/` moved from `/home/user/app/` (baked into the image layer) to `/tmp/nemaquant/` — avoids overlay filesystem write errors on HF Spaces and read-only filesystem errors on Apptainer. All directories created at app startup ## [0.0.1] - 2026-03-19 diff --git a/app.py b/app.py index 390db9c..de62300 100644 --- a/app.py +++ b/app.py @@ -12,13 +12,11 @@ import shutil import logging from ultralytics import YOLO -# from ultralytics.utils import ThreadingLocked import numpy as np import pandas as pd from torch import cuda from flask import Flask, Response, render_template, request, jsonify, send_file, session from multiprocessing.pool import Pool -from multiprocessing import set_start_method from concurrent.futures import ThreadPoolExecutor from pathlib import Path from PIL import Image @@ -118,9 +116,7 @@ def ready(self): def get(self): return self._f.result() -# need a global dict to hold async results objects -# so you can check the progress of an abr -# maybe there's a better way around this? 
+# Global dict mapping session_id -> async result (Pool AsyncResult or _FutureWrapper) async_results = {} @app.errorhandler(Exception) @@ -129,9 +125,6 @@ def handle_exception(e): print(traceback.format_exc()) return jsonify({"error": "Server error", "log": str(e)}), 500 -# def allowed_file(filename): -# return '.' in filename and filename.rsplit('.', 1)[1].lower() in app.config['ALLOWED_EXTENSIONS'] - @app.route('/') def index(): return render_template('index.html') @@ -142,12 +135,10 @@ def upload_files(): session_id = session['id'] files = request.files.getlist('files') upload_dir = Path(app.config['UPLOAD_FOLDER']) / session_id - print(f"DEBUG /uploads: session_id={session_id}, upload_dir={upload_dir}") # clear out any existing files for the session if upload_dir.exists(): shutil.rmtree(upload_dir) upload_dir.mkdir(parents=True, exist_ok=True) - print(f"DEBUG /uploads: dir created, exists={upload_dir.exists()}") # generate new unique filenames via uuid, save the mapping dict of old:new to session filename_map = {} uuid_map_to_uuid_imgname = {} @@ -243,13 +234,8 @@ def start_processing(): # id from the /uploads call; the cookie may point to a different worker session. 
client_dir = Path(app.config['UPLOAD_FOLDER']) / client_session_id if client_dir.exists() or not (Path(app.config['UPLOAD_FOLDER']) / session_id).exists(): - print(f"DEBUG /process: using client-supplied session {client_session_id} " - f"(cookie session was {session_id})") session_id = client_session_id session['id'] = session_id - upload_dir_check = Path(app.config['UPLOAD_FOLDER']) / session_id - print(f"DEBUG /process: session_id={session_id}, upload_dir={upload_dir_check}, exists={upload_dir_check.exists()}") - print(f"DEBUG /process: /tmp/nemaquant/uploads contents={list(Path(app.config['UPLOAD_FOLDER']).iterdir()) if Path(app.config['UPLOAD_FOLDER']).exists() else 'UPLOAD_FOLDER missing'}") job_state = { "status": "starting", "progress": 0, @@ -534,10 +520,6 @@ def export_csv(): def ensure_session(): if 'id' not in session: session['id'] = uuid.uuid4().hex - print(f"New session started: {session['id']}") - else: - pass - # print(f"Existing session: {session['id']}") def print_startup_info(): From fda60f25e677b54ae75c44b10753bfed6cd8a549 Mon Sep 17 00:00:00 2001 From: Cristianetaniguti Date: Fri, 20 Mar 2026 10:02:03 -0400 Subject: [PATCH 24/24] fixing drag --- CHANGELOG.md | 5 +++ static/script.js | 84 ++++++++++++++++++++++++------------------------ 2 files changed, 47 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 09ee127..e9237a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,11 @@ HF Spaces runs the container behind an HTTPS reverse proxy. Flask's default sess - Removed `compression=zstd,oci-mediatypes=true` from the CI build — zstd layers with OCI media types cause `failed to register layer: invalid tar header` on Windows Docker Desktop and Apptainer regardless of engine version. 
Reverted to default gzip (Docker schema v2) +### Fixed — Drag-and-drop file upload not working + +- Drag-and-drop called `handleFiles()` (which set the valid file list) but never called `/uploads`, so no files were on the server when "Start Processing" was clicked. The `fileInput.files = files` assignment at the end of `handleFiles()` was a silent no-op — `fileInput.files` is read-only +- Extracted the `/uploads` fetch into a shared `uploadFilesToServer()` function; both the file-picker `change` event and the `drop` event now call it + ### Changed — Runtime data directories - `uploads/`, `results/`, `annotated/`, `.yolo_config/` moved from `/home/user/app/` (baked into the image layer) to `/tmp/nemaquant/` — avoids overlay filesystem write errors on HF Spaces and read-only filesystem errors on Apptainer. All directories created at app startup diff --git a/static/script.js b/static/script.js index ef99cfe..ab8cfd5 100644 --- a/static/script.js +++ b/static/script.js @@ -162,7 +162,6 @@ document.addEventListener('DOMContentLoaded', () => { filteredValidFiles = validFiles; const invalidFiles = Array.from(files).filter(file => !allowedTypes.includes(file.type)); - // Only print invalid file warnings if not in Keyence mode if (invalidFiles.length > 0 && inputMode.value !== 'keyence') { logStatus(`Warning: Skipped ${invalidFiles.length} invalid files. 
Only PNG, JPG, and TIFF are supported.`); @@ -189,7 +188,6 @@ document.addEventListener('DOMContentLoaded', () => { `; fileList.appendChild(summaryDiv); - fileInput.files = files; updateUploadState(validFiles.length); } @@ -228,9 +226,10 @@ document.addEventListener('DOMContentLoaded', () => { dropZone.classList.remove('drag-over'); } - dropZone.addEventListener('drop', (e) => { + dropZone.addEventListener('drop', async (e) => { const dt = e.dataTransfer; handleFiles(dt.files); + await uploadFilesToServer(); }); // Click to upload @@ -238,49 +237,50 @@ document.addEventListener('DOMContentLoaded', () => { fileInput.click(); }); - fileInput.addEventListener('change', async () => { - handleFiles(fileInput.files); - if (filteredValidFiles && filteredValidFiles.length > 0) { - // Prepare FormData for upload - const formData = new FormData(); - filteredValidFiles.forEach(f => formData.append('files', f)); - try { - const response = await fetch('/uploads', { - method: 'POST', - credentials: 'include', - body: formData + async function uploadFilesToServer() { + if (!filteredValidFiles || filteredValidFiles.length === 0) return; + const formData = new FormData(); + filteredValidFiles.forEach(f => formData.append('files', f)); + try { + const response = await fetch('/uploads', { + method: 'POST', + credentials: 'include', + body: formData + }); + if (response.ok) { + const data = await response.json(); + logStatus('Files uploaded successfully.'); + filenameMap = data.filename_map || {}; + uploadSessionId = data.session_id || ''; + + // Update results table with filenames and View buttons + resultsTableBody.innerHTML = ''; + Object.entries(filenameMap).forEach(([uuid, originalFilename], idx) => { + const row = resultsTableBody.insertRow(); + row.dataset.originalIndex = idx; + row.innerHTML = ` + ${originalFilename} + NA + + `; }); - if (response.ok) { - const data = await response.json(); - logStatus('Files uploaded successfully.'); - filenameMap = data.filename_map || {}; - 
uploadSessionId = data.session_id || ''; - - // Update results table with filenames and View buttons - resultsTableBody.innerHTML = ''; - Object.entries(filenameMap).forEach(([uuid, originalFilename], idx) => { - const row = resultsTableBody.insertRow(); - row.dataset.originalIndex = idx; - row.innerHTML = ` - ${originalFilename} - NA - - `; - }); - // Add click event for View buttons - resultsTableBody.querySelectorAll('.view-button').forEach(btn => { - btn.addEventListener('click', (e) => { - const idx = parseInt(btn.dataset.index, 10); - displayImage(idx); - }); + resultsTableBody.querySelectorAll('.view-button').forEach(btn => { + btn.addEventListener('click', (e) => { + const idx = parseInt(btn.dataset.index, 10); + displayImage(idx); }); - } else { - logStatus('File upload failed.'); - } - } catch (err) { - logStatus('Error uploading files: ' + err); + }); + } else { + logStatus('File upload failed.'); } + } catch (err) { + logStatus('Error uploading files: ' + err); } + } + + fileInput.addEventListener('change', async () => { + handleFiles(fileInput.files); + await uploadFilesToServer(); }); // Input mode change