From d3174587dc00b8e0afa0413611028919341deb2b Mon Sep 17 00:00:00 2001 From: Andrew Beveridge Date: Thu, 26 Mar 2026 12:35:16 -0400 Subject: [PATCH] fix: synchronous endpoint + no semaphore, let Cloud Run scale like Modal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fire-and-forget + semaphore design caused all jobs to queue on one instance. Cloud Run couldn't see background threads as "busy" so it never scaled to new instances. Fix: make endpoint synchronous (await executor) with concurrency=1. Cloud Run sees each request as active during processing and scales to new GPU instances for concurrent jobs — matching Modal's .spawn(). Increase the client POST timeout from 60s to 1800s to match the Cloud Run request timeout. Co-Authored-By: Claude Opus 4.6 (1M context) --- audio_separator/remote/api_client.py | 4 ++-- audio_separator/remote/deploy_cloudrun.py | 29 ++++++++--------------- 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/audio_separator/remote/api_client.py b/audio_separator/remote/api_client.py index 1ff8ec6..977b6e5 100644 --- a/audio_separator/remote/api_client.py +++ b/audio_separator/remote/api_client.py @@ -148,7 +148,7 @@ def separate_audio( data["custom_output_names"] = json.dumps(custom_output_names) try: - # Server returns immediately with task_id; 60s is generous for submission + # Server processes synchronously; 1800s matches Cloud Run request timeout. # When using gcs_uri (no file upload), we still need multipart/form-data # encoding because FastAPI requires it for endpoints with File()/Form() params. # Passing a dummy empty file field forces requests to use multipart encoding. 
@@ -158,7 +158,7 @@ def separate_audio( f"{self.api_url}/separate", files=files, data=data, - timeout=60, + timeout=1800, ) response.raise_for_status() return response.json() diff --git a/audio_separator/remote/deploy_cloudrun.py b/audio_separator/remote/deploy_cloudrun.py index 0f5fa12..ccd2c8d 100644 --- a/audio_separator/remote/deploy_cloudrun.py +++ b/audio_separator/remote/deploy_cloudrun.py @@ -55,7 +55,6 @@ models_ready = False # --- Async job infrastructure --- -gpu_semaphore = threading.Semaphore(1) OUTPUT_BUCKET = os.environ.get("OUTPUT_BUCKET", "nomadkaraoke-audio-separator-outputs") GCP_PROJECT = os.environ.get("GCP_PROJECT", "nomadkaraoke") @@ -231,11 +230,8 @@ def update_status(status: str, progress: int = 0, error: str = None, files: dict except Exception as e: logger.warning(f"[{task_id}] Failed to update Firestore status: {e}") - # Wait for GPU availability - update_status("queued", 0) - logger.info(f"[{task_id}] Waiting for GPU semaphore...") - gpu_semaphore.acquire() - logger.info(f"[{task_id}] GPU semaphore acquired, starting separation") + update_status("processing", 0) + logger.info(f"[{task_id}] Starting separation") try: os.makedirs(f"{STORAGE_DIR}/outputs/{task_id}", exist_ok=True) output_dir = f"{STORAGE_DIR}/outputs/{task_id}" @@ -379,8 +375,7 @@ def update_status(status: str, progress: int = 0, error: str = None, files: dict return {"task_id": task_id, "status": "error", "error": str(e), "models_used": models_used} finally: - gpu_semaphore.release() - logger.info(f"[{task_id}] GPU semaphore released") + logger.info(f"[{task_id}] Separation finished, cleaning up local files") # Clean up local files (outputs are in GCS now) output_dir = f"{STORAGE_DIR}/outputs/{task_id}" if os.path.exists(output_dir): @@ -507,9 +502,12 @@ async def separate_audio( "instance_id": instance_id, }) - # Fire-and-forget: run separation in background thread + # Run separation synchronously — Cloud Run keeps this request active, + # which lets the autoscaler know 
this instance is busy and route new + # requests to new instances (with concurrency=1). + # This matches Modal's .spawn() pattern: each job gets its own GPU instance. loop = asyncio.get_event_loop() - loop.run_in_executor( + result = await loop.run_in_executor( None, lambda: separate_audio_sync( audio_data, @@ -551,15 +549,8 @@ async def separate_audio( ), ) - # Return immediately — client polls /status/{task_id} - return { - "task_id": task_id, - "status": "submitted", - "progress": 0, - "original_filename": filename, - "models_used": [f"preset:{preset}"] if preset else (models_list or ["default"]), - "total_models": 1 if preset else (len(models_list) if models_list else 1), - } + # Return the completed/error result (Firestore + GCS already updated by separate_audio_sync) + return result except HTTPException: raise