Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions audio_separator/remote/api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def separate_audio(
data["custom_output_names"] = json.dumps(custom_output_names)

try:
# Server returns immediately with task_id; 60s is generous for submission
# Server processes synchronously; 1800s matches Cloud Run request timeout.
# When using gcs_uri (no file upload), we still need multipart/form-data
# encoding because FastAPI requires it for endpoints with File()/Form() params.
# Passing a dummy empty file field forces requests to use multipart encoding.
Expand All @@ -158,7 +158,7 @@ def separate_audio(
f"{self.api_url}/separate",
files=files,
data=data,
timeout=60,
timeout=1800,
)
response.raise_for_status()
return response.json()
Expand Down
29 changes: 10 additions & 19 deletions audio_separator/remote/deploy_cloudrun.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
models_ready = False

# --- Async job infrastructure ---
gpu_semaphore = threading.Semaphore(1)

OUTPUT_BUCKET = os.environ.get("OUTPUT_BUCKET", "nomadkaraoke-audio-separator-outputs")
GCP_PROJECT = os.environ.get("GCP_PROJECT", "nomadkaraoke")
Expand Down Expand Up @@ -231,11 +230,8 @@ def update_status(status: str, progress: int = 0, error: str = None, files: dict
except Exception as e:
logger.warning(f"[{task_id}] Failed to update Firestore status: {e}")

# Wait for GPU availability
update_status("queued", 0)
logger.info(f"[{task_id}] Waiting for GPU semaphore...")
gpu_semaphore.acquire()
logger.info(f"[{task_id}] GPU semaphore acquired, starting separation")
update_status("processing", 0)
logger.info(f"[{task_id}] Starting separation")
try:
os.makedirs(f"{STORAGE_DIR}/outputs/{task_id}", exist_ok=True)
output_dir = f"{STORAGE_DIR}/outputs/{task_id}"
Expand Down Expand Up @@ -379,8 +375,7 @@ def update_status(status: str, progress: int = 0, error: str = None, files: dict
return {"task_id": task_id, "status": "error", "error": str(e), "models_used": models_used}

finally:
gpu_semaphore.release()
logger.info(f"[{task_id}] GPU semaphore released")
logger.info(f"[{task_id}] Separation finished, cleaning up local files")
# Clean up local files (outputs are in GCS now)
output_dir = f"{STORAGE_DIR}/outputs/{task_id}"
if os.path.exists(output_dir):
Expand Down Expand Up @@ -507,9 +502,12 @@ async def separate_audio(
"instance_id": instance_id,
})

# Fire-and-forget: run separation in background thread
# Run separation synchronously — Cloud Run keeps this request active,
# which lets the autoscaler know this instance is busy and route new
# requests to new instances (with concurrency=1).
# This matches Modal's .spawn() pattern: each job gets its own GPU instance.
loop = asyncio.get_event_loop()
loop.run_in_executor(
result = await loop.run_in_executor(
None,
lambda: separate_audio_sync(
audio_data,
Expand Down Expand Up @@ -551,15 +549,8 @@ async def separate_audio(
),
)

# Return immediately — client polls /status/{task_id}
return {
"task_id": task_id,
"status": "submitted",
"progress": 0,
"original_filename": filename,
"models_used": [f"preset:{preset}"] if preset else (models_list or ["default"]),
"total_models": 1 if preset else (len(models_list) if models_list else 1),
}
# Return the completed/error result (Firestore + GCS already updated by separate_audio_sync)
return result

except HTTPException:
raise
Expand Down
Loading