From 58ae45fc545ab13d461f3fa96ced4b9924b8f77d Mon Sep 17 00:00:00 2001
From: Gleb Khaykin <khaykingleb@gmail.com>
Date: Wed, 11 Mar 2026 12:55:33 +0100
Subject: [PATCH 1/3] feat(rl): add inference checkpoints and cursor pagination to spec

---
 openapi.yaml | 239 ++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 210 insertions(+), 29 deletions(-)

diff --git a/openapi.yaml b/openapi.yaml
index a1e62da..5b90f25 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -7151,18 +7151,16 @@ paths:
           in: query
           required: false
           schema:
-            description: Maximum number of sessions to return (1-100), defaults to 20
+            description: Maximum number of sessions to return (1-100)
             type: integer
             format: int32
             default: "20"
-        - name: offset
+        - name: after
           in: query
           required: false
           schema:
-            description: Number of sessions to skip
-            type: integer
-            format: int32
-            default: "0"
+            description: Cursor for pagination (ID of the last session from the previous page)
+            type: string
     post:
       summary: Create training session
       description: Creates a training session and returns its details.
@@ -7187,7 +7185,6 @@ paths:
             application/json:
               schema:
                 $ref: '#/components/schemas/RpcStatus'
-
   /rl/training-sessions/{session_id}:
     get:
       summary: Get training session
@@ -7214,6 +7211,32 @@ paths:
           schema:
             description: ID of the training session
             type: string
+  /rl/training-sessions/{session_id}/stop:
+    post:
+      summary: Stop training session
+      description: Stops a training session.
+      operationId: stopTrainingSession
+      tags: [RL]
+      responses:
+        "200":
+          description: Training session details
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/RL.TrainingSession'
+        default:
+          description: An unexpected error response.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/RpcStatus'
+      parameters:
+        - name: session_id
+          in: path
+          required: true
+          schema:
+            description: ID of the training session
+            type: string
   /rl/training-sessions/{session_id}/operations/forward-backward/{operation_id}:
     get:
       summary: Get forward-backward operation
@@ -7406,19 +7429,19 @@ paths:
           schema:
             description: Training session ID
             type: string
-  /rl/training-sessions/{session_id}/stop:
+  /rl/training-sessions/{session_id}/operations/inference-checkpoint:
     post:
-      summary: Stop training session
-      description: Stops a training session.
-      operationId: stopTrainingSession
+      summary: Create inference checkpoint
+      description: Submits an operation that will asynchronously save the current LoRA adapter as an inference checkpoint and upload it to object storage.
+      operationId: createInferenceCheckpoint
       tags: [RL]
       responses:
         "200":
-          description: Training session details
+          description: Inference checkpoint operation details
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/RL.TrainingSession'
+                $ref: '#/components/schemas/RL.InferenceCheckpointOperation'
         default:
           description: An unexpected error response.
           content:
@@ -7430,8 +7453,72 @@ paths:
           in: path
           required: true
           schema:
-            description: ID of the training session
+            description: Training session ID
             type: string
+  /rl/training-sessions/{session_id}/operations/inference-checkpoint/{operation_id}:
+    get:
+      summary: Get inference checkpoint operation
+      description: Retrieves the current status and result of an inference checkpoint operation.
+      operationId: getInferenceCheckpointOperation
+      tags: [RL]
+      responses:
+        "200":
+          description: Inference checkpoint operation details
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/RL.InferenceCheckpointOperation'
+        default:
+          description: An unexpected error response.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/RpcStatus'
+      parameters:
+        - name: session_id
+          in: path
+          required: true
+          schema:
+            description: Training session ID
+            type: string
+        - name: operation_id
+          in: path
+          required: true
+          schema:
+            description: Operation ID
+            type: string
+  /rl/checkpoints/{id}/download:
+    get:
+      summary: Download checkpoint
+      description: Returns presigned URLs for downloading a checkpoint's model files. Only inference checkpoints support downloading.
+      operationId: downloadCheckpoint
+      tags: [RL]
+      responses:
+        "200":
+          description: Checkpoint download URLs
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/RL.CheckpointDownloadResponse'
+        default:
+          description: An unexpected error response.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/RpcStatus'
+      parameters:
+        - name: id
+          in: path
+          required: true
+          schema:
+            description: ID of the checkpoint
+            type: string
+        - name: variant
+          in: query
+          required: true
+          description: "Checkpoint variant to download: merged (full model) or adapter (LoRA weights only)"
+          schema:  # description is on the parameter: siblings of $ref are ignored in OpenAPI 3.0
+            $ref: '#/components/schemas/RL.CheckpointVariant'
 
 components:
   securitySchemes:
@@ -7544,21 +7631,15 @@ components:
     RL.ListMeta:
       type: object
       properties:
-        total:
-          type: string
-          format: int64
-          example: 42
-          description: Total number of items matching the filter
         limit:
           type: integer
           format: int32
           example: 20
           description: Maximum number of items returned per page
-        offset:
-          type: integer
-          format: int32
-          example: 0
-          description: Number of items skipped
+        has_more:
+          type: boolean
+          example: true
+          description: Whether more items exist beyond this page
     RL.EncodedText:
       type: object
       properties:
@@ -7622,7 +7703,7 @@ components:
       enum:
         - GRPO_LOSS_AGGREGATION_TYPE_UNSPECIFIED
         - GRPO_LOSS_AGGREGATION_TYPE_FIXED_HORIZON
-        - GRPO_LOSS_AGGREGATION_TYPE_PER_TOKEN
+        - GRPO_LOSS_AGGREGATION_TYPE_TOKEN_MEAN
       default: GRPO_LOSS_AGGREGATION_TYPE_UNSPECIFIED
     RL.LossConfig:
       type: object
@@ -7870,6 +7951,7 @@ components:
         - TRAINING_OPERATION_ERROR_CODE_TIMEOUT
         - TRAINING_OPERATION_ERROR_CODE_INTERNAL_ERROR
         - TRAINING_OPERATION_ERROR_CODE_SESSION_NOT_ACTIVE
+        - TRAINING_OPERATION_ERROR_CODE_INVALID_INPUT
       default: TRAINING_OPERATION_ERROR_CODE_UNSPECIFIED
     RL.TrainingOperationStatus:
       type: string
@@ -7910,30 +7992,39 @@ components:
     RL.TrainingSession:
       type: object
       properties:
-        session_id:
+        id:
           type: string
+          example: 123e4567-e89b-12d3-a456-426614174000
           description: ID of the training session
         status:
           $ref: '#/components/schemas/RL.TrainingSessionStatus'
+          example: TRAINING_SESSION_STATUS_RUNNING
+          description: Status of the training session
         base_model:
           type: string
           example: meta-llama/Meta-Llama-3-8B-Instruct
           description: Base model used for the training session
-        checkpoint_id:
-          description: Checkpoint ID to use for the training session
-          type: string
+        inference_checkpoints:
+          type: array
+          items:
+            type: object
+            $ref: '#/components/schemas/RL.InferenceCheckpoint'
+          description: List of saved inference checkpoints for this session
         step:
           description: Current training step
           type: string
           format: uint64
+          example: "100"  # quoted: schema type is string, same as the default below
           default: "0"
         created_at:
           type: string
           format: date-time
+          example: "2026-01-02T00:00:00Z"
           description: Timestamp when the training session was created
         updated_at:
           type: string
           format: date-time
+          example: "2026-01-02T00:00:05Z"
           description: Timestamp when the training session was last updated
         lora_config:
           $ref: '#/components/schemas/RL.LoraConfig'
@@ -7999,6 +8090,96 @@ components:
           format: float
           example: 0.1
           default: "0.1"
+    RL.InferenceCheckpointOperation:
+      type: object
+      properties:
+        operation_id:
+          type: string
+          example: 550e8400-e29b-41d4-a716-446655440000
+          description: Operation ID
+        status:
+          $ref: '#/components/schemas/RL.TrainingOperationStatus'
+          example: TRAINING_OPERATION_STATUS_PENDING
+          description: Operation status
+        output:
+          $ref: '#/components/schemas/RL.InferenceCheckpointResult'
+        error:
+          $ref: '#/components/schemas/RL.TrainingOperationError'
+    RL.InferenceCheckpointResult:
+      type: object
+      properties:
+        model_name:
+          type: string
+          example: username/Meta-Llama-3-8B-rl-step-42-20260216
+          description: Registered model name for downloading the checkpoint
+    RL.InferenceCheckpoint:
+      type: object
+      description: Saved inference checkpoint
+      properties:
+        id:
+          type: string
+          example: 123e4567-e89b-12d3-a456-426614174000
+          description: Unique identifier for the checkpoint
+        step:
+          type: string
+          format: uint64
+          example: "42"  # quoted: schema type is string
+          description: Training step at time of save
+        created_at:
+          type: string
+          format: date-time
+          example: "2026-01-02T00:00:00Z"
+          description: Timestamp when the checkpoint was created
+        registration:
+          $ref: '#/components/schemas/RL.InferenceCheckpointRegistration'
+          description: Model registration details
+    RL.InferenceCheckpointRegistration:
+      type: object
+      description: Model registration details for an inference checkpoint
+      properties:
+        model_name:
+          type: string
+          example: username/Meta-Llama-3-8B-rl-step-42-20260216
+          description: Registered model name for downloading the checkpoint
+        registered_at:
+          type: string
+          format: date-time
+          example: "2026-01-02T00:00:00Z"
+          description: Timestamp when the model was registered
+    RL.CheckpointVariant:
+      type: string
+      enum:
+        - CHECKPOINT_VARIANT_UNSPECIFIED
+        - CHECKPOINT_VARIANT_MERGED
+        - CHECKPOINT_VARIANT_ADAPTER
+      default: CHECKPOINT_VARIANT_UNSPECIFIED
+      description: "Checkpoint variant: merged (full model) or adapter (LoRA weights only)"
+    RL.CheckpointFile:
+      type: object
+      description: A downloadable file within a checkpoint
+      properties:
+        filename:
+          type: string
+          example: model-00001-of-00002.safetensors
+          description: Name of the file
+        url:
+          type: string
+          example: "https://..."
+          description: Presigned URL for downloading the file
+        size:
+          type: string
+          format: int64
+          example: "123456789"  # quoted: schema type is string
+          description: File size in bytes
+    RL.CheckpointDownloadResponse:
+      type: object
+      properties:
+        data:
+          type: array
+          items:
+            type: object
+            $ref: '#/components/schemas/RL.CheckpointFile'
+          description: List of files with presigned download URLs
     ErrorResponse:
       type: object
       properties:

From 941af0ae464c5ff8b7a8175b95986132c1ac508b Mon Sep 17 00:00:00 2001
From: Gleb Khaykin <khaykingleb@gmail.com>
Date: Wed, 11 Mar 2026 13:14:09 +0100
Subject: [PATCH 2/3] refactor: :hammer: replace RpcStatus error schema with ErrorData

---
 openapi.yaml | 45 +++++++++++++--------------------------------
 1 file changed, 13 insertions(+), 32 deletions(-)

diff --git a/openapi.yaml b/openapi.yaml
index 5b90f25..7ed8bb5 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -7139,7 +7139,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/RpcStatus'
+                $ref: '#/components/schemas/ErrorData'
       parameters:
         - name: status
           in: query
@@ -7184,7 +7184,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/RpcStatus'
+                $ref: '#/components/schemas/ErrorData'
   /rl/training-sessions/{session_id}:
     get:
       summary: Get training session
@@ -7203,7 +7203,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/RpcStatus'
+                $ref: '#/components/schemas/ErrorData'
       parameters:
         - name: session_id
           in: path
@@ -7229,7 +7229,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/RpcStatus'
+                $ref: '#/components/schemas/ErrorData'
       parameters:
         - name: session_id
           in: path
@@ -7255,7 +7255,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/RpcStatus'
+                $ref: '#/components/schemas/ErrorData'
       parameters:
         - name: session_id
           in: path
@@ -7287,7 +7287,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/RpcStatus'
+                $ref: '#/components/schemas/ErrorData'
       parameters:
         - name: session_id
           in: path
@@ -7319,7 +7319,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/RpcStatus'
+                $ref: '#/components/schemas/ErrorData'
       parameters:
         - name: session_id
           in: path
@@ -7357,7 +7357,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/RpcStatus'
+                $ref: '#/components/schemas/ErrorData'
       parameters:
         - name: session_id
           in: path
@@ -7389,7 +7389,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/RpcStatus'
+                $ref: '#/components/schemas/ErrorData'
       parameters:
         - name: session_id
           in: path
@@ -7421,7 +7421,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/RpcStatus'
+                $ref: '#/components/schemas/ErrorData'
       parameters:
         - name: session_id
           in: path
@@ -7447,7 +7447,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/RpcStatus'
+                $ref: '#/components/schemas/ErrorData'
       parameters:
         - name: session_id
           in: path
@@ -7473,7 +7473,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/RpcStatus'
+                $ref: '#/components/schemas/ErrorData'
       parameters:
         - name: session_id
           in: path
@@ -7505,7 +7505,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/RpcStatus'
+                $ref: '#/components/schemas/ErrorData'
       parameters:
         - name: id
           in: path
@@ -8187,25 +8187,6 @@ components:
           type: integer
         message:
           type: string
-    ProtobufAny:
-      type: object
-      properties:
-        '@type':
-          type: string
-      additionalProperties: {}
-    RpcStatus:
-      type: object
-      properties:
-        code:
-          type: integer
-          format: int32
-        message:
-          type: string
-        details:
-          type: array
-          items:
-            type: object
-            $ref: '#/components/schemas/ProtobufAny'
     GPUClusterControlPlaneNode:
       type: object
       required:

From a6f5d4f0d6c08e09c90c4d35d446a0b311d4337e Mon Sep 17 00:00:00 2001
From: Gleb Khaykin <khaykingleb@gmail.com>
Date: Wed, 11 Mar 2026 15:53:14 +0100
Subject: [PATCH 3/3] refactor: :hammer: rename InferenceCheckpointOperation operation_id to id

---
 openapi.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openapi.yaml b/openapi.yaml
index 7ed8bb5..dea4b15 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -8093,7 +8093,7 @@ components:
     RL.InferenceCheckpointOperation:
       type: object
       properties:
-        operation_id:
+        id:
           type: string
           example: 550e8400-e29b-41d4-a716-446655440000
           description: Operation ID