From 58ae45fc545ab13d461f3fa96ced4b9924b8f77d Mon Sep 17 00:00:00 2001 From: Gleb Khaykin Date: Wed, 11 Mar 2026 12:55:33 +0100 Subject: [PATCH 1/3] feat(rl): update spec --- openapi.yaml | 239 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 210 insertions(+), 29 deletions(-) diff --git a/openapi.yaml b/openapi.yaml index a1e62da..5b90f25 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -7151,18 +7151,16 @@ paths: in: query required: false schema: - description: Maximum number of sessions to return (1-100), defaults to 20 + description: Maximum number of sessions to return (1-100) type: integer format: int32 default: "20" - - name: offset + - name: after in: query required: false schema: - description: Number of sessions to skip - type: integer - format: int32 - default: "0" + description: Cursor for pagination (ID of the last session from the previous page) + type: string post: summary: Create training session description: Creates a training session and returns its details. @@ -7187,7 +7185,6 @@ paths: application/json: schema: $ref: '#/components/schemas/RpcStatus' - /rl/training-sessions/{session_id}: get: summary: Get training session @@ -7214,6 +7211,32 @@ paths: schema: description: ID of the training session type: string + /rl/training-sessions/{session_id}/stop: + post: + summary: Stop training session + description: Stops a training session. + operationId: stopTrainingSession + tags: [RL] + responses: + "200": + description: Training session details + content: + application/json: + schema: + $ref: '#/components/schemas/RL.TrainingSession' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/RpcStatus' + parameters: + - name: session_id + in: path + required: true + schema: + description: ID of the training session + type: string /rl/training-sessions/{session_id}/operations/forward-backward/{operation_id}: get: summary: Get forward-backward operation @@ -7406,19 +7429,19 @@ paths: schema: description: Training session ID type: string - /rl/training-sessions/{session_id}/stop: + /rl/training-sessions/{session_id}/operations/inference-checkpoint: post: - summary: Stop training session - description: Stops a training session. - operationId: stopTrainingSession + summary: Create inference checkpoint + description: Submits an operation that will asynchronously save the current LoRA adapter as an inference checkpoint and upload it to object storage. + operationId: createInferenceCheckpoint tags: [RL] responses: "200": - description: Training session details + description: Inference checkpoint operation details content: application/json: schema: - $ref: '#/components/schemas/RL.TrainingSession' + $ref: '#/components/schemas/RL.InferenceCheckpointOperation' default: description: An unexpected error response. content: @@ -7430,8 +7453,72 @@ paths: in: path required: true schema: - description: ID of the training session + description: Training session ID type: string + /rl/training-sessions/{session_id}/operations/inference-checkpoint/{operation_id}: + get: + summary: Get inference checkpoint operation + description: Retrieves the current status and result of an inference checkpoint operation. + operationId: getInferenceCheckpointOperation + tags: [RL] + responses: + "200": + description: Inference checkpoint operation details + content: + application/json: + schema: + $ref: '#/components/schemas/RL.InferenceCheckpointOperation' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/RpcStatus' + parameters: + - name: session_id + in: path + required: true + schema: + description: Training session ID + type: string + - name: operation_id + in: path + required: true + schema: + description: Operation ID + type: string + /rl/checkpoints/{id}/download: + get: + summary: Download checkpoint + description: Returns presigned URLs for downloading a checkpoint's model files. Only inference checkpoints support downloading. + operationId: downloadCheckpoint + tags: [RL] + responses: + "200": + description: Checkpoint download URLs + content: + application/json: + schema: + $ref: '#/components/schemas/RL.CheckpointDownloadResponse' + default: + description: An unexpected error response. + content: + application/json: + schema: + $ref: '#/components/schemas/RpcStatus' + parameters: + - name: id + in: path + required: true + schema: + description: ID of the checkpoint + type: string + - name: variant + in: query + required: true + schema: + description: "Checkpoint variant to download: merged (full model) or adapter (LoRA weights only)" + $ref: '#/components/schemas/RL.CheckpointVariant' components: securitySchemes: @@ -7544,21 +7631,15 @@ components: RL.ListMeta: type: object properties: - total: - type: string - format: int64 - example: 42 - description: Total number of items matching the filter limit: type: integer format: int32 example: 20 description: Maximum number of items returned per page - offset: - type: integer - format: int32 - example: 0 - description: Number of items skipped + has_more: + type: boolean + example: true + description: Whether more items exist beyond this page RL.EncodedText: type: object properties: @@ -7622,7 +7703,7 @@ components: enum: - GRPO_LOSS_AGGREGATION_TYPE_UNSPECIFIED - GRPO_LOSS_AGGREGATION_TYPE_FIXED_HORIZON - - GRPO_LOSS_AGGREGATION_TYPE_PER_TOKEN + - GRPO_LOSS_AGGREGATION_TYPE_TOKEN_MEAN default: GRPO_LOSS_AGGREGATION_TYPE_UNSPECIFIED RL.LossConfig: type: object @@ -7870,6 +7951,7 @@ components: - TRAINING_OPERATION_ERROR_CODE_TIMEOUT - TRAINING_OPERATION_ERROR_CODE_INTERNAL_ERROR - TRAINING_OPERATION_ERROR_CODE_SESSION_NOT_ACTIVE + - TRAINING_OPERATION_ERROR_CODE_INVALID_INPUT default: TRAINING_OPERATION_ERROR_CODE_UNSPECIFIED RL.TrainingOperationStatus: type: string @@ -7910,30 +7992,39 @@ components: RL.TrainingSession: type: object properties: - session_id: + id: type: string + example: 123e4567-e89b-12d3-a456-426614174000 description: ID of the training session status: $ref: '#/components/schemas/RL.TrainingSessionStatus' + example: TRAINING_SESSION_STATUS_RUNNING + description: Status of the training session base_model: type: string example: meta-llama/Meta-Llama-3-8B-Instruct description: Base model used for the training session - checkpoint_id: - description: Checkpoint ID to use for the training session - type: string + inference_checkpoints: + type: array + items: + type: object + $ref: '#/components/schemas/RL.InferenceCheckpoint' + description: List of saved inference checkpoints for this session step: description: Current training step type: string format: uint64 + example: 100 default: "0" created_at: type: string format: date-time + example: "2026-01-02T00:00:00Z" description: Timestamp when the training session was created updated_at: type: string format: date-time + example: "2026-01-02T00:00:05Z" description: Timestamp when the training session was last updated lora_config: $ref: '#/components/schemas/RL.LoraConfig' @@ -7999,6 +8090,96 @@ components: format: float example: 0.1 default: "0.1" + RL.InferenceCheckpointOperation: + type: object + properties: + operation_id: + type: string + example: 550e8400-e29b-41d4-a716-446655440000 + description: Operation ID + status: + $ref: '#/components/schemas/RL.TrainingOperationStatus' + example: TRAINING_OPERATION_STATUS_PENDING + description: Operation status + output: + $ref: '#/components/schemas/RL.InferenceCheckpointResult' + error: + $ref: '#/components/schemas/RL.TrainingOperationError' + RL.InferenceCheckpointResult: + type: object + properties: + model_name: + type: string + example: username/Meta-Llama-3-8B-rl-step-42-20260216 + description: Registered model name for downloading the checkpoint + RL.InferenceCheckpoint: + type: object + description: Saved inference checkpoint + properties: + id: + type: string + example: 123e4567-e89b-12d3-a456-426614174000 + description: Unique identifier for the checkpoint + step: + type: string + format: uint64 + example: 42 + description: Training step at time of save + created_at: + type: string + format: date-time + example: "2026-01-02T00:00:00Z" + description: Timestamp when the checkpoint was created + registration: + $ref: '#/components/schemas/RL.InferenceCheckpointRegistration' + description: Model registration details + RL.InferenceCheckpointRegistration: + type: object + description: Model registration details for an inference checkpoint + properties: + model_name: + type: string + example: username/Meta-Llama-3-8B-rl-step-42-20260216 + description: Registered model name for downloading the checkpoint + registered_at: + type: string + format: date-time + example: "2026-01-02T00:00:00Z" + description: Timestamp when the model was registered + RL.CheckpointVariant: + type: string + enum: + - CHECKPOINT_VARIANT_UNSPECIFIED + - CHECKPOINT_VARIANT_MERGED + - CHECKPOINT_VARIANT_ADAPTER + default: CHECKPOINT_VARIANT_UNSPECIFIED + description: "Checkpoint variant: merged (full model) or adapter (LoRA weights only)" + RL.CheckpointFile: + type: object + description: A downloadable file within a checkpoint + properties: + filename: + type: string + example: model-00001-of-00002.safetensors + description: Name of the file + url: + type: string + example: "https://..." + description: Presigned URL for downloading the file + size: + type: string + format: int64 + example: 123456789 + description: File size in bytes + RL.CheckpointDownloadResponse: + type: object + properties: + data: + type: array + items: + type: object + $ref: '#/components/schemas/RL.CheckpointFile' + description: List of files with presigned download URLs ErrorResponse: type: object properties: From 941af0ae464c5ff8b7a8175b95986132c1ac508b Mon Sep 17 00:00:00 2001 From: Gleb Khaykin Date: Wed, 11 Mar 2026 13:14:09 +0100 Subject: [PATCH 2/3] refactor: :hammer: --- openapi.yaml | 45 +++++++++++++-------------------------------- 1 file changed, 13 insertions(+), 32 deletions(-) diff --git a/openapi.yaml b/openapi.yaml index 5b90f25..7ed8bb5 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -7139,7 +7139,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RpcStatus' + $ref: '#/components/schemas/ErrorData' parameters: - name: status in: query @@ -7184,7 +7184,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RpcStatus' + $ref: '#/components/schemas/ErrorData' /rl/training-sessions/{session_id}: get: summary: Get training session @@ -7203,7 +7203,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RpcStatus' + $ref: '#/components/schemas/ErrorData' parameters: - name: session_id in: path @@ -7229,7 +7229,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RpcStatus' + $ref: '#/components/schemas/ErrorData' parameters: - name: session_id in: path @@ -7255,7 +7255,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RpcStatus' + $ref: '#/components/schemas/ErrorData' parameters: - name: session_id in: path @@ -7287,7 +7287,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RpcStatus' + $ref: '#/components/schemas/ErrorData' parameters: - name: session_id in: path @@ -7319,7 +7319,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RpcStatus' + $ref: '#/components/schemas/ErrorData' parameters: - name: session_id in: path @@ -7357,7 +7357,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RpcStatus' + $ref: '#/components/schemas/ErrorData' parameters: - name: session_id in: path @@ -7389,7 +7389,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RpcStatus' + $ref: '#/components/schemas/ErrorData' parameters: - name: session_id in: path @@ -7421,7 +7421,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RpcStatus' + $ref: '#/components/schemas/ErrorData' parameters: - name: session_id in: path @@ -7447,7 +7447,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RpcStatus' + $ref: '#/components/schemas/ErrorData' parameters: - name: session_id in: path @@ -7473,7 +7473,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RpcStatus' + $ref: '#/components/schemas/ErrorData' parameters: - name: session_id in: path @@ -7505,7 +7505,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/RpcStatus' + $ref: '#/components/schemas/ErrorData' parameters: - name: id in: path @@ -8187,25 +8187,6 @@ components: type: integer message: type: string - ProtobufAny: - type: object - properties: - '@type': - type: string - additionalProperties: {} - RpcStatus: - type: object - properties: - code: - type: integer - format: int32 - message: - type: string - details: - type: array - items: - type: object - $ref: '#/components/schemas/ProtobufAny' GPUClusterControlPlaneNode: type: object required: From a6f5d4f0d6c08e09c90c4d35d446a0b311d4337e Mon Sep 17 00:00:00 2001 From: Gleb Khaykin Date: Wed, 11 Mar 2026 15:53:14 +0100 Subject: [PATCH 3/3] refactor: :hammer: --- openapi.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openapi.yaml b/openapi.yaml index 7ed8bb5..dea4b15 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -8093,7 +8093,7 @@ components: RL.InferenceCheckpointOperation: type: object properties: - operation_id: + id: type: string example: 550e8400-e29b-41d4-a716-446655440000 description: Operation ID