Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
301 changes: 292 additions & 9 deletions src/iceberg/expression/json_serde.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,33 @@
* under the License.
*/

#include <cstdint>
#include <format>
#include <limits>
#include <ranges>
#include <string>
#include <utility>
#include <vector>

#include <nlohmann/json.hpp>

#include "iceberg/expression/json_serde_internal.h"
#include "iceberg/expression/literal.h"
#include "iceberg/expression/predicate.h"
#include "iceberg/expression/term.h"
#include "iceberg/transform.h"
#include "iceberg/util/checked_cast.h"
#include "iceberg/util/json_util_internal.h"
#include "iceberg/util/macros.h"
#include "iceberg/util/transform_util.h"

namespace iceberg {
namespace {
// JSON field names
// Discriminator field: holds the operation name for expressions/predicates, or one of
// the wrapper markers ("transform", "reference", "literal") for wrapped terms/values.
constexpr std::string_view kType = "type";
// The term of a predicate, or the referenced name inside a wrapped term object.
constexpr std::string_view kTerm = "term";
// Used both as the "type" marker of a transform term and as the key that holds the
// transform's string form.
constexpr std::string_view kTransform = "transform";
// Single literal operand of a non-unary, non-set predicate (also the payload of a
// wrapped literal).
constexpr std::string_view kValue = "value";
// Array of literal operands of a set predicate.
constexpr std::string_view kValues = "values";
// Operands of AND / OR expressions.
constexpr std::string_view kLeft = "left";
constexpr std::string_view kRight = "right";
// Operand of a NOT expression.
constexpr std::string_view kChild = "child";
// Expression type strings
constexpr std::string_view kTypeTrue = "true";
constexpr std::string_view kTypeFalse = "false";
Expand All @@ -58,6 +69,53 @@ constexpr std::string_view kTypeCountNull = "count-null";
constexpr std::string_view kTypeCountStar = "count-star";
constexpr std::string_view kTypeMin = "min";
constexpr std::string_view kTypeMax = "max";
// "type" marker of the wrapped literal form {"type": "literal", "value": <actual>}.
constexpr std::string_view kTypeLiteral = "literal";
// "type" marker of the wrapped reference form {"type": "reference", "term": <name>}.
constexpr std::string_view kTypeReference = "reference";

/// Helper to check if a JSON term represents a transform.
///
/// A transform term is a JSON object whose "type" field is the string "transform" and
/// which also carries a "term" field. Any other shape (including a "type" field that is
/// not a string) is reported as "not a transform" rather than raising.
bool IsTransformTerm(const nlohmann::json& json) {
  if (!json.is_object() || !json.contains(kType) || !json.contains(kTerm)) {
    return false;
  }
  const auto& type_json = json[kType];
  // get<std::string>() throws a nlohmann type_error on non-string values, so the type
  // must be verified before extracting it.
  return type_json.is_string() && type_json.get<std::string>() == kTransform;
}

/// Builds an unbound predicate for `op` over `term`, taking literal operands from `json`.
///
/// - Unary operations must carry neither a "value" nor a "values" field.
/// - Set operations require "values" to be a JSON array; every element is parsed with
///   LiteralFromJson.
/// - All remaining operations require a "value" field holding a single literal.
///
/// Returns a JSON parse error when the operand fields do not match the operation kind,
/// and propagates any error from literal parsing or predicate construction.
template <typename B>
Result<std::unique_ptr<UnboundPredicate>> MakePredicateFromJson(
    Expression::Operation op, std::shared_ptr<UnboundTerm<B>> term,
    const nlohmann::json& json) {
  const bool has_value = json.contains(kValue);
  const bool has_values = json.contains(kValues);

  if (IsUnaryOperation(op)) {
    // Unary predicates take no operands at all.
    if (has_value) [[unlikely]] {
      return JsonParseError("Unary predicate has invalid 'value' field: {}",
                            SafeDumpJson(json));
    }
    if (has_values) [[unlikely]] {
      return JsonParseError("Unary predicate has invalid 'values' field: {}",
                            SafeDumpJson(json));
    }
    return UnboundPredicateImpl<B>::Make(op, std::move(term));
  }

  if (IsSetOperation(op)) {
    if (!has_values || !json[kValues].is_array()) [[unlikely]] {
      return JsonParseError("Missing or invalid 'values' field for set operation: {}",
                            SafeDumpJson(json));
    }
    std::vector<Literal> parsed;
    for (const auto& element : json[kValues]) {
      ICEBERG_ASSIGN_OR_RAISE(auto item, LiteralFromJson(element));
      parsed.push_back(std::move(item));
    }
    return UnboundPredicateImpl<B>::Make(op, std::move(term), std::move(parsed));
  }

  // Everything else is a predicate with exactly one literal operand.
  if (!has_value) [[unlikely]] {
    return JsonParseError("Missing 'value' field for literal predicate: {}",
                          SafeDumpJson(json));
  }
  ICEBERG_ASSIGN_OR_RAISE(auto operand, LiteralFromJson(json[kValue]));
  return UnboundPredicateImpl<B>::Make(op, std::move(term), std::move(operand));
}
} // namespace

bool IsUnaryOperation(Expression::Operation op) {
Expand All @@ -83,7 +141,7 @@ bool IsSetOperation(Expression::Operation op) {
}

Result<Expression::Operation> OperationTypeFromJson(const nlohmann::json& json) {
if (!json.is_string()) {
if (!json.is_string()) [[unlikely]] {
return JsonParseError("Unable to create operation. Json value is not a string");
}
auto typeStr = json.get<std::string>();
Expand Down Expand Up @@ -123,27 +181,252 @@ nlohmann::json ToJson(Expression::Operation op) {
return json;
}

/// Serializes a named reference as a bare JSON value built from its name.
nlohmann::json ToJson(const NamedReference& ref) {
  nlohmann::json name_json = ref.name();
  return name_json;
}

/// Parses a named reference from JSON.
///
/// Two shapes are accepted:
///  - the wrapped form {"type": "reference", "term": "<name>"}
///  - a bare JSON string holding the name
///
/// Returns a JSON parse error for anything else, including a wrapped form whose "term"
/// is not a string.
Result<std::unique_ptr<NamedReference>> NamedReferenceFromJson(
    const nlohmann::json& json) {
  // The "type" value is checked with is_string() first because get<std::string>()
  // throws on non-string JSON values.
  if (json.is_object() && json.contains(kType) && json[kType].is_string() &&
      json[kType].get<std::string>() == kTypeReference && json.contains(kTerm)) {
    const auto& name_json = json[kTerm];
    if (!name_json.is_string()) [[unlikely]] {
      return JsonParseError("Expected string 'term' for named reference: {}",
                            SafeDumpJson(json));
    }
    return NamedReference::Make(name_json.get<std::string>());
  }
  if (!json.is_string()) [[unlikely]] {
    return JsonParseError("Expected string for named reference");
  }
  return NamedReference::Make(json.get<std::string>());
}

/// Serializes an unbound transform as
/// {"type": "transform", "transform": <transform string>, "term": <reference name>}.
nlohmann::json ToJson(const UnboundTransform& transform) {
  nlohmann::json out;
  out[kType] = kTransform;
  out[kTransform] = transform.transform()->ToString();
  // reference() is reached through a non-const view of the object.
  // NOTE(review): presumably reference() is not const-qualified - confirm, and drop the
  // const_cast if a const accessor exists.
  out[kTerm] = const_cast<UnboundTransform&>(transform).reference()->name();
  return out;
}

/// Parses an unbound transform from its wrapped JSON form.
///
/// The input must satisfy IsTransformTerm; otherwise a JSON parse error is returned.
/// The "transform" field is read as a string and parsed with TransformFromString, and
/// the "term" field is parsed as a named reference. Errors from either step are
/// propagated.
Result<std::unique_ptr<UnboundTransform>> UnboundTransformFromJson(
    const nlohmann::json& json) {
  if (!IsTransformTerm(json)) {
    return JsonParseError("Invalid unbound transform json: {}", SafeDumpJson(json));
  }

  ICEBERG_ASSIGN_OR_RAISE(auto transform_text,
                          GetJsonValue<std::string>(json, kTransform));
  ICEBERG_ASSIGN_OR_RAISE(auto parsed_transform, TransformFromString(transform_text));
  ICEBERG_ASSIGN_OR_RAISE(auto reference, NamedReferenceFromJson(json[kTerm]));
  return UnboundTransform::Make(std::move(reference), std::move(parsed_transform));
}

/// Serializes a literal to a single JSON value.
///
/// - Null literals become JSON null.
/// - boolean / int / long / float / double are emitted as native JSON values.
/// - date / time / timestamp / timestamptz are emitted as the strings produced by the
///   corresponding TransformUtil::Human* helper.
/// - string is emitted as-is.
/// - binary / fixed are emitted as a hex string, two uppercase digits per byte.
/// - decimal uses Literal::ToString(); uuid uses Uuid::ToString().
/// - Any other type id yields JSON null.
nlohmann::json ToJson(const Literal& literal) {
  if (literal.IsNull()) {
    return nullptr;
  }

  const auto& held = literal.value();

  // Renders a byte sequence as two uppercase hex digits per byte.
  const auto to_upper_hex = [](const std::vector<uint8_t>& bytes) {
    std::string encoded;
    encoded.reserve(bytes.size() * 2);
    for (const uint8_t b : bytes) {
      encoded += std::format("{:02X}", b);
    }
    return encoded;
  };

  switch (literal.type()->type_id()) {
    // Natively representable primitives.
    case TypeId::kBoolean:
      return std::get<bool>(held);
    case TypeId::kInt:
      return std::get<int32_t>(held);
    case TypeId::kLong:
      return std::get<int64_t>(held);
    case TypeId::kFloat:
      return std::get<float>(held);
    case TypeId::kDouble:
      return std::get<double>(held);
    case TypeId::kString:
      return std::get<std::string>(held);

    // Temporal types are written in human-readable string form.
    case TypeId::kDate:
      return TransformUtil::HumanDay(std::get<int32_t>(held));
    case TypeId::kTime:
      return TransformUtil::HumanTime(std::get<int64_t>(held));
    case TypeId::kTimestamp:
      return TransformUtil::HumanTimestamp(std::get<int64_t>(held));
    case TypeId::kTimestampTz:
      return TransformUtil::HumanTimestampWithZone(std::get<int64_t>(held));

    // Byte-oriented types are hex encoded.
    case TypeId::kBinary:
    case TypeId::kFixed:
      return to_upper_hex(std::get<std::vector<uint8_t>>(held));

    case TypeId::kDecimal:
      return literal.ToString();
    case TypeId::kUuid:
      return std::get<Uuid>(held).ToString();

    default:
      // Unhandled type ids serialize as JSON null.
      return nlohmann::json();
  }
}

/// Parses a literal from a JSON value.
///
/// Accepts either a plain JSON scalar or the wrapped form
/// {"type": "literal", "value": <actual>}, which is unwrapped recursively.
///
/// Mapping of JSON scalars:
///  - null    -> null literal
///  - boolean -> boolean literal
///  - integer -> long literal (values above the int64 range are rejected)
///  - float   -> double literal
///  - string  -> string literal
///
/// Returns a JSON parse error for any other JSON type.
Result<Literal> LiteralFromJson(const nlohmann::json& json) {
  // Unwrap {"type": "literal", "value": <actual>} wrapper. The "type" value is checked
  // with is_string() first because get<std::string>() throws on non-string values.
  if (json.is_object() && json.contains(kType) && json[kType].is_string() &&
      json[kType].get<std::string>() == kTypeLiteral && json.contains(kValue)) {
    return LiteralFromJson(json[kValue]);
  }
  if (json.is_null()) {
    return Literal::Null(nullptr);
  }
  if (json.is_boolean()) {
    return Literal::Boolean(json.get<bool>());
  }
  if (json.is_number_integer()) {
    // is_number_integer() is also true for unsigned values; those above INT64_MAX
    // would silently wrap when read as int64_t, so reject them explicitly.
    if (json.is_number_unsigned() &&
        json.get<uint64_t>() >
            static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) [[unlikely]] {
      return JsonParseError("Integer literal is out of range for long: {}",
                            SafeDumpJson(json));
    }
    return Literal::Long(json.get<int64_t>());
  }
  if (json.is_number_float()) {
    return Literal::Double(json.get<double>());
  }
  if (json.is_string()) {
    // All strings are returned as String literals.
    // Conversion to binary/date/time/etc. happens during binding
    // when schema type information is available.
    return Literal::String(json.get<std::string>());
  }
  return JsonParseError("Unsupported literal JSON type");
}

/// Serializes a term by dispatching on its kind.
///
/// Reference terms are serialized as NamedReference and transform terms as
/// UnboundTransform; any other kind yields JSON null.
nlohmann::json TermToJson(const Term& term) {
  const auto kind = term.kind();
  if (kind == Term::Kind::kReference) {
    return ToJson(static_cast<const NamedReference&>(term));
  }
  if (kind == Term::Kind::kTransform) {
    return ToJson(static_cast<const UnboundTransform&>(term));
  }
  return nullptr;
}

/// Serializes an unbound predicate.
///
/// The result always carries "type" (the operation). "term" is written when the
/// predicate is an UnboundPredicateImpl over BoundReference or BoundTransform; for any
/// other concrete type no term is emitted and the operand list is treated as empty.
/// Unary operations carry no operands, set operations carry a "values" array, and other
/// operations carry "value" when at least one literal is present.
nlohmann::json ToJson(const UnboundPredicate& pred) {
  const auto op = pred.op();

  nlohmann::json out;
  out[kType] = ToJson(op);

  // The term and literals are only reachable through the concrete impl type.
  std::span<const Literal> operands;
  if (const auto* by_ref =
          dynamic_cast<const UnboundPredicateImpl<BoundReference>*>(&pred)) {
    out[kTerm] = TermToJson(*by_ref->term());
    operands = by_ref->literals();
  } else if (const auto* by_transform =
                 dynamic_cast<const UnboundPredicateImpl<BoundTransform>*>(&pred)) {
    out[kTerm] = TermToJson(*by_transform->term());
    operands = by_transform->literals();
  }

  if (IsUnaryOperation(op)) {
    return out;
  }

  if (IsSetOperation(op)) {
    nlohmann::json array = nlohmann::json::array();
    for (const auto& operand : operands) {
      array.push_back(ToJson(operand));
    }
    out[kValues] = std::move(array);
    return out;
  }

  if (!operands.empty()) {
    out[kValue] = ToJson(operands.front());
  }
  return out;
}

/// Parses an unbound predicate from a JSON object.
///
/// The object must contain a "type" field naming the operation and a "term" field that
/// is either a transform term (see IsTransformTerm) or a named reference. Operand fields
/// ("value" / "values") are validated by MakePredicateFromJson.
///
/// Returns a JSON parse error when the input is not an object or a required field is
/// missing, and propagates errors from operation, term and literal parsing.
Result<std::unique_ptr<UnboundPredicate>> UnboundPredicateFromJson(
    const nlohmann::json& json) {
  // operator[] on a const json throws for non-objects and has undefined behavior for
  // missing keys, so the shape is validated before any field is accessed.
  if (!json.is_object()) [[unlikely]] {
    return JsonParseError("Predicate must be a JSON object: {}", SafeDumpJson(json));
  }
  if (!json.contains(kType)) [[unlikely]] {
    return JsonParseError("Missing 'type' field for predicate: {}", SafeDumpJson(json));
  }
  if (!json.contains(kTerm)) [[unlikely]] {
    return JsonParseError("Missing 'term' field for predicate: {}", SafeDumpJson(json));
  }

  ICEBERG_ASSIGN_OR_RAISE(auto op, OperationTypeFromJson(json[kType]));

  const auto& term_json = json[kTerm];

  if (IsTransformTerm(term_json)) {
    ICEBERG_ASSIGN_OR_RAISE(auto term, UnboundTransformFromJson(term_json));
    return MakePredicateFromJson<BoundTransform>(op, std::move(term), json);
  }

  ICEBERG_ASSIGN_OR_RAISE(auto term, NamedReferenceFromJson(term_json));
  return MakePredicateFromJson<BoundReference>(op, std::move(term), json);
}

/// Parses an expression from JSON.
///
/// - A JSON boolean maps to the True / False singleton expressions.
/// - A JSON object is dispatched on its "type" field: AND / OR require "left" and
///   "right" sub-expressions, NOT requires "child", and every other operation is parsed
///   as an unbound predicate.
///
/// Returns a JSON parse error for any other JSON type or when a required field is
/// missing, and propagates errors from nested parsing and expression construction.
Result<std::shared_ptr<Expression>> ExpressionFromJson(const nlohmann::json& json) {
  // Handle boolean constants
  if (json.is_boolean()) {
    return json.get<bool>()
               ? internal::checked_pointer_cast<Expression>(True::Instance())
               : internal::checked_pointer_cast<Expression>(False::Instance());
  }

  if (!json.is_object()) [[unlikely]] {
    return JsonParseError("Expression must be boolean or object");
  }

  // operator[] on a const json has undefined behavior for a missing key, so make sure
  // the discriminator exists before reading it.
  if (!json.contains(kType)) [[unlikely]] {
    return JsonParseError("Expression is missing 'type' field: {}", SafeDumpJson(json));
  }

  ICEBERG_ASSIGN_OR_RAISE(auto op, OperationTypeFromJson(json[kType]));

  switch (op) {
    case Expression::Operation::kAnd: {
      if (!json.contains(kLeft) || !json.contains(kRight)) [[unlikely]] {
        return JsonParseError("AND expression missing 'left' or 'right' field");
      }
      ICEBERG_ASSIGN_OR_RAISE(auto left, ExpressionFromJson(json[kLeft]));
      ICEBERG_ASSIGN_OR_RAISE(auto right, ExpressionFromJson(json[kRight]));
      ICEBERG_ASSIGN_OR_RAISE(auto result, And::Make(std::move(left), std::move(right)));
      return std::shared_ptr<Expression>(std::move(result));
    }
    case Expression::Operation::kOr: {
      if (!json.contains(kLeft) || !json.contains(kRight)) [[unlikely]] {
        return JsonParseError("OR expression missing 'left' or 'right' field");
      }
      ICEBERG_ASSIGN_OR_RAISE(auto left, ExpressionFromJson(json[kLeft]));
      ICEBERG_ASSIGN_OR_RAISE(auto right, ExpressionFromJson(json[kRight]));
      ICEBERG_ASSIGN_OR_RAISE(auto result, Or::Make(std::move(left), std::move(right)));
      return std::shared_ptr<Expression>(std::move(result));
    }
    case Expression::Operation::kNot: {
      if (!json.contains(kChild)) [[unlikely]] {
        return JsonParseError("NOT expression missing 'child' field");
      }
      ICEBERG_ASSIGN_OR_RAISE(auto child, ExpressionFromJson(json[kChild]));
      ICEBERG_ASSIGN_OR_RAISE(auto result, Not::Make(std::move(child)));
      return std::shared_ptr<Expression>(std::move(result));
    }
    default:
      // All other operations are predicates
      return UnboundPredicateFromJson(json);
  }
}

/// Serializes an expression to JSON.
///
/// - True / False become the JSON booleans true / false.
/// - AND / OR become {"type": <op>, "left": <expr>, "right": <expr>}.
/// - NOT becomes {"type": <op>, "child": <expr>}.
/// - Every other operation is serialized as an unbound predicate.
///
/// \throws std::bad_cast if the expression has a predicate operation but is not an
///         UnboundPredicate (the reference dynamic_cast fails).
nlohmann::json ToJson(const Expression& expr) {
  switch (expr.op()) {
    case Expression::Operation::kTrue:
      return true;
    case Expression::Operation::kFalse:
      return false;
    case Expression::Operation::kAnd: {
      const auto& and_expr = static_cast<const And&>(expr);
      nlohmann::json json;
      json[kType] = ToJson(expr.op());
      json[kLeft] = ToJson(*and_expr.left());
      json[kRight] = ToJson(*and_expr.right());
      return json;
    }
    case Expression::Operation::kOr: {
      const auto& or_expr = static_cast<const Or&>(expr);
      nlohmann::json json;
      json[kType] = ToJson(expr.op());
      json[kLeft] = ToJson(*or_expr.left());
      json[kRight] = ToJson(*or_expr.right());
      return json;
    }
    case Expression::Operation::kNot: {
      const auto& not_expr = static_cast<const Not&>(expr);
      nlohmann::json json;
      json[kType] = ToJson(expr.op());
      json[kChild] = ToJson(*not_expr.child());
      return json;
    }
    default:
      // All other operations are predicates.
      return ToJson(dynamic_cast<const UnboundPredicate&>(expr));
  }
}

Expand Down
Loading
Loading