From 3ccc97a0cf2012b328a0d1de17cee3d713c1a12e Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Fri, 3 Apr 2026 12:33:52 -0700 Subject: [PATCH 1/2] feat: Register full-text search functions in unified SQL path Add UnifiedFunctionSpec with fluent builder to define relevance function signatures (match, match_phrase, multi_match, etc.) as a composable SqlOperatorTable chained into Calcite's FrameworkConfig. Functions are language-level primitives, always resolvable regardless of default schema. Add SQL and PPL test coverage for all 7 relevance functions. Signed-off-by: Chen Dai --- .../sql/api/UnifiedQueryContext.java | 4 + .../sql/api/spec/UnifiedFunctionSpec.java | 133 ++++++++++++++++++ .../api/UnifiedRelevanceSearchSqlTest.java | 130 +++++++++++++++++ .../sql/api/UnifiedRelevanceSearchTest.java | 82 +++++++++++ .../sql/api/UnifiedQueryTestBase.java | 10 ++ 5 files changed, 359 insertions(+) create mode 100644 api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java create mode 100644 api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchSqlTest.java create mode 100644 api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchTest.java diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java index 4332ff1766..dee030731e 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java @@ -23,6 +23,7 @@ import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider; import org.apache.calcite.schema.Schema; import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.tools.FrameworkConfig; import org.apache.calcite.tools.Frameworks; @@ -30,6 +31,7 @@ import org.opensearch.sql.api.parser.CalciteSqlQueryParser; import org.opensearch.sql.api.parser.PPLQueryParser; import org.opensearch.sql.api.parser.UnifiedQueryParser; +import org.opensearch.sql.api.spec.UnifiedFunctionSpec; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.SysLimit; import org.opensearch.sql.common.setting.Settings; @@ -239,10 +241,12 @@ public List getSettings() { private FrameworkConfig buildFrameworkConfig() { SchemaPlus rootSchema = CalciteSchema.createRootSchema(true, cacheMetadata).plus(); catalogs.forEach(rootSchema::add); + UnifiedFunctionSpec.registerAll(rootSchema); SchemaPlus defaultSchema = findSchemaByPath(rootSchema, defaultNamespace); return Frameworks.newConfigBuilder() .parserConfig(buildParserConfig()) + .operatorTable(SqlStdOperatorTable.instance()) .defaultSchema(defaultSchema) .traitDefs((List) null) .programs(Programs.calc(DefaultRelMetadataProvider.INSTANCE)) diff --git a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java new file mode 100644 index 0000000000..15c09a76d7 --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java @@ -0,0 +1,133 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.spec; + +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.IntStream; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.schema.FunctionParameter; +import org.apache.calcite.schema.ScalarFunction; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.sql.type.SqlTypeName; +import org.checkerframework.checker.nullness.qual.Nullable; + +/** + * Central registry of language-specified function signatures (Unified Language Specification + * layer). Each entry maps a function name to a canonical {@link ScalarFunction} with named required + * parameters of type {@link SqlTypeName#ANY}. + * + *

This class defines what functions exist and their signatures. Function + * implementations live in the Unified Execution Runtime (UER) layer — see {@link + * org.opensearch.sql.api.function.UnifiedFunction} and {@link + * org.opensearch.sql.api.function.UnifiedFunctionRepository}. For data-source-specific functions + * (e.g., relevance search), execution is handled by adapter pushdown rules rather than UER. + * + *

Named parameters enable SQL named-argument syntax ({@code match(field => col, query => + * 'text')}) via Calcite's {@code ARGUMENT_ASSIGNMENT} operator. With fixed required parameters (no + * optional params), CALCITE-5366 + * is avoided entirely. + * + *

Functions are registered globally on the root schema via {@link #registerAll(SchemaPlus)}, + * following the same pattern as Flink's {@code FlinkSqlOperatorTable} — engine-level primitives + * available regardless of catalog. Pushdown rules enforce data-source capability at optimization + * time. + * + * @see org.opensearch.sql.api.function.UnifiedFunction + * @see org.opensearch.sql.api.function.UnifiedFunctionRepository + */ +// TODO: UnifiedFunctionRepository should resolve implementations for functions defined here, +// rather than independently discovering from PPLBuiltinOperators. The spec is the source of +// truth for what functions exist; UER provides how they execute. Decide whether to late-bind +// UER implementations (ImplementableFunction) to spec-defined signatures for engine-independent +// functions (e.g., upper, lower). Currently only data-source-specific functions (pushdown-only) +// are registered here. +public final class UnifiedFunctionSpec { + + private UnifiedFunctionSpec() {} + + /** Single-field relevance function params: (field, query). */ + private static final List SINGLE_FIELD_PARAMS = List.of("field", "query"); + + /** Multi-field relevance function params: (fields, query). */ + private static final List MULTI_FIELD_PARAMS = List.of("fields", "query"); + + private static final Map REGISTRY = + Map.of( + "match", scalarFunction(SINGLE_FIELD_PARAMS), + "match_phrase", scalarFunction(SINGLE_FIELD_PARAMS), + "match_bool_prefix", scalarFunction(SINGLE_FIELD_PARAMS), + "match_phrase_prefix", scalarFunction(SINGLE_FIELD_PARAMS), + "multi_match", scalarFunction(MULTI_FIELD_PARAMS), + "simple_query_string", scalarFunction(MULTI_FIELD_PARAMS), + "query_string", scalarFunction(MULTI_FIELD_PARAMS)); + + /** Registers all language-specified functions on the given schema (typically root). */ + public static void registerAll(SchemaPlus schema) { + REGISTRY.forEach(schema::add); + } + + /** Returns the canonical ScalarFunction for a language-specified function, or null. */ + public static @Nullable ScalarFunction get(String name) { + return REGISTRY.get(name); + } + + /** Returns true if the name is a language-specified function. */ + public static boolean isLanguageFunction(String name) { + return REGISTRY.containsKey(name); + } + + /** All registered language function names. */ + public static Set names() { + return REGISTRY.keySet(); + } + + private static ScalarFunction scalarFunction(List paramNames) { + List params = + IntStream.range(0, paramNames.size()) + .mapToObj(i -> (FunctionParameter) new AnyParam(i, paramNames.get(i))) + .toList(); + return new BooleanScalarFunction(params); + } + + /** A ScalarFunction that returns BOOLEAN with the given parameters. */ + private record BooleanScalarFunction(List params) implements ScalarFunction { + @Override + public List getParameters() { + return params; + } + + @Override + public RelDataType getReturnType(RelDataTypeFactory typeFactory) { + return typeFactory.createSqlType(SqlTypeName.BOOLEAN); + } + } + + /** A required function parameter of type ANY. */ + private record AnyParam(int ordinal, String name) implements FunctionParameter { + @Override + public int getOrdinal() { + return ordinal; + } + + @Override + public String getName() { + return name; + } + + @Override + public boolean isOptional() { + return false; + } + + @Override + public RelDataType getType(RelDataTypeFactory typeFactory) { + return typeFactory.createSqlType(SqlTypeName.ANY); + } + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchSqlTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchSqlTest.java new file mode 100644 index 0000000000..ed1d6ab6e9 --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchSqlTest.java @@ -0,0 +1,130 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api; + +import org.junit.Test; +import org.opensearch.sql.executor.QueryType; + +/** + * Tests for relevance search functions in SQL planning path. Functions are registered globally on + * the root schema via {@link org.opensearch.sql.api.spec.UnifiedFunctionSpec#registerAll}, resolved + * through Calcite's standard CalciteCatalogReader → toOp() pipeline. + */ +public class UnifiedRelevanceSearchSqlTest extends UnifiedQueryTestBase { + + @Override + protected QueryType queryType() { + return QueryType.SQL; + } + + @Test + public void testMatch() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE "match"(MAP['field', name], MAP['query', 'John'])\ + """) + .assertPlan( + """ + LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3]) + LogicalFilter(condition=[match(MAP('field', $1), MAP('query', 'John'))]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testMatchPhrase() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE match_phrase(MAP['field', name], MAP['query', 'John Doe'])\ + """) + .assertPlan( + """ + LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3]) + LogicalFilter(condition=[match_phrase(MAP('field', $1), MAP('query', 'John Doe'))]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testMatchBoolPrefix() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE match_bool_prefix(MAP['field', name], MAP['query', 'John'])\ + """) + .assertPlan( + """ + LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3]) + LogicalFilter(condition=[match_bool_prefix(MAP('field', $1), MAP('query', 'John'))]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testMatchPhrasePrefix() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE match_phrase_prefix(MAP['field', name], MAP['query', 'John'])\ + """) + .assertPlan( + """ + LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3]) + LogicalFilter(condition=[match_phrase_prefix(MAP('field', $1), MAP('query', 'John'))]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testMultiMatch() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE multi_match(\ + MAP['fields', MAP['name', 1.0, 'department', 2.0]], MAP['query', 'John'])\ + """) + .assertPlan( + """ + LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3]) + LogicalFilter(condition=[multi_match(MAP('fields', MAP(CAST('name'):CHAR(10) NOT NULL, 1.0:DECIMAL(2, 1), 'department', 2.0:DECIMAL(2, 1))), MAP('query', 'John'))]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testSimpleQueryString() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE simple_query_string(\ + MAP['fields', MAP['name', 1.0]], MAP['query', 'John'])\ + """) + .assertPlan( + """ + LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3]) + LogicalFilter(condition=[simple_query_string(MAP('fields', MAP('name', 1.0:DECIMAL(2, 1))), MAP('query', 'John'))]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testQueryString() { + givenQuery( + """ + SELECT * FROM catalog.employees + WHERE query_string(\ + MAP['fields', MAP['name', 1.0]], MAP['query', 'John'])\ + """) + .assertPlan( + """ + LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3]) + LogicalFilter(condition=[query_string(MAP('fields', MAP('name', 1.0:DECIMAL(2, 1))), MAP('query', 'John'))]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } +} diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchTest.java new file mode 100644 index 0000000000..06aa25d126 --- /dev/null +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchTest.java @@ -0,0 +1,82 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api; + +import org.junit.Test; + +/** Tests for relevance search functions in PPL planning path. */ +public class UnifiedRelevanceSearchTest extends UnifiedQueryTestBase { + + @Test + public void testMatch() { + givenQuery("source=catalog.employees | where match(name, 'John')") + .assertPlan( + """ + LogicalFilter(condition=[match(MAP('field', $1), MAP('query', 'John':VARCHAR))]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testMatchPhrase() { + givenQuery("source=catalog.employees | where match_phrase(name, 'John Doe')") + .assertPlan( + """ + LogicalFilter(condition=[match_phrase(MAP('field', $1), MAP('query', 'John Doe':VARCHAR))]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testMatchBoolPrefix() { + givenQuery("source=catalog.employees | where match_bool_prefix(name, 'John')") + .assertPlan( + """ + LogicalFilter(condition=[match_bool_prefix(MAP('field', $1), MAP('query', 'John':VARCHAR))]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testMatchPhrasePrefix() { + givenQuery("source=catalog.employees | where match_phrase_prefix(name, 'John')") + .assertPlan( + """ + LogicalFilter(condition=[match_phrase_prefix(MAP('field', $1), MAP('query', 'John':VARCHAR))]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testMultiMatch() { + givenQuery("source=catalog.employees | where multi_match(['name', 'department'], 'John')") + .assertPlan( + """ + LogicalFilter(condition=[multi_match(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE, 'department':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'John':VARCHAR))]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testSimpleQueryString() { + givenQuery("source=catalog.employees | where simple_query_string(['name'], 'John')") + .assertPlan( + """ + LogicalFilter(condition=[simple_query_string(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'John':VARCHAR))]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } + + @Test + public void testQueryString() { + givenQuery("source=catalog.employees | where query_string(['name'], 'John')") + .assertPlan( + """ + LogicalFilter(condition=[query_string(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'John':VARCHAR))]) + LogicalTableScan(table=[[catalog, employees]]) + """); + } +} diff --git a/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java b/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java index eaaaccbdbf..3f19d69f19 100644 --- a/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java +++ b/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java @@ -8,6 +8,7 @@ import static org.apache.calcite.sql.type.SqlTypeName.INTEGER; import static org.apache.calcite.sql.type.SqlTypeName.VARCHAR; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; import java.util.List; import java.util.Map; @@ -164,6 +165,15 @@ public QueryAssert assertPlan(String expected) { return this; } + /** Assert the logical plan contains the expected substring. */ + public QueryAssert assertPlanContains(String expected) { + String planStr = RelOptUtil.toString(plan).replaceAll("\\r\\n", "\n"); + assertTrue( + "Expected plan to contain: " + expected + "\nActual plan:\n" + planStr, + planStr.contains(expected)); + return this; + } + /** Assert the output field names match. */ public QueryAssert assertFields(String... names) { assertEquals(List.of(names), plan.getRowType().getFieldNames()); From acade43ff603e5a6599466156ed15e6d8446bae8 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Fri, 3 Apr 2026 12:34:15 -0700 Subject: [PATCH 2/2] feat: V2 named-argument syntax with NamedArgRewriter Add NamedArgRewriter SqlShuttle that normalizes V2/PPL relevance syntax into MAP-based form before Calcite validation. Transforms positional and key=value arguments into MAP[paramName, value] pairs matching PPL's internal representation for uniform pushdown rules. Refactor UnifiedFunctionSpec to instance-based design with fluent builder and Category record for grouping. Use SqlUserDefinedFunction for consistency with PPL path. Add error tests and QueryErrorAssert to test base. Signed-off-by: Chen Dai --- .../sql/api/UnifiedQueryContext.java | 6 +- .../sql/api/UnifiedQueryPlanner.java | 4 +- .../sql/api/parser/NamedArgRewriter.java | 65 ++++++ .../sql/api/spec/UnifiedFunctionSpec.java | 216 ++++++++++-------- .../api/UnifiedRelevanceSearchSqlTest.java | 141 ++++++++---- .../sql/api/UnifiedRelevanceSearchTest.java | 56 +++-- .../sql/api/UnifiedQueryTestBase.java | 31 +++ 7 files changed, 349 insertions(+), 170 deletions(-) create mode 100644 api/src/main/java/org/opensearch/sql/api/parser/NamedArgRewriter.java diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java index dee030731e..a8c4a30211 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java @@ -25,6 +25,7 @@ import org.apache.calcite.schema.SchemaPlus; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.util.SqlOperatorTables; import org.apache.calcite.tools.FrameworkConfig; import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.Programs; @@ -241,12 +242,13 @@ public List getSettings() { private FrameworkConfig buildFrameworkConfig() { SchemaPlus rootSchema = CalciteSchema.createRootSchema(true, cacheMetadata).plus(); catalogs.forEach(rootSchema::add); - UnifiedFunctionSpec.registerAll(rootSchema); SchemaPlus defaultSchema = findSchemaByPath(rootSchema, defaultNamespace); return Frameworks.newConfigBuilder() .parserConfig(buildParserConfig()) - .operatorTable(SqlStdOperatorTable.instance()) + .operatorTable( + SqlOperatorTables.chain( + SqlStdOperatorTable.instance(), UnifiedFunctionSpec.RELEVANCE.operatorTable())) .defaultSchema(defaultSchema) .traitDefs((List) null) .programs(Programs.calc(DefaultRelMetadataProvider.INSTANCE)) diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java index af4d9f518a..74800db4a3 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java @@ -17,6 +17,7 @@ import org.apache.calcite.sql.SqlNode; import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.Planner; +import org.opensearch.sql.api.parser.NamedArgRewriter; import org.opensearch.sql.api.parser.UnifiedQueryParser; import org.opensearch.sql.ast.tree.UnresolvedPlan; import org.opensearch.sql.calcite.CalciteRelNodeVisitor; @@ -81,7 +82,8 @@ private static class CalciteNativeStrategy implements PlanningStrategy { public RelNode plan(String query) throws Exception { try (Planner planner = Frameworks.getPlanner(context.getPlanContext().config)) { SqlNode parsed = planner.parse(query); - SqlNode validated = planner.validate(parsed); + SqlNode rewritten = parsed.accept(NamedArgRewriter.INSTANCE); + SqlNode validated = planner.validate(rewritten); RelRoot relRoot = planner.rel(validated); return relRoot.project(); } diff --git a/api/src/main/java/org/opensearch/sql/api/parser/NamedArgRewriter.java b/api/src/main/java/org/opensearch/sql/api/parser/NamedArgRewriter.java new file mode 100644 index 0000000000..629d92442f --- /dev/null +++ b/api/src/main/java/org/opensearch/sql/api/parser/NamedArgRewriter.java @@ -0,0 +1,65 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.api.parser; + +import java.util.List; +import lombok.AccessLevel; +import lombok.NoArgsConstructor; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlLiteral; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.util.SqlShuttle; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.opensearch.sql.api.spec.UnifiedFunctionSpec; + +/** + * Pre-validation rewriter for backward compatibility with non-standard named-argument syntax (e.g., + * {@code operator='AND'} instead of {@code operator => 'AND'}). Normalizes relevance function calls + * into MAP-based form so SQL and PPL paths produce identical query plans for pushdown rules. + * + *

This rewriter is subject to removal if we adopt standard SQL named-argument syntax. + */ +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public final class NamedArgRewriter extends SqlShuttle { + + public static final NamedArgRewriter INSTANCE = new NamedArgRewriter(); + + @Override + public @Nullable SqlNode visit(SqlCall call) { + SqlCall visited = (SqlCall) super.visit(call); + return UnifiedFunctionSpec.of(visited.getOperator().getName()) + .filter(UnifiedFunctionSpec.RELEVANCE::contains) + .map(spec -> (SqlNode) rewriteToMaps(visited, spec.getParamNames())) + .orElse(visited); + } + + /** + * Rewrites each argument into a MAP entry. For match(name, 'John', operator='AND'): + *

  • Positional arg: name → MAP('field', name) + *
  • Named arg: operator='AND' → MAP('operator', 'AND') + */ + private static SqlCall rewriteToMaps(SqlCall call, List paramNames) { + List operands = call.getOperandList(); + SqlNode[] maps = new SqlNode[operands.size()]; + for (int i = 0; i < operands.size(); i++) { + SqlNode op = operands.get(i); + if (op instanceof SqlCall eq && op.getKind() == SqlKind.EQUALS) { + maps[i] = toMap(eq.operand(0).toString(), eq.operand(1)); + } else { + maps[i] = toMap(paramNames.get(i), op); + } + } + return call.getOperator().createCall(call.getParserPosition(), maps); + } + + private static SqlNode toMap(String key, SqlNode value) { + return SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR.createCall( + SqlParserPos.ZERO, SqlLiteral.createCharString(key, SqlParserPos.ZERO), value); + } +} diff --git a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java index 15c09a76d7..f60fc61a50 100644 --- a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java +++ b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java @@ -5,129 +5,167 @@ package org.opensearch.sql.api.spec; +import static org.apache.calcite.sql.type.ReturnTypes.BOOLEAN; + import java.util.List; import java.util.Map; -import java.util.Set; -import java.util.stream.IntStream; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import lombok.AccessLevel; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.ToString; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.schema.FunctionParameter; -import org.apache.calcite.schema.ScalarFunction; -import org.apache.calcite.schema.SchemaPlus; -import org.apache.calcite.sql.type.SqlTypeName; -import org.checkerframework.checker.nullness.qual.Nullable; +import org.apache.calcite.sql.SqlCallBinding; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlOperandCountRange; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.SqlOperatorTable; +import org.apache.calcite.sql.parser.SqlParserPos; +import org.apache.calcite.sql.type.InferTypes; +import org.apache.calcite.sql.type.SqlOperandCountRanges; +import org.apache.calcite.sql.type.SqlOperandMetadata; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.util.SqlOperatorTables; +import org.apache.calcite.sql.validate.SqlUserDefinedFunction; /** - * Central registry of language-specified function signatures (Unified Language Specification - * layer). Each entry maps a function name to a canonical {@link ScalarFunction} with named required - * parameters of type {@link SqlTypeName#ANY}. - * - *

    This class defines what functions exist and their signatures. Function - * implementations live in the Unified Execution Runtime (UER) layer — see {@link - * org.opensearch.sql.api.function.UnifiedFunction} and {@link - * org.opensearch.sql.api.function.UnifiedFunctionRepository}. For data-source-specific functions - * (e.g., relevance search), execution is handled by adapter pushdown rules rather than UER. - * - *

    Named parameters enable SQL named-argument syntax ({@code match(field => col, query => - * 'text')}) via Calcite's {@code ARGUMENT_ASSIGNMENT} operator. With fixed required parameters (no - * optional params), CALCITE-5366 - * is avoided entirely. - * - *

    Functions are registered globally on the root schema via {@link #registerAll(SchemaPlus)}, - * following the same pattern as Flink's {@code FlinkSqlOperatorTable} — engine-level primitives - * available regardless of catalog. Pushdown rules enforce data-source capability at optimization - * time. - * - * @see org.opensearch.sql.api.function.UnifiedFunction - * @see org.opensearch.sql.api.function.UnifiedFunctionRepository + * Declarative registry of language-level functions for the unified query engine. Functions defined + * here are part of the language spec — always resolvable regardless of the underlying data source. + * They are grouped into {@link Category categories} that callers chain into Calcite's operator + * table. Data-source capability is enforced at optimization time by pushdown rules. */ -// TODO: UnifiedFunctionRepository should resolve implementations for functions defined here, -// rather than independently discovering from PPLBuiltinOperators. The spec is the source of -// truth for what functions exist; UER provides how they execute. Decide whether to late-bind -// UER implementations (ImplementableFunction) to spec-defined signatures for engine-independent -// functions (e.g., upper, lower). Currently only data-source-specific functions (pushdown-only) -// are registered here. +@Getter +@ToString(of = "funcName") +@EqualsAndHashCode(of = "funcName") +@RequiredArgsConstructor(access = AccessLevel.PRIVATE) public final class UnifiedFunctionSpec { - private UnifiedFunctionSpec() {} - - /** Single-field relevance function params: (field, query). */ - private static final List SINGLE_FIELD_PARAMS = List.of("field", "query"); + /** Function name as registered in the operator table (e.g., "match", "multi_match"). */ + private final String funcName; + + /** Calcite operator for chaining into the framework config's operator table. */ + private final SqlOperator operator; + + /** Full-text search functions. */ + public static final Category RELEVANCE = + new Category( + List.of( + function("match").vararg("field", "query").returnType(BOOLEAN).build(), + function("match_phrase").vararg("field", "query").returnType(BOOLEAN).build(), + function("match_bool_prefix").vararg("field", "query").returnType(BOOLEAN).build(), + function("match_phrase_prefix").vararg("field", "query").returnType(BOOLEAN).build(), + function("multi_match").vararg("fields", "query").returnType(BOOLEAN).build(), + function("simple_query_string").vararg("fields", "query").returnType(BOOLEAN).build(), + function("query_string").vararg("fields", "query").returnType(BOOLEAN).build())); + + /** All registered function specs, keyed by function name. */ + private static final Map ALL_SPECS = + Stream.of(RELEVANCE) + .flatMap(c -> c.specs().stream()) + .collect(Collectors.toMap(UnifiedFunctionSpec::getFuncName, s -> s)); + + /** + * Looks up a function spec by name across all categories. + * + * @param name function name (case-insensitive) + * @return the spec, or empty if not found + */ + public static Optional of(String name) { + return Optional.ofNullable(ALL_SPECS.get(name.toLowerCase())); + } - /** Multi-field relevance function params: (fields, query). */ - private static final List MULTI_FIELD_PARAMS = List.of("fields", "query"); + /** + * @return required param names from {@link SqlOperandMetadata}, or empty if not available. + */ + public List getParamNames() { + return operator.getOperandTypeChecker() instanceof SqlOperandMetadata metadata + ? metadata.paramNames() + : List.of(); + } - private static final Map REGISTRY = - Map.of( - "match", scalarFunction(SINGLE_FIELD_PARAMS), - "match_phrase", scalarFunction(SINGLE_FIELD_PARAMS), - "match_bool_prefix", scalarFunction(SINGLE_FIELD_PARAMS), - "match_phrase_prefix", scalarFunction(SINGLE_FIELD_PARAMS), - "multi_match", scalarFunction(MULTI_FIELD_PARAMS), - "simple_query_string", scalarFunction(MULTI_FIELD_PARAMS), - "query_string", scalarFunction(MULTI_FIELD_PARAMS)); + /** A group of function specs that can be chained into Calcite's operator table. */ + public record Category(List specs) { + public SqlOperatorTable operatorTable() { + return SqlOperatorTables.of(specs.stream().map(UnifiedFunctionSpec::getOperator).toList()); + } - /** Registers all language-specified functions on the given schema (typically root). */ - public static void registerAll(SchemaPlus schema) { - REGISTRY.forEach(schema::add); + /** Returns true if this category contains the given spec. */ + public boolean contains(UnifiedFunctionSpec spec) { + return specs.contains(spec); + } } - /** Returns the canonical ScalarFunction for a language-specified function, or null. */ - public static @Nullable ScalarFunction get(String name) { - return REGISTRY.get(name); + public static Builder function(String name) { + return new Builder(name); } - /** Returns true if the name is a language-specified function. */ - public static boolean isLanguageFunction(String name) { - return REGISTRY.containsKey(name); - } + /** Fluent builder for function specs. */ + @RequiredArgsConstructor(access = AccessLevel.PRIVATE) + public static class Builder { + private final String funcName; + private List paramNames = List.of(); + private SqlReturnTypeInference returnType; - /** All registered language function names. */ - public static Set names() { - return REGISTRY.keySet(); - } + public Builder vararg(String... names) { + this.paramNames = List.of(names); + return this; + } - private static ScalarFunction scalarFunction(List paramNames) { - List params = - IntStream.range(0, paramNames.size()) - .mapToObj(i -> (FunctionParameter) new AnyParam(i, paramNames.get(i))) - .toList(); - return new BooleanScalarFunction(params); - } + public Builder returnType(SqlReturnTypeInference type) { + this.returnType = type; + return this; + } - /** A ScalarFunction that returns BOOLEAN with the given parameters. */ - private record BooleanScalarFunction(List params) implements ScalarFunction { - @Override - public List getParameters() { - return params; + public UnifiedFunctionSpec build() { + Objects.requireNonNull(returnType, "returnType is required"); + return new UnifiedFunctionSpec( + funcName, + new SqlUserDefinedFunction( + new SqlIdentifier(funcName, SqlParserPos.ZERO), + SqlKind.OTHER_FUNCTION, + returnType, + InferTypes.ANY_NULLABLE, + new VariadicOperandMetadata(paramNames), + List::of)); // Pushdown-only: no local implementation } + } + + /** + * Custom operand metadata that bypasses Calcite's built-in type checking. Calcite's {@code + * FamilyOperandTypeChecker} rejects variadic calls (CALCITE-5366), so this implementation accepts + * any operand types and delegates validation to pushdown. + */ + private record VariadicOperandMetadata(List paramNames) implements SqlOperandMetadata { @Override - public RelDataType getReturnType(RelDataTypeFactory typeFactory) { - return typeFactory.createSqlType(SqlTypeName.BOOLEAN); + public List paramNames() { + return paramNames; } - } - /** A required function parameter of type ANY. */ - private record AnyParam(int ordinal, String name) implements FunctionParameter { @Override - public int getOrdinal() { - return ordinal; + public List paramTypes(RelDataTypeFactory tf) { + return List.of(); } @Override - public String getName() { - return name; + public boolean checkOperandTypes(SqlCallBinding binding, boolean throwOnFailure) { + return true; // Bypass: CALCITE-5366 breaks optional argument type checking } @Override - public boolean isOptional() { - return false; + public SqlOperandCountRange getOperandCountRange() { + return SqlOperandCountRanges.from(paramNames.size()); } @Override - public RelDataType getType(RelDataTypeFactory typeFactory) { - return typeFactory.createSqlType(SqlTypeName.ANY); + public String getAllowedSignatures(SqlOperator op, String opName) { + return opName + "(" + String.join(", ", paramNames) + "[, option=value ...])"; } } } diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchSqlTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchSqlTest.java index ed1d6ab6e9..cbe97a8114 100644 --- a/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchSqlTest.java +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchSqlTest.java @@ -9,9 +9,9 @@ import org.opensearch.sql.executor.QueryType; /** - * Tests for relevance search functions in SQL planning path. Functions are registered globally on - * the root schema via {@link org.opensearch.sql.api.spec.UnifiedFunctionSpec#registerAll}, resolved - * through Calcite's standard CalciteCatalogReader → toOp() pipeline. + * Tests for relevance search functions in SQL planning path using V2/PPL syntax. Mirrors the PPL + * tests in {@link UnifiedRelevanceSearchTest} with equivalent SQL queries. Both paths produce + * identical MAP-based plans for pushdown rules. */ public class UnifiedRelevanceSearchSqlTest extends UnifiedQueryTestBase { @@ -25,7 +25,7 @@ public void testMatch() { givenQuery( """ SELECT * FROM catalog.employees - WHERE "match"(MAP['field', name], MAP['query', 'John'])\ + WHERE "match"(name, 'John')\ """) .assertPlan( """ @@ -40,14 +40,9 @@ public void testMatchPhrase() { givenQuery( """ SELECT * FROM catalog.employees - WHERE match_phrase(MAP['field', name], MAP['query', 'John Doe'])\ + WHERE match_phrase(name, 'John Doe')\ """) - .assertPlan( - """ - LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3]) - LogicalFilter(condition=[match_phrase(MAP('field', $1), MAP('query', 'John Doe'))]) - LogicalTableScan(table=[[catalog, employees]]) - """); + .assertPlanContains("match_phrase(MAP('field', $1), MAP('query', 'John Doe'))"); } @Test @@ -55,14 +50,9 @@ public void testMatchBoolPrefix() { givenQuery( """ SELECT * FROM catalog.employees - WHERE match_bool_prefix(MAP['field', name], MAP['query', 'John'])\ + WHERE match_bool_prefix(name, 'John')\ """) - .assertPlan( - """ - LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3]) - LogicalFilter(condition=[match_bool_prefix(MAP('field', $1), MAP('query', 'John'))]) - LogicalTableScan(table=[[catalog, employees]]) - """); + .assertPlanContains("match_bool_prefix(MAP('field', $1), MAP('query', 'John'))"); } @Test @@ -70,14 +60,9 @@ public void testMatchPhrasePrefix() { givenQuery( """ SELECT * FROM catalog.employees - WHERE match_phrase_prefix(MAP['field', name], MAP['query', 'John'])\ + WHERE match_phrase_prefix(name, 'John')\ """) - .assertPlan( - """ - LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3]) - LogicalFilter(condition=[match_phrase_prefix(MAP('field', $1), MAP('query', 'John'))]) - LogicalTableScan(table=[[catalog, employees]]) - """); + .assertPlanContains("match_phrase_prefix(MAP('field', $1), MAP('query', 'John'))"); } @Test @@ -85,15 +70,9 @@ public void testMultiMatch() { givenQuery( """ SELECT * FROM catalog.employees - WHERE multi_match(\ - MAP['fields', MAP['name', 1.0, 'department', 2.0]], MAP['query', 'John'])\ + WHERE multi_match(name, 'John')\ """) - .assertPlan( - """ - LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3]) - LogicalFilter(condition=[multi_match(MAP('fields', MAP(CAST('name'):CHAR(10) NOT NULL, 1.0:DECIMAL(2, 1), 'department', 2.0:DECIMAL(2, 1))), MAP('query', 'John'))]) - LogicalTableScan(table=[[catalog, employees]]) - """); + .assertPlanContains("multi_match(MAP('fields', $1), MAP('query', 'John'))"); } @Test @@ -101,15 +80,9 @@ public void testSimpleQueryString() { givenQuery( """ SELECT * FROM catalog.employees - WHERE simple_query_string(\ - MAP['fields', MAP['name', 1.0]], MAP['query', 'John'])\ + WHERE simple_query_string(name, 'John')\ """) - .assertPlan( - """ - LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3]) - LogicalFilter(condition=[simple_query_string(MAP('fields', MAP('name', 1.0:DECIMAL(2, 1))), MAP('query', 'John'))]) - LogicalTableScan(table=[[catalog, employees]]) - """); + .assertPlanContains("simple_query_string(MAP('fields', $1), MAP('query', 'John'))"); } @Test @@ -117,14 +90,86 @@ public void testQueryString() { givenQuery( """ SELECT * FROM catalog.employees - WHERE query_string(\ - MAP['fields', MAP['name', 1.0]], MAP['query', 'John'])\ + WHERE query_string(name, 'John')\ """) - .assertPlan( + .assertPlanContains("query_string(MAP('fields', $1), MAP('query', 'John'))"); + } + + @Test + public void testMatchWithOptions() { + givenQuery( """ - LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3]) - LogicalFilter(condition=[query_string(MAP('fields', MAP('name', 1.0:DECIMAL(2, 1))), MAP('query', 'John'))]) - LogicalTableScan(table=[[catalog, employees]]) - """); + SELECT * FROM catalog.employees + WHERE "match"(name, 'John', operator='AND', boost=2.0)\ + """) + .assertPlanContains( + "match(MAP('field', $1), MAP('query', 'John')," + + " MAP('operator', 'AND'), MAP('boost', 2.0:DECIMAL(2, 1)))"); + } + + @Test + public void testMatchMissingArguments() { + givenInvalidQuery( + """ + SELECT * FROM catalog.employees + WHERE "match"('John')\ + """) + .assertErrorMessage( + "No match found for function signature match(<(CHAR(5), CHAR(4)) MAP>)"); + } + + @Test + public void testUnknownRelevanceFunction() { + givenInvalidQuery( + """ + SELECT * FROM catalog.employees + WHERE unknown_relevance(name, 'John')\ + """) + .assertErrorMessage( + "No match found for function signature unknown_relevance(, )"); + } + + // FIXME: Calcite's SQL parser does not support V2 bracket field list syntax ['field1', 'field2']. + // Multi-field relevance functions only accept a single column reference in the Calcite SQL path. + // See: https://github.com/opensearch-project/sql/issues/XXXX + + @Test + public void testMultiMatchBracketSyntaxNotSupported() { + givenInvalidQuery( + """ + SELECT * FROM catalog.employees + WHERE multi_match(['name', 'department'], 'John')\ + """) + .assertErrorMessage("Encountered \"[\" at line"); + } + + @Test + public void testMultiMatchFieldBoostNotSupported() { + givenInvalidQuery( + """ + SELECT * FROM catalog.employees + WHERE multi_match(['name' ^ 2.0, 'department'], 'John')\ + """) + .assertErrorMessage("Encountered \"[\" at line"); + } + + @Test + public void testSimpleQueryStringBracketSyntaxNotSupported() { + givenInvalidQuery( + """ + SELECT * FROM catalog.employees + WHERE simple_query_string(['name', 'department'], 'John')\ + """) + .assertErrorMessage("Encountered \"[\" at line"); + } + + @Test + public void testQueryStringBracketSyntaxNotSupported() { + givenInvalidQuery( + """ + SELECT * FROM catalog.employees + WHERE query_string(['name', 'department'], 'John')\ + """) + .assertErrorMessage("Encountered \"[\" at line"); } } diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchTest.java index 06aa25d126..a80ae19086 100644 --- a/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchTest.java +++ b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchTest.java @@ -23,60 +23,56 @@ public void testMatch() { @Test public void testMatchPhrase() { givenQuery("source=catalog.employees | where match_phrase(name, 'John Doe')") - .assertPlan( - """ - LogicalFilter(condition=[match_phrase(MAP('field', $1), MAP('query', 'John Doe':VARCHAR))]) - LogicalTableScan(table=[[catalog, employees]]) - """); + .assertPlanContains("match_phrase(MAP('field', $1), MAP('query', 'John Doe':VARCHAR))"); } @Test public void testMatchBoolPrefix() { givenQuery("source=catalog.employees | where match_bool_prefix(name, 'John')") - .assertPlan( - """ - LogicalFilter(condition=[match_bool_prefix(MAP('field', $1), MAP('query', 'John':VARCHAR))]) - LogicalTableScan(table=[[catalog, employees]]) - """); + .assertPlanContains("match_bool_prefix(MAP('field', $1), MAP('query', 'John':VARCHAR))"); } @Test public void testMatchPhrasePrefix() { givenQuery("source=catalog.employees | where match_phrase_prefix(name, 'John')") - .assertPlan( - """ - LogicalFilter(condition=[match_phrase_prefix(MAP('field', $1), MAP('query', 'John':VARCHAR))]) - LogicalTableScan(table=[[catalog, employees]]) - """); + .assertPlanContains("match_phrase_prefix(MAP('field', $1), MAP('query', 'John':VARCHAR))"); } @Test public void testMultiMatch() { givenQuery("source=catalog.employees | where multi_match(['name', 'department'], 'John')") - .assertPlan( - """ - LogicalFilter(condition=[multi_match(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE, 'department':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'John':VARCHAR))]) - LogicalTableScan(table=[[catalog, employees]]) - """); + .assertPlanContains( + "multi_match(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE," + + " 'department':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'John':VARCHAR))"); } @Test public void testSimpleQueryString() { givenQuery("source=catalog.employees | where simple_query_string(['name'], 'John')") - .assertPlan( - """ - LogicalFilter(condition=[simple_query_string(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'John':VARCHAR))]) - LogicalTableScan(table=[[catalog, employees]]) - """); + .assertPlanContains( + "simple_query_string(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE))," + + " MAP('query', 'John':VARCHAR))"); } @Test public void testQueryString() { givenQuery("source=catalog.employees | where query_string(['name'], 'John')") - .assertPlan( - """ - LogicalFilter(condition=[query_string(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'John':VARCHAR))]) - LogicalTableScan(table=[[catalog, employees]]) - """); + .assertPlanContains( + "query_string(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE))," + + " MAP('query', 'John':VARCHAR))"); + } + + @Test + public void testMatchMissingArguments() { + givenInvalidQuery("source=catalog.employees | where match('John')") + .assertErrorMessage( + "[)] is not a valid term at this part of the query:" + + " '...| where match('John')' <-- HERE. Expecting tokens: ','"); + } + + @Test + public void testUnknownRelevanceFunction() { + givenInvalidQuery("source=catalog.employees | where unknown_relevance(name, 'John')") + .assertErrorMessage("[(] is not a valid term at this part of the query"); } } diff --git a/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java b/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java index 3f19d69f19..9838aa5b13 100644 --- a/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java +++ b/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java @@ -149,6 +149,37 @@ protected QueryAssert givenQuery(String query) { return new QueryAssert(planner.plan(query)); } + /** Fluent helper for asserting query planning errors. */ + protected QueryErrorAssert givenInvalidQuery(String query) { + try { + planner.plan(query); + throw new AssertionError("Expected query to fail: " + query); + } catch (Exception e) { + return new QueryErrorAssert(e); + } + } + + /** Fluent assertion on a query planning error. */ + protected static class QueryErrorAssert { + private final Exception error; + + QueryErrorAssert(Exception error) { + this.error = error; + } + + /** Assert the root cause error message contains the expected substring. */ + public QueryErrorAssert assertErrorMessage(String expected) { + Throwable cause = error; + while (cause.getCause() != null) { + cause = cause.getCause(); + } + assertTrue( + "Expected error to contain: " + expected + "\nActual: " + cause.getMessage(), + cause.getMessage().contains(expected)); + return this; + } + } + /** Fluent assertion on a query's logical plan. */ protected static class QueryAssert { private final RelNode plan;