diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java index 4332ff1766..a8c4a30211 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java @@ -23,13 +23,16 @@ import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider; import org.apache.calcite.schema.Schema; import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.util.SqlOperatorTables; import org.apache.calcite.tools.FrameworkConfig; import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.Programs; import org.opensearch.sql.api.parser.CalciteSqlQueryParser; import org.opensearch.sql.api.parser.PPLQueryParser; import org.opensearch.sql.api.parser.UnifiedQueryParser; +import org.opensearch.sql.api.spec.UnifiedFunctionSpec; import org.opensearch.sql.calcite.CalcitePlanContext; import org.opensearch.sql.calcite.SysLimit; import org.opensearch.sql.common.setting.Settings; @@ -243,6 +246,9 @@ private FrameworkConfig buildFrameworkConfig() { SchemaPlus defaultSchema = findSchemaByPath(rootSchema, defaultNamespace); return Frameworks.newConfigBuilder() .parserConfig(buildParserConfig()) + .operatorTable( + SqlOperatorTables.chain( + SqlStdOperatorTable.instance(), UnifiedFunctionSpec.RELEVANCE.operatorTable())) .defaultSchema(defaultSchema) .traitDefs((List) null) .programs(Programs.calc(DefaultRelMetadataProvider.INSTANCE)) diff --git a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java index af4d9f518a..74800db4a3 100644 --- a/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java +++ b/api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java @@ -17,6 +17,7 @@ import 
org.apache.calcite.sql.SqlNode;
 import org.apache.calcite.tools.Frameworks;
 import org.apache.calcite.tools.Planner;
+import org.opensearch.sql.api.parser.NamedArgRewriter;
 import org.opensearch.sql.api.parser.UnifiedQueryParser;
 import org.opensearch.sql.ast.tree.UnresolvedPlan;
 import org.opensearch.sql.calcite.CalciteRelNodeVisitor;
@@ -81,7 +82,8 @@ private static class CalciteNativeStrategy implements PlanningStrategy {
     public RelNode plan(String query) throws Exception {
       try (Planner planner = Frameworks.getPlanner(context.getPlanContext().config)) {
         SqlNode parsed = planner.parse(query);
-        SqlNode validated = planner.validate(parsed);
+        SqlNode rewritten = parsed.accept(NamedArgRewriter.INSTANCE);
+        SqlNode validated = planner.validate(rewritten);
         RelRoot relRoot = planner.rel(validated);
         return relRoot.project();
       }
diff --git a/api/src/main/java/org/opensearch/sql/api/parser/NamedArgRewriter.java b/api/src/main/java/org/opensearch/sql/api/parser/NamedArgRewriter.java
new file mode 100644
index 0000000000..629d92442f
--- /dev/null
+++ b/api/src/main/java/org/opensearch/sql/api/parser/NamedArgRewriter.java
@@ -0,0 +1,93 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.api.parser;
+
+import java.util.List;
+import lombok.AccessLevel;
+import lombok.NoArgsConstructor;
+import org.apache.calcite.sql.SqlCall;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlLiteral;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.parser.SqlParserPos;
+import org.apache.calcite.sql.util.SqlShuttle;
+import org.checkerframework.checker.nullness.qual.Nullable;
+import org.opensearch.sql.api.spec.UnifiedFunctionSpec;
+
+/**
+ * Pre-validation rewriter for backward compatibility with non-standard named-argument syntax (e.g.,
+ * {@code operator='AND'} instead of {@code operator => 'AND'}). Normalizes relevance function calls
+ * into MAP-based form so SQL and PPL paths produce identical query plans for pushdown rules.
+ *
+ * <p>This rewriter is subject to removal if we adopt standard SQL named-argument syntax.
+ */
+@NoArgsConstructor(access = AccessLevel.PRIVATE)
+public final class NamedArgRewriter extends SqlShuttle {
+
+  /** Shared singleton instance. */
+  public static final NamedArgRewriter INSTANCE = new NamedArgRewriter();
+
+  /**
+   * Lets {@link SqlShuttle} rebuild the call from its visited operands, then rewrites the result
+   * when its operator name resolves to a {@link UnifiedFunctionSpec#RELEVANCE} function.
+   *
+   * @param call call node from the parsed SQL tree
+   * @return the MAP-based call for relevance functions, otherwise the visited call
+   */
+  @Override
+  public @Nullable SqlNode visit(SqlCall call) {
+    SqlCall visited = (SqlCall) super.visit(call);
+    return UnifiedFunctionSpec.of(visited.getOperator().getName())
+        .filter(UnifiedFunctionSpec.RELEVANCE::contains)
+        .map(spec -> (SqlNode) rewriteToMaps(visited, spec.getParamNames()))
+        .orElse(visited);
+  }
+
+  /**
+   * Rewrites each argument into a MAP entry. For match(name, 'John', operator='AND'):
+   *
+   * <ul>
+   *   <li>Positional arg: name → MAP('field', name)
+   *   <li>Named arg: operator='AND' → MAP('operator', 'AND')
+   * </ul>
+   *
+   * @param call relevance function call whose operands have already been visited
+   * @param paramNames names assigned to positional operands, looked up by operand index
+   * @return a new call on the same operator with every operand wrapped in a MAP
+   * @throws IllegalArgumentException if a positional operand has no corresponding parameter name
+   */
+  private static SqlCall rewriteToMaps(SqlCall call, List<String> paramNames) {
+    List<SqlNode> operands = call.getOperandList();
+    SqlNode[] maps = new SqlNode[operands.size()];
+    for (int i = 0; i < operands.size(); i++) {
+      SqlNode op = operands.get(i);
+      if (op instanceof SqlCall eq && op.getKind() == SqlKind.EQUALS) {
+        // name=value option: the left-hand side becomes the MAP key.
+        maps[i] = toMap(eq.operand(0).toString(), eq.operand(1));
+      } else if (i < paramNames.size()) {
+        maps[i] = toMap(paramNames.get(i), op);
+      } else {
+        // Report a readable error rather than an IndexOutOfBoundsException from the list lookup.
+        throw new IllegalArgumentException(
+            "Function "
+                + call.getOperator().getName()
+                + " accepts at most "
+                + paramNames.size()
+                + " positional argument(s) "
+                + paramNames
+                + "; pass additional options as name=value");
+      }
+    }
+    return call.getOperator().createCall(call.getParserPosition(), maps);
+  }
+
+  /** Builds the call {@code MAP(key, value)} with {@code key} as a character-string literal. */
+  private static SqlNode toMap(String key, SqlNode value) {
+    return SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR.createCall(
+        SqlParserPos.ZERO, SqlLiteral.createCharString(key, SqlParserPos.ZERO), value);
+  }
+}
diff --git a/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java
new file mode 100644
index 0000000000..f60fc61a50
--- /dev/null
+++ b/api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java
@@ -0,0 +1,171 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.api.spec;
+
+import static org.apache.calcite.sql.type.ReturnTypes.BOOLEAN;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import lombok.AccessLevel;
+import lombok.EqualsAndHashCode;
+import lombok.Getter;
+import lombok.RequiredArgsConstructor;
+import lombok.ToString;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.sql.SqlCallBinding;
+import org.apache.calcite.sql.SqlIdentifier;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlOperandCountRange;
+import org.apache.calcite.sql.SqlOperator;
+import org.apache.calcite.sql.SqlOperatorTable;
+import org.apache.calcite.sql.parser.SqlParserPos;
+import org.apache.calcite.sql.type.InferTypes;
+import org.apache.calcite.sql.type.SqlOperandCountRanges;
+import org.apache.calcite.sql.type.SqlOperandMetadata;
+import org.apache.calcite.sql.type.SqlReturnTypeInference;
+import org.apache.calcite.sql.util.SqlOperatorTables;
+import org.apache.calcite.sql.validate.SqlUserDefinedFunction;
+
+/**
+ * Declarative registry of language-level functions for the unified query engine. Functions defined
+ * here are part of the language spec — always resolvable regardless of the underlying data source.
+ * They are grouped into {@link Category categories} that callers chain into Calcite's operator
+ * table. Data-source capability is enforced at optimization time by pushdown rules.
+ */
+@Getter
+@ToString(of = "funcName")
+@EqualsAndHashCode(of = "funcName")
+@RequiredArgsConstructor(access = AccessLevel.PRIVATE)
+public final class UnifiedFunctionSpec {
+
+  /** Function name as registered in the operator table (e.g., "match", "multi_match"). */
+  private final String funcName;
+
+  /** Calcite operator for chaining into the framework config's operator table. */
+  private final SqlOperator operator;
+
+  /** Full-text search functions. */
+  public static final Category RELEVANCE =
+      new Category(
+          List.of(
+              function("match").vararg("field", "query").returnType(BOOLEAN).build(),
+              function("match_phrase").vararg("field", "query").returnType(BOOLEAN).build(),
+              function("match_bool_prefix").vararg("field", "query").returnType(BOOLEAN).build(),
+              function("match_phrase_prefix").vararg("field", "query").returnType(BOOLEAN).build(),
+              function("multi_match").vararg("fields", "query").returnType(BOOLEAN).build(),
+              function("simple_query_string").vararg("fields", "query").returnType(BOOLEAN).build(),
+              function("query_string").vararg("fields", "query").returnType(BOOLEAN).build()));
+
+  /** All registered function specs, keyed by function name. */
+  private static final Map<String, UnifiedFunctionSpec> ALL_SPECS =
+      Stream.of(RELEVANCE)
+          .flatMap(c -> c.specs().stream())
+          .collect(Collectors.toMap(UnifiedFunctionSpec::getFuncName, s -> s));
+
+  /**
+   * Looks up a function spec by name across all categories.
+   *
+   * @param name function name (case-insensitive, lowercased with the locale-neutral ROOT rules)
+   * @return the spec, or empty if not found
+   */
+  public static Optional<UnifiedFunctionSpec> of(String name) {
+    return Optional.ofNullable(ALL_SPECS.get(name.toLowerCase(java.util.Locale.ROOT)));
+  }
+
+  /** Returns the param names from {@link SqlOperandMetadata}, or empty if not available. */
+  public List<String> getParamNames() {
+    return operator.getOperandTypeChecker() instanceof SqlOperandMetadata metadata
+        ? metadata.paramNames()
+        : List.of();
+  }
+
+  /** A group of function specs that can be chained into Calcite's operator table. */
+  public record Category(List<UnifiedFunctionSpec> specs) {
+    /** Returns an operator table holding the operator of every spec in this category. */
+    public SqlOperatorTable operatorTable() {
+      return SqlOperatorTables.of(specs.stream().map(UnifiedFunctionSpec::getOperator).toList());
+    }
+
+    /** Returns true if this category contains the given spec. */
+    public boolean contains(UnifiedFunctionSpec spec) {
+      return specs.contains(spec);
+    }
+  }
+
+  /** Starts a builder for a function with the given name. */
+  public static Builder function(String name) {
+    return new Builder(name);
+  }
+
+  /** Fluent builder for function specs. */
+  @RequiredArgsConstructor(access = AccessLevel.PRIVATE)
+  public static class Builder {
+    private final String funcName;
+    private List<String> paramNames = List.of();
+    private SqlReturnTypeInference returnType;
+
+    /** Sets the required leading parameter names; further operands are allowed. */
+    public Builder vararg(String... names) {
+      this.paramNames = List.of(names);
+      return this;
+    }
+
+    /** Sets the return type inference; required before {@link #build()}. */
+    public Builder returnType(SqlReturnTypeInference type) {
+      this.returnType = type;
+      return this;
+    }
+
+    /** Builds the spec; throws {@link NullPointerException} if no return type was set. */
+    public UnifiedFunctionSpec build() {
+      Objects.requireNonNull(returnType, "returnType is required");
+      return new UnifiedFunctionSpec(
+          funcName,
+          new SqlUserDefinedFunction(
+              new SqlIdentifier(funcName, SqlParserPos.ZERO),
+              SqlKind.OTHER_FUNCTION,
+              returnType,
+              InferTypes.ANY_NULLABLE,
+              new VariadicOperandMetadata(paramNames),
+              List::of)); // Pushdown-only: no local implementation
+    }
+  }
+
+  /**
+   * Custom operand metadata that bypasses Calcite's built-in type checking. Calcite's {@code
+   * FamilyOperandTypeChecker} rejects variadic calls (CALCITE-5366), so this implementation accepts
+   * any operand types and delegates validation to pushdown. {@code paramNames()} is the record's
+   * generated accessor.
+   */
+  private record VariadicOperandMetadata(List<String> paramNames) implements SqlOperandMetadata {
+
+    /** Declares no parameter types; operand types are not checked here. */
+    @Override
+    public List<RelDataType> paramTypes(RelDataTypeFactory tf) {
+      return List.of();
+    }
+
+    @Override
+    public boolean checkOperandTypes(SqlCallBinding binding, boolean throwOnFailure) {
+      return true; // Bypass: CALCITE-5366 breaks optional argument type checking
+    }
+
+    @Override
+    public SqlOperandCountRange getOperandCountRange() {
+      return SqlOperandCountRanges.from(paramNames.size());
+    }
+
+    @Override
+    public String getAllowedSignatures(SqlOperator op, String opName) {
+      return opName + "(" + String.join(", ", paramNames) + "[, option=value ...])";
+    }
+  }
+}
diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchSqlTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchSqlTest.java
new file mode 100644
index 0000000000..cbe97a8114
--- /dev/null
+++ b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchSqlTest.java
@@ -0,0 +1,175 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.api;
+
+import org.junit.Test;
+import org.opensearch.sql.executor.QueryType;
+
+/**
+ * Tests for relevance search functions in SQL planning path using V2/PPL syntax. Mirrors the PPL
+ * tests in {@link UnifiedRelevanceSearchTest} with equivalent SQL queries. Both paths produce
+ * identical MAP-based plans for pushdown rules.
+ */
+public class UnifiedRelevanceSearchSqlTest extends UnifiedQueryTestBase {
+
+  @Override
+  protected QueryType queryType() {
+    return QueryType.SQL;
+  }
+
+  @Test
+  public void testMatch() {
+    givenQuery( // NOTE(review): "match" is quoted, presumably because MATCH is a reserved word
+            """
+            SELECT * FROM catalog.employees
+            WHERE "match"(name, 'John')\
+            """)
+        .assertPlan(
+            """
+            LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3])
+              LogicalFilter(condition=[match(MAP('field', $1), MAP('query', 'John'))])
+                LogicalTableScan(table=[[catalog, employees]])
+            """);
+  }
+
+  @Test
+  public void testMatchPhrase() {
+    givenQuery(
+            """
+            SELECT * FROM catalog.employees
+            WHERE match_phrase(name, 'John Doe')\
+            """)
+        .assertPlanContains("match_phrase(MAP('field', $1), MAP('query', 'John Doe'))");
+  }
+
+  @Test
+  public void testMatchBoolPrefix() {
+    givenQuery(
+            """
+            SELECT * FROM catalog.employees
+            WHERE match_bool_prefix(name, 'John')\
+            """)
+        .assertPlanContains("match_bool_prefix(MAP('field', $1), MAP('query', 'John'))");
+  }
+
+  @Test
+  public void testMatchPhrasePrefix() {
+    givenQuery(
+            """
+            SELECT * FROM catalog.employees
+            WHERE match_phrase_prefix(name, 'John')\
+            """)
+        .assertPlanContains("match_phrase_prefix(MAP('field', $1), MAP('query', 'John'))");
+  }
+
+  @Test
+  public void testMultiMatch() {
+    givenQuery(
+            """
+            SELECT * FROM catalog.employees
+            WHERE multi_match(name, 'John')\
+            """)
+        .assertPlanContains("multi_match(MAP('fields', $1), MAP('query', 'John'))");
+  }
+
+  @Test
+  public void testSimpleQueryString() {
+    givenQuery(
+            """
+            SELECT * FROM catalog.employees
+            WHERE simple_query_string(name, 'John')\
+            """)
+        .assertPlanContains("simple_query_string(MAP('fields', $1), MAP('query', 'John'))");
+  }
+
+  @Test
+  public void testQueryString() {
+    givenQuery(
+            """
+            SELECT * FROM catalog.employees
+            WHERE query_string(name, 'John')\
+            """)
+        .assertPlanContains("query_string(MAP('fields', $1), MAP('query', 'John'))");
+  }
+
+  @Test
+  public void testMatchWithOptions() {
+    givenQuery(
+            """
+            SELECT * FROM catalog.employees
+            WHERE "match"(name, 'John', operator='AND', boost=2.0)\
+            """)
+        .assertPlanContains( // each name=value option becomes its own MAP(name, value) operand
+            "match(MAP('field', $1), MAP('query', 'John'),"
+                + " MAP('operator', 'AND'), MAP('boost', 2.0:DECIMAL(2, 1)))");
+  }
+
+  @Test
+  public void testMatchMissingArguments() {
+    givenInvalidQuery(
+            """
+            SELECT * FROM catalog.employees
+            WHERE "match"('John')\
+            """)
+        .assertErrorMessage(
+            "No match found for function signature match(<(CHAR(5), CHAR(4)) MAP>)");
+  }
+
+  @Test
+  public void testUnknownRelevanceFunction() {
+    givenInvalidQuery(
+            """
+            SELECT * FROM catalog.employees
+            WHERE unknown_relevance(name, 'John')\
+            """)
+        .assertErrorMessage( // stable prefix only; the operand types that follow are not pinned
+            "No match found for function signature unknown_relevance(");
+  }
+
+  // FIXME: Calcite's SQL parser does not support V2 bracket field list syntax ['field1', 'field2'].
+  // Multi-field relevance functions only accept a single column reference in the Calcite SQL path.
+  // See: https://github.com/opensearch-project/sql/issues/XXXX (TODO: fill in issue number)
+
+  @Test
+  public void testMultiMatchBracketSyntaxNotSupported() {
+    givenInvalidQuery(
+            """
+            SELECT * FROM catalog.employees
+            WHERE multi_match(['name', 'department'], 'John')\
+            """)
+        .assertErrorMessage("Encountered \"[\" at line"); // rejected by the SQL parser
+  }
+
+  @Test
+  public void testMultiMatchFieldBoostNotSupported() {
+    givenInvalidQuery(
+            """
+            SELECT * FROM catalog.employees
+            WHERE multi_match(['name' ^ 2.0, 'department'], 'John')\
+            """)
+        .assertErrorMessage("Encountered \"[\" at line");
+  }
+
+  @Test
+  public void testSimpleQueryStringBracketSyntaxNotSupported() {
+    givenInvalidQuery(
+            """
+            SELECT * FROM catalog.employees
+            WHERE simple_query_string(['name', 'department'], 'John')\
+            """)
+        .assertErrorMessage("Encountered \"[\" at line");
+  }
+
+  @Test
+  public void testQueryStringBracketSyntaxNotSupported() {
+    givenInvalidQuery(
+            """
+            SELECT * FROM catalog.employees
+            WHERE query_string(['name', 'department'], 'John')\
+            """)
+        .assertErrorMessage("Encountered \"[\" at line");
+  }
+}
diff --git a/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchTest.java b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchTest.java
new file mode 100644
index 0000000000..a80ae19086
--- /dev/null
+++ b/api/src/test/java/org/opensearch/sql/api/UnifiedRelevanceSearchTest.java
@@ -0,0 +1,78 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.api;
+
+import org.junit.Test;
+
+/** Tests for relevance search functions in PPL planning path. */
+public class UnifiedRelevanceSearchTest extends UnifiedQueryTestBase {
+
+  @Test
+  public void testMatch() {
+    givenQuery("source=catalog.employees | where match(name, 'John')")
+        .assertPlan(
+            """
+            LogicalFilter(condition=[match(MAP('field', $1), MAP('query', 'John':VARCHAR))])
+              LogicalTableScan(table=[[catalog, employees]])
+            """);
+  }
+
+  @Test
+  public void testMatchPhrase() {
+    givenQuery("source=catalog.employees | where match_phrase(name, 'John Doe')")
+        .assertPlanContains("match_phrase(MAP('field', $1), MAP('query', 'John Doe':VARCHAR))");
+  }
+
+  @Test
+  public void testMatchBoolPrefix() {
+    givenQuery("source=catalog.employees | where match_bool_prefix(name, 'John')")
+        .assertPlanContains("match_bool_prefix(MAP('field', $1), MAP('query', 'John':VARCHAR))");
+  }
+
+  @Test
+  public void testMatchPhrasePrefix() {
+    givenQuery("source=catalog.employees | where match_phrase_prefix(name, 'John')")
+        .assertPlanContains("match_phrase_prefix(MAP('field', $1), MAP('query', 'John':VARCHAR))");
+  }
+
+  @Test
+  public void testMultiMatch() {
+    givenQuery("source=catalog.employees | where multi_match(['name', 'department'], 'John')")
+        .assertPlanContains(
+            "multi_match(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE,"
+                + " 'department':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'John':VARCHAR))");
+  }
+
+  @Test
+  public void testSimpleQueryString() {
+    givenQuery("source=catalog.employees | where simple_query_string(['name'], 'John')")
+        .assertPlanContains(
+            "simple_query_string(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE)),"
+                + " MAP('query', 'John':VARCHAR))");
+  }
+
+  @Test
+  public void testQueryString() {
+    givenQuery("source=catalog.employees | where query_string(['name'], 'John')")
+        .assertPlanContains(
+            "query_string(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE)),"
+                + " MAP('query', 'John':VARCHAR))");
+  }
+
+  @Test
+  public void testMatchMissingArguments() {
+    givenInvalidQuery("source=catalog.employees | where match('John')")
+        
.assertErrorMessage(
+            "[)] is not a valid term at this part of the query:"
+                + " '...| where match('John')' <-- HERE. Expecting tokens: ','");
+  }
+
+  @Test
+  public void testUnknownRelevanceFunction() {
+    givenInvalidQuery("source=catalog.employees | where unknown_relevance(name, 'John')")
+        .assertErrorMessage("[(] is not a valid term at this part of the query");
+  }
+}
diff --git a/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java b/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java
index eaaaccbdbf..9838aa5b13 100644
--- a/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java
+++ b/api/src/testFixtures/java/org/opensearch/sql/api/UnifiedQueryTestBase.java
@@ -8,6 +8,7 @@
 import static org.apache.calcite.sql.type.SqlTypeName.INTEGER;
 import static org.apache.calcite.sql.type.SqlTypeName.VARCHAR;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 import java.util.List;
 import java.util.Map;
@@ -148,6 +149,48 @@ protected QueryAssert givenQuery(String query) {
     return new QueryAssert(planner.plan(query));
   }
 
+  /**
+   * Plans a query that is expected to fail and captures the failure for assertions.
+   *
+   * @param query query text expected to be rejected during planning
+   * @return assertion helper wrapping the exception thrown by the planner
+   * @throws AssertionError if planning succeeds
+   */
+  protected QueryErrorAssert givenInvalidQuery(String query) {
+    try {
+      planner.plan(query);
+    } catch (Exception e) {
+      return new QueryErrorAssert(e);
+    }
+    // Raised outside the try block so it can never be mistaken for a planning failure.
+    throw new AssertionError("Expected query to fail: " + query);
+  }
+
+  /** Fluent assertion on a query planning error. */
+  protected static class QueryErrorAssert {
+    private final Exception error;
+
+    QueryErrorAssert(Exception error) {
+      this.error = error;
+    }
+
+    /**
+     * Assert the root cause error message contains the expected substring. A root cause without a
+     * message fails the assertion instead of raising a {@link NullPointerException}.
+     */
+    public QueryErrorAssert assertErrorMessage(String expected) {
+      Throwable cause = error;
+      while (cause.getCause() != null) {
+        cause = cause.getCause();
+      }
+      String actual = cause.getMessage();
+      assertTrue(
+          "Expected error to contain: " + expected + "\nActual: " + actual,
+          actual != null && actual.contains(expected));
+      return this;
+    }
+  }
+
   /** Fluent assertion on a query's logical plan. */
   protected static class QueryAssert {
     private final RelNode plan;
@@ -164,6 +207,15 @@ public QueryAssert assertPlan(String expected) {
       return this;
     }
 
+    /** Assert the logical plan contains the expected substring. */
+    public QueryAssert assertPlanContains(String expected) {
+      String planStr = RelOptUtil.toString(plan).replaceAll("\\r\\n", "\n");
+      assertTrue(
+          "Expected plan to contain: " + expected + "\nActual plan:\n" + planStr,
+          planStr.contains(expected));
+      return this;
+    }
+
     /** Assert the output field names match. */
     public QueryAssert assertFields(String... names) {
       assertEquals(List.of(names), plan.getRowType().getFieldNames());