Skip to content
/ server Public
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions mariadb-plugin-columnstore.install.generated
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
#File is generated by ColumnstoreLibrary.cmake, do not edit
etc/mysql/columnstore.cnf # added in dbcon/mysql/CMakeLists.txt
usr/local/mysql/lib/plugin/ha_columnstore.so # added in dbcon/mysql/CMakeLists.txt
70 changes: 70 additions & 0 deletions mysql-test/main/mdev_35327.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#
# MDEV-35327: Add VEC_DISTANCE_MANHATTAN function
#
#
# Checking for argument validity
#
SELECT VEC_DISTANCE_MANHATTAN(VEC_FromText('[1,2]'));
ERROR 42000: Incorrect parameter count in the call to native function 'VEC_DISTANCE_MANHATTAN'
SELECT VEC_DISTANCE_MANHATTAN(NULL, VEC_FromText('[1,2]'));
VEC_DISTANCE_MANHATTAN(NULL, VEC_FromText('[1,2]'))
NULL
# Checking for mismatched dimensions
SELECT VEC_DISTANCE_MANHATTAN(VEC_FromText('[1,1,1]'),VEC_FromText('[1,2]'));
VEC_DISTANCE_MANHATTAN(VEC_FromText('[1,1,1]'),VEC_FromText('[1,2]'))
NULL
#
# Basic math check
#
SELECT VEC_DISTANCE_MANHATTAN(VEC_FromText('[1,2,3]'), VEC_FromText('[2,3,4]'));
VEC_DISTANCE_MANHATTAN(VEC_FromText('[1,2,3]'), VEC_FromText('[2,3,4]'))
3
#
# Without Vector Index
#
CREATE TABLE t1 (id INT, v VECTOR(3) NOT NULL);
INSERT INTO t1 VALUES (1, VEC_FromText('[2,2,2]')), (2, VEC_FromText('[0,0,5]')), (3, VEC_FromText('[1,1,1]'));
# Manhattan distance:- 6,5,3 Euclidean distance:- 3.46,5,1.73
# Manhattan | Euclidean
# P3 P3
# P2 P1
# P1 P2
# output should be 3,5,6 and ordering should be P3 < P2 < P1
SELECT id, VEC_DISTANCE_MANHATTAN(v, VEC_FromText('[0,0,0]')) as dist FROM t1 ORDER BY dist;
id dist
3 3
2 5
1 6
# Comparison with Euclidean distance
SELECT id, VEC_DISTANCE_EUCLIDEAN(v, VEC_FromText('[0,0,0]')) as dist FROM t1 ORDER BY dist;
id dist
3 1.7320508075688772
1 3.4641016151377544
2 5
#
# With Vector Index
#
CREATE VECTOR INDEX idx ON t1(v) DISTANCE=manhattan;
# Output should be 3,5 and 6 again
SELECT id, VEC_DISTANCE_MANHATTAN(v, VEC_FromText('[0,0,0]')) as dist FROM t1 ORDER BY dist LIMIT 3;
id dist
3 3
2 5
1 6
# Checking if the vector index is actually implemented using manhattan distance
EXPLAIN SELECT id FROM t1 FORCE INDEX (idx)
ORDER BY VEC_DISTANCE_MANHATTAN(v, VEC_FromText('[0,0,0]')) LIMIT 1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL idx 14 NULL 1
# Cleanup
DROP TABLE t1;
# Miscellaneous Tests
SELECT VEC_DISTANCE_MANHATTAN(VEC_FromText('[-1,-1]'), VEC_FromText('[1,1]')) as neg_test;
neg_test
4
SELECT VEC_DISTANCE_MANHATTAN(VEC_FromText('[1.5, 2.5]'), VEC_FromText('[1.5, 2.5]')) as zero_dist;
zero_dist
0
SELECT VEC_DISTANCE_MANHATTAN(VEC_FromText('[1.1]'), VEC_FromText('[2.2]')) as float_test;
float_test
1.100000023841858
58 changes: 58 additions & 0 deletions mysql-test/main/mdev_35327.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
--echo #
--echo # MDEV-35327: Add VEC_DISTANCE_MANHATTAN function
--echo #

--echo #
--echo # Checking for argument validity
--echo #
--error ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT
SELECT VEC_DISTANCE_MANHATTAN(VEC_FromText('[1,2]'));
SELECT VEC_DISTANCE_MANHATTAN(NULL, VEC_FromText('[1,2]'));
--echo # Checking for mismatched dimensions
SELECT VEC_DISTANCE_MANHATTAN(VEC_FromText('[1,1,1]'),VEC_FromText('[1,2]'));

--echo #
--echo # Basic math check
--echo #
SELECT VEC_DISTANCE_MANHATTAN(VEC_FromText('[1,2,3]'), VEC_FromText('[2,3,4]'));


--echo #
--echo # Without Vector Index
--echo #
CREATE TABLE t1 (id INT, v VECTOR(3) NOT NULL);
INSERT INTO t1 VALUES (1, VEC_FromText('[2,2,2]')), (2, VEC_FromText('[0,0,5]')), (3, VEC_FromText('[1,1,1]'));

--echo # Manhattan distance:- 6,5,3 Euclidean distance:- 3.46,5,1.73
--echo # Manhattan | Euclidean
--echo # P3 P3
--echo # P2 P1
--echo # P1 P2
--echo # output should be 3,5,6 and ordering should be P3 < P2 < P1

SELECT id, VEC_DISTANCE_MANHATTAN(v, VEC_FromText('[0,0,0]')) as dist FROM t1 ORDER BY dist;
--echo # Comparison with Euclidean distance
SELECT id, VEC_DISTANCE_EUCLIDEAN(v, VEC_FromText('[0,0,0]')) as dist FROM t1 ORDER BY dist;

--echo #
--echo # With Vector Index
--echo #
CREATE VECTOR INDEX idx ON t1(v) DISTANCE=manhattan;

--echo # Output should be 3,5 and 6 again
SELECT id, VEC_DISTANCE_MANHATTAN(v, VEC_FromText('[0,0,0]')) as dist FROM t1 ORDER BY dist LIMIT 3;

--echo # Checking if the vector index is actually implemented using manhattan distance
EXPLAIN SELECT id FROM t1 FORCE INDEX (idx)
ORDER BY VEC_DISTANCE_MANHATTAN(v, VEC_FromText('[0,0,0]')) LIMIT 1;

--echo # Cleanup
DROP TABLE t1;

--echo # Miscellaneous Tests

SELECT VEC_DISTANCE_MANHATTAN(VEC_FromText('[-1,-1]'), VEC_FromText('[1,1]')) as neg_test;

SELECT VEC_DISTANCE_MANHATTAN(VEC_FromText('[1.5, 2.5]'), VEC_FromText('[1.5, 2.5]')) as zero_dist;

SELECT VEC_DISTANCE_MANHATTAN(VEC_FromText('[1.1]'), VEC_FromText('[2.2]')) as float_test;
2 changes: 1 addition & 1 deletion mysql-test/main/mysqld--help.result
Original file line number Diff line number Diff line change
Expand Up @@ -799,7 +799,7 @@ The following specify which files/extra groups are read (specified before remain
Supported MDL namespaces: BACKUP
--mhnsw-default-distance=name
Distance function to build the vector index for. One of:
euclidean, cosine
euclidean, cosine, manhattan
--mhnsw-default-m=# Larger values mean slower SELECTs and INSERTs, larger
index size and higher memory consumption but more
accurate results
Expand Down
2 changes: 1 addition & 1 deletion mysql-test/suite/sys_vars/r/sysvars_server_embedded.result
Original file line number Diff line number Diff line change
Expand Up @@ -2229,7 +2229,7 @@ VARIABLE_COMMENT Distance function to build the vector index for
NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST euclidean,cosine
ENUM_VALUE_LIST euclidean,cosine,manhattan
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME MHNSW_DEFAULT_M
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2469,7 +2469,7 @@ VARIABLE_COMMENT Distance function to build the vector index for
NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST euclidean,cosine
ENUM_VALUE_LIST euclidean,cosine,manhattan
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME MHNSW_DEFAULT_M
Expand Down
20 changes: 20 additions & 0 deletions sql/item_create.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6237,6 +6237,24 @@ class Create_func_vec_distance_cosine: public Create_func_arg2

Create_func_vec_distance_cosine Create_func_vec_distance_cosine::s_singleton;


/*
  Builder for the two-argument native SQL function VEC_DISTANCE_MANHATTAN.

  It produces an Item_func_vec_distance whose distance kind is fixed to
  MANHATTAN. The only instance is the static s_singleton; the constructor
  and destructor are protected.
*/
class Create_func_vec_distance_manhattan: public Create_func_arg2
{
public:
  /* Allocate the item on the statement's mem_root. */
  Item *create_2_arg(THD *thd, Item *arg1, Item *arg2) override
  {
    const Item_func_vec_distance::distance_kind metric=
      Item_func_vec_distance::MANHATTAN;
    return new (thd->mem_root) Item_func_vec_distance(thd, arg1, arg2, metric);
  }

  static Create_func_vec_distance_manhattan s_singleton;

protected:
  Create_func_vec_distance_manhattan() = default;
  virtual ~Create_func_vec_distance_manhattan() = default;
};

Create_func_vec_distance_manhattan Create_func_vec_distance_manhattan::s_singleton;


class Create_func_vec_distance: public Create_func_arg2
{
public:
Expand All @@ -6251,6 +6269,7 @@ class Create_func_vec_distance: public Create_func_arg2
virtual ~Create_func_vec_distance() = default;
};


Create_func_vec_distance Create_func_vec_distance::s_singleton;

class Create_func_vec_totext: public Create_func_arg1
Expand Down Expand Up @@ -6516,6 +6535,7 @@ const Native_func_registry func_array[] =
{ { STRING_WITH_LEN("UUID_SHORT") }, BUILDER(Create_func_uuid_short)},
{ { STRING_WITH_LEN("VEC_DISTANCE_EUCLIDEAN") }, BUILDER(Create_func_vec_distance_euclidean)},
{ { STRING_WITH_LEN("VEC_DISTANCE_COSINE") }, BUILDER(Create_func_vec_distance_cosine)},
{ { STRING_WITH_LEN("VEC_DISTANCE_MANHATTAN") }, BUILDER(Create_func_vec_distance_manhattan)},
{ { STRING_WITH_LEN("VEC_DISTANCE") }, BUILDER(Create_func_vec_distance)},
{ { STRING_WITH_LEN("VEC_FROMTEXT") }, BUILDER(Create_func_vec_fromtext)},
{ { STRING_WITH_LEN("VEC_TOTEXT") }, BUILDER(Create_func_vec_totext)},
Expand Down
14 changes: 14 additions & 0 deletions sql/item_vectorfunc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,17 @@ static double calc_distance_cosine(float *v1, float *v2, size_t v_len)
return 1 - dotp/sqrt(abs1*abs2);
}

/*
  Manhattan (L1) distance between two vectors.

  @param v1, v2  the two vectors; each component is read via get_float()
  @param v_len   number of components in each vector

  @return the sum over all components of |v1[i] - v2[i]|, accumulated in a
          double; 0 when v_len is 0
*/
static double calc_distance_manhattan(float *v1, float *v2, size_t v_len)
{
  double d= 0;
  for (size_t i= 0; i < v_len; i++, v1++, v2++)
  {
    /*
      fabs(), not abs(): depending on which headers are visible, an
      unqualified abs() can resolve to the integer ::abs(int) and silently
      truncate the difference to a whole number.
    */
    d+= fabs(get_float(v1) - get_float(v2));
  }
  return d;
}

Item_func_vec_distance::Item_func_vec_distance(THD *thd, Item *a, Item *b,
distance_kind kind)
:Item_real_func(thd, a, b), kind(kind)
Expand All @@ -59,6 +70,7 @@ bool Item_func_vec_distance::fix_length_and_dec(THD *thd)
switch (kind) {
case EUCLIDEAN: calc_distance= calc_distance_euclidean; break;
case COSINE: calc_distance= calc_distance_cosine; break;
case MANHATTAN: calc_distance= calc_distance_manhattan; break;
case AUTO:
for (uint i=0; i < 2; i++)
if (auto *item= dynamic_cast<Item_field*>(args[i]->real_item()))
Expand Down Expand Up @@ -90,10 +102,12 @@ key_map Item_func_vec_distance::part_of_sortkey() const
Field *f= item->field;
KEY *keyinfo= f->table->s->key_info;
for (uint i= f->table->s->keys; i < f->table->s->total_keys; i++)
{
if (!keyinfo[i].is_ignored && keyinfo[i].algorithm == HA_KEY_ALG_VECTOR
&& f->key_start.is_set(i)
&& mhnsw_uses_distance(f->table, keyinfo + i) == kind)
map.set_bit(i);
}
}
return map;
}
Expand Down
5 changes: 3 additions & 2 deletions sql/item_vectorfunc.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,14 @@ class Item_func_vec_distance: public Item_real_func
double (*calc_distance)(float *v1, float *v2, size_t v_len);

public:
enum distance_kind { EUCLIDEAN, COSINE, AUTO } kind;
enum distance_kind { EUCLIDEAN, COSINE, MANHATTAN, AUTO } kind;
Item_func_vec_distance(THD *thd, Item *a, Item *b, distance_kind kind);
LEX_CSTRING func_name_cstring() const override
{
static LEX_CSTRING name[3]= {
static LEX_CSTRING name[4]= {
{ STRING_WITH_LEN("VEC_DISTANCE_EUCLIDEAN") },
{ STRING_WITH_LEN("VEC_DISTANCE_COSINE") },
{ STRING_WITH_LEN("VEC_DISTANCE_MANHATTAN")},
{ STRING_WITH_LEN("VEC_DISTANCE") }
};
return name[kind];
Expand Down
17 changes: 12 additions & 5 deletions sql/vector_mhnsw.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ static MYSQL_THDVAR_UINT(default_m, PLUGIN_VAR_RQCMDARG,
"and higher memory consumption but more accurate results",
nullptr, nullptr, 6, 3, 200, 1);

/*
  Distance metrics a vector index can be built for, and their user-visible
  names. distance_names is nullptr-terminated and feeds the `distances`
  TYPELIB.

  NOTE(review): presumably the position of each name corresponds to the
  metric_type value, so keep both lists in the same order when adding a
  metric -- confirm against the code that consumes `distances`.
*/
enum metric_type : uint { EUCLIDEAN, COSINE, MANHATTAN };
static const char *distance_names[]= { "euclidean", "cosine", "manhattan", nullptr };
static TYPELIB distances= CREATE_TYPELIB_FOR(distance_names);
static MYSQL_THDVAR_ENUM(default_distance, PLUGIN_VAR_RQCMDARG,
"Distance function to build the vector index for",
Expand Down Expand Up @@ -1749,9 +1749,16 @@ const LEX_CSTRING mhnsw_hlindex_table_def(THD *thd, uint ref_length)

/*
  Report which distance function the vector index described by keyinfo
  was built for, expressed as an Item_func_vec_distance::distance_kind.

  The metric is read from keyinfo->option_struct->metric. EUCLIDEAN and
  MANHATTAN map to their namesakes; COSINE and any other value are reported
  as COSINE. The table argument is not used in this function.
*/
Item_func_vec_distance::distance_kind mhnsw_uses_distance(const TABLE *table, KEY *keyinfo)
{
  switch (keyinfo->option_struct->metric) {
  case EUCLIDEAN:
    return Item_func_vec_distance::EUCLIDEAN;
  case MANHATTAN:
    return Item_func_vec_distance::MANHATTAN;
  case COSINE:
  default:
    return Item_func_vec_distance::COSINE;
  }
}

/*
Expand Down
Binary file added storage/connect/connect_jars/JdbcInterface.jar
Binary file not shown.