From f2853f5b5ee85f918f0143637bf5ba645815c081 Mon Sep 17 00:00:00 2001 From: Rob Johnson Date: Tue, 3 Feb 2026 10:21:47 -0800 Subject: [PATCH 1/4] Let comparison functions know whether a key is a query key or an inserted key --- include/splinterdb/data.h | 11 +++++- src/btree.c | 8 ++-- src/data_internal.h | 55 +++++++++++++++++++-------- src/default_data_config.c | 12 +++--- src/splinterdb.c | 6 +-- tests/functional/filter_test.c | 4 +- tests/functional/test_functionality.c | 2 +- tests/functional/ycsb_test.c | 18 +++++---- tests/test_data.c | 8 ++-- tests/unit/btree_stress_test.c | 4 +- tests/unit/btree_test.c | 32 ++++++++-------- tests/unit/splinter_test.c | 2 +- tests/unit/splinterdb_quick_test.c | 10 ++--- 13 files changed, 103 insertions(+), 69 deletions(-) diff --git a/include/splinterdb/data.h b/include/splinterdb/data.h index aeaaa6f8..9ebfc557 100644 --- a/include/splinterdb/data.h +++ b/include/splinterdb/data.h @@ -105,7 +105,14 @@ merge_accumulator_set_class(merge_accumulator *ma, message_type type); typedef struct data_config data_config; -typedef int (*key_compare_fn)(const data_config *cfg, slice key1, slice key2); +typedef struct user_key { + slice key; + bool32 is_query_key; +} user_key; + +typedef int (*key_compare_fn)(const data_config *cfg, + user_key key1, + user_key key2); typedef uint32 (*key_hash_fn)(const void *input, size_t length, uint32 seed); @@ -133,7 +140,7 @@ typedef int (*merge_tuple_final_fn)(const data_config *cfg, merge_accumulator *oldest_message); typedef void (*key_to_str_fn)(const data_config *cfg, - slice key, + user_key key, char *str, uint64 max_len); diff --git a/src/btree.c b/src/btree.c index 76a0ad0d..7aa871ec 100644 --- a/src/btree.c +++ b/src/btree.c @@ -475,11 +475,11 @@ btree_find_pivot(const btree_config *cfg, while (lo < hi) { int64 mid = (lo + hi) / 2; - int cmp = btree_key_compare(cfg, btree_get_pivot(cfg, hdr, mid), target); + int cmp = btree_key_compare(cfg, target, btree_get_pivot(cfg, hdr, mid)); if (cmp == 0) { *found = TRUE; return mid; - } else if (cmp < 0) { + } else if (cmp > 0) { lo = mid + 1; } else { hi = mid; @@ -518,11 +518,11 @@ btree_find_tuple(const btree_config *cfg, while (lo < hi) { int64 mid = (lo + hi) / 2; int cmp = - btree_key_compare(cfg, btree_get_tuple_key(cfg, hdr, mid), target); + btree_key_compare(cfg, target, btree_get_tuple_key(cfg, hdr, mid)); if (cmp == 0) { *found = TRUE; return mid; - } else if (cmp < 0) { + } else if (cmp > 0) { lo = mid + 1; } else { hi = mid; diff --git a/src/data_internal.h b/src/data_internal.h index 88b6325b..44996f70 100644 --- a/src/data_internal.h +++ b/src/data_internal.h @@ -35,14 +35,20 @@ typedef enum { typedef struct key { key_type kind; + bool32 is_query_key; slice user_slice; } key; #define NEGATIVE_INFINITY_KEY \ - ((key){.kind = NEGATIVE_INFINITY, .user_slice = INVALID_SLICE}) + ((key){.kind = NEGATIVE_INFINITY, \ + .is_query_key = FALSE, \ + .user_slice = INVALID_SLICE}) #define POSITIVE_INFINITY_KEY \ - ((key){.kind = POSITIVE_INFINITY, .user_slice = INVALID_SLICE}) -#define NULL_KEY ((key){.kind = USER_KEY, .user_slice = NULL_SLICE}) + ((key){.kind = POSITIVE_INFINITY, \ + .is_query_key = FALSE, \ + .user_slice = INVALID_SLICE}) +#define NULL_KEY \ + ((key){.kind = USER_KEY, .is_query_key = FALSE, .user_slice = NULL_SLICE}) static inline bool32 key_is_negative_infinity(key k) @@ -62,6 +68,12 @@ key_is_user_key(key k) return k.kind == USER_KEY; } +static inline bool32 +key_is_query_key(key k) +{ + return k.is_query_key; +} + static inline slice key_slice(key k) { @@ -70,15 +82,18 @@ key_slice(key k) } static inline key -key_create_from_slice(slice user_slice) +key_create_from_slice(bool32 is_query_key, slice user_slice) { - return (key){.kind = USER_KEY, .user_slice = user_slice}; + return (key){ + .kind = USER_KEY, .is_query_key = is_query_key, .user_slice = user_slice}; } static inline key -key_create(uint64 length, const void *data) +key_create(bool32 is_query_key, uint64 length, const void *data) { - return (key){.kind = USER_KEY, .user_slice = slice_create(length, data)}; + return (key){.kind = USER_KEY, + .is_query_key = is_query_key, + .user_slice = slice_create(length, data)}; } static inline bool32 @@ -127,6 +142,7 @@ key_copy_contents(void *dst, key k) #define DEFAULT_KEY_BUFFER_SIZE (128) typedef struct { key_type kind; + bool32 is_query_key; writable_buffer wb; } key_buffer; @@ -142,7 +158,8 @@ key_buffer_key(const key_buffer *kb) } else if (kb->kind == POSITIVE_INFINITY) { return POSITIVE_INFINITY_KEY; } else { - return key_create_from_slice(writable_buffer_to_slice(&kb->wb)); + return key_create_from_slice(kb->is_query_key, + writable_buffer_to_slice(&kb->wb)); } } @@ -155,9 +172,10 @@ key_buffer_init(key_buffer *kb, platform_heap_id hid) } static inline platform_status -key_buffer_copy_slice(key_buffer *kb, slice src) +key_buffer_copy_slice(key_buffer *kb, bool32 is_query_key, slice src) { - kb->kind = USER_KEY; + kb->kind = USER_KEY; + kb->is_query_key = is_query_key; return writable_buffer_copy_slice(&kb->wb, src); } @@ -169,7 +187,7 @@ key_buffer_copy_key(key_buffer *kb, key src) } else if (key_is_positive_infinity(src)) { kb->kind = POSITIVE_INFINITY; } else { - return key_buffer_copy_slice(kb, key_slice(src)); + return key_buffer_copy_slice(kb, src.is_query_key, key_slice(src)); } return STATUS_OK; } @@ -270,7 +288,7 @@ ondisk_key_to_key(const ondisk_key *odk) } else if (odk->length == ONDISK_KEY_POSITIVE_INFINITY) { return POSITIVE_INFINITY_KEY; } else { - return key_create(odk->length, odk->bytes); + return key_create(FALSE, odk->length, odk->bytes); } } @@ -417,7 +435,7 @@ ondisk_tuple_required_data_capacity(key k, message msg) static inline key ondisk_tuple_key(const ondisk_tuple *odt) { - return key_create(odt->key_length, odt->key_and_message); + return key_create(FALSE, odt->key_length, odt->key_and_message); } static inline message_type @@ -550,7 +568,11 @@ static inline int data_key_compare(const data_config *cfg, key key1, key key2) { if (key_is_user_key(key1) && key_is_user_key(key2)) { - return cfg->key_compare(cfg, key1.user_slice, key2.user_slice); + user_key user_key1 = {.key = key1.user_slice, + .is_query_key = key1.is_query_key}; + user_key user_key2 = {.key = key2.user_slice, + .is_query_key = key2.is_query_key}; + return cfg->key_compare(cfg, user_key1, user_key2); } else { return (int)key1.kind - (int)key2.kind; } @@ -573,6 +595,7 @@ data_merge_tuples(const data_config *cfg, merge_accumulator *new_message) { debug_assert(key_is_user_key(tuple_key)); + debug_assert(!key_is_query_key(tuple_key)); if (merge_accumulator_is_definitive(new_message)) { return 0; @@ -600,6 +623,7 @@ data_merge_tuples_final(const data_config *cfg, merge_accumulator *oldest_message) { debug_assert(key_is_user_key(tuple_key)); + debug_assert(!key_is_query_key(tuple_key)); if (merge_accumulator_is_definitive(oldest_message)) { return 0; @@ -623,7 +647,8 @@ data_key_to_string(const data_config *cfg, key k, char *str, size_t size) } else if (key_is_positive_infinity(k)) { snprintf(str, size, "(positive_infinity)"); } else { - cfg->key_to_string(cfg, k.user_slice, str, size); + user_key user_key = {.key = k.user_slice, .is_query_key = k.is_query_key}; + cfg->key_to_string(cfg, user_key, str, size); } } diff --git a/src/default_data_config.c b/src/default_data_config.c index 87da136c..4528d1ad 100644 --- a/src/default_data_config.c +++ b/src/default_data_config.c @@ -19,19 +19,19 @@ typedef struct ONDISK { } message_encoding; static int -key_compare(const data_config *cfg, slice key1, slice key2) +key_compare(const data_config *cfg, user_key key1, user_key key2) { - platform_assert(slice_data(key1) != NULL); - platform_assert(slice_data(key2) != NULL); + platform_assert(slice_data(key1.key) != NULL); + platform_assert(slice_data(key2.key) != NULL); - return slice_lex_cmp(key1, key2); + return slice_lex_cmp(key1.key, key2.key); } static void -key_to_string(const data_config *cfg, slice key, char *str, size_t max_len) +key_to_string(const data_config *cfg, user_key key, char *str, size_t max_len) { - debug_hex_encode(str, max_len, slice_data(key), slice_length(key)); + debug_hex_encode(str, max_len, slice_data(key.key), slice_length(key.key)); } static void diff --git a/src/splinterdb.c b/src/splinterdb.c index 125c44ee..83603f7e 100644 --- a/src/splinterdb.c +++ b/src/splinterdb.c @@ -491,7 +491,7 @@ splinterdb_insert_message(splinterdb *kvs, // IN message msg // IN ) { - key tuple_key = key_create_from_slice(user_key); + key tuple_key = key_create_from_slice(FALSE, user_key); platform_assert(kvs != NULL); platform_status status = core_insert(&kvs->spl, tuple_key, msg); return platform_status_to_int(status); @@ -606,7 +606,7 @@ splinterdb_lookup(splinterdb *kvs, // IN { platform_status status; _splinterdb_lookup_result *_result = (_splinterdb_lookup_result *)result; - key target = key_create_from_slice(user_key); + key target = key_create_from_slice(TRUE, user_key); platform_assert(kvs != NULL); status = core_lookup(&kvs->spl, target, &_result->value); @@ -639,7 +639,7 @@ splinterdb_iterator_init(splinterdb *kvs, // IN if (slice_is_null(user_start_key)) { start_key = NEGATIVE_INFINITY_KEY; } else { - start_key = key_create_from_slice(user_start_key); + start_key = key_create_from_slice(TRUE, user_start_key); } platform_status rc = core_range_iterator_init(&kvs->spl, diff --git a/tests/functional/filter_test.c b/tests/functional/filter_test.c index 8846836c..907b2dc5 100644 --- a/tests/functional/filter_test.c +++ b/tests/functional/filter_test.c @@ -49,7 +49,7 @@ test_filter_basic(cache *cc, writable_buffer_resize(&keywb, key_size); writable_buffer_memset(&keywb, 0); uint64 *keybuf = writable_buffer_data(&keywb); - key target = key_create(key_size, keybuf); + key target = key_create(TRUE, key_size, keybuf); for (uint64 i = 0; i < num_values; i++) { if (i != 0) { num_input_keys[i] = num_input_keys[i - 1]; @@ -166,7 +166,7 @@ test_filter_perf(cache *cc, writable_buffer_resize(&keywb, key_size); writable_buffer_memset(&keywb, 0); uint64 *keybuf = writable_buffer_data(&keywb); - key target = key_create(key_size, keybuf); + key target = key_create(TRUE, key_size, keybuf); for (uint64 k = 0; k < num_trees; k++) { for (uint64 i = 0; i < num_values * num_fingerprints; i++) { uint64 idx = k * num_values * num_fingerprints + i; diff --git a/tests/functional/test_functionality.c b/tests/functional/test_functionality.c index b49971eb..948737ba 100644 --- a/tests/functional/test_functionality.c +++ b/tests/functional/test_functionality.c @@ -365,7 +365,7 @@ choose_key(data_config *cfg, // IN case VERIFY_RANGE_ENDPOINT_EQUAL: platform_assert(!is_start && !key_is_null(startkey)); *index = start_index; - key_buffer_copy_slice(keybuf, key_slice(startkey)); + key_buffer_copy_slice(keybuf, FALSE, key_slice(startkey)); break; case VERIFY_RANGE_ENDPOINT_LESS: platform_assert(!is_start && !key_is_null(startkey)); diff --git a/tests/functional/ycsb_test.c b/tests/functional/ycsb_test.c index 469c08d5..904da21c 100644 --- a/tests/functional/ycsb_test.c +++ b/tests/functional/ycsb_test.c @@ -339,8 +339,8 @@ ycsb_thread(void *arg) switch (ops->cmd) { case 'r': { - rc = - core_lookup(spl, key_create(YCSB_KEY_SIZE, ops->key), &value); + rc = core_lookup( + spl, key_create(TRUE, YCSB_KEY_SIZE, ops->key), &value); platform_assert_status_ok(rc); // if (!ops->found) { // char key_str[128]; @@ -358,17 +358,19 @@ ycsb_thread(void *arg) message val = message_create(MESSAGE_TYPE_INSERT, slice_create(YCSB_DATA_SIZE, ops->value)); - rc = core_insert(spl, key_create(YCSB_KEY_SIZE, ops->key), val); + rc = core_insert( + spl, key_create(FALSE, YCSB_KEY_SIZE, ops->key), val); platform_assert_status_ok(rc); break; } case 's': { - rc = core_apply_to_range(spl, - key_create(YCSB_KEY_SIZE, ops->key), - ops->range_len, - nop_tuple_func, - NULL); + rc = + core_apply_to_range(spl, + key_create(TRUE, YCSB_KEY_SIZE, ops->key), + ops->range_len, + nop_tuple_func, + NULL); platform_assert_status_ok(rc); break; } diff --git a/tests/test_data.c b/tests/test_data.c index 40212b6e..1cc9d23a 100644 --- a/tests/test_data.c +++ b/tests/test_data.c @@ -10,9 +10,9 @@ typedef struct data_test_config { } data_test_config; static int -test_data_key_cmp(const data_config *cfg, slice key1, slice key2) +test_data_key_cmp(const data_config *cfg, user_key key1, user_key key2) { - return slice_lex_cmp(key1, key2); + return slice_lex_cmp(key1.key, key2.key); } void @@ -107,11 +107,11 @@ test_data_merge_tuples_final(const data_config *cfg, static void test_data_key_to_string(const data_config *cfg, - slice key, + user_key key, char *str, size_t len) { - debug_hex_encode(str, len, slice_data(key), slice_length(key)); + debug_hex_encode(str, len, slice_data(key.key), slice_length(key.key)); } static void diff --git a/tests/unit/btree_stress_test.c b/tests/unit/btree_stress_test.c index 47f65588..5ffddc99 100644 --- a/tests/unit/btree_stress_test.c +++ b/tests/unit/btree_stress_test.c @@ -416,7 +416,7 @@ gen_key(btree_config *cfg, uint64 i, uint8 *buffer, size_t length) memset(buffer, 0, keylen); uint64 j = i * 23232323731ULL + 99382474567ULL; memcpy(buffer, &j, sizeof(j)); - return key_create(keylen, buffer); + return key_create(FALSE, keylen, buffer); } static uint64 @@ -521,7 +521,7 @@ iterator_test(platform_heap_id hid, } seen++; - prev = key_create(key_length(curr_key), prevbuf); + prev = key_create(FALSE, key_length(curr_key), prevbuf); key_copy_contents(prevbuf, curr_key); if (forwards) { diff --git a/tests/unit/btree_test.c b/tests/unit/btree_test.c index 78001a7d..ea628d90 100644 --- a/tests/unit/btree_test.c +++ b/tests/unit/btree_test.c @@ -195,7 +195,7 @@ leaf_hdr_tests(btree_config *cfg, btree_scratch *scratch, platform_heap_id hid) cfg, hdr, i, - key_create(i % sizeof(i), &i), + key_create(FALSE, i % sizeof(i), &i), message_create(MESSAGE_TYPE_INSERT, slice_create(i % sizeof(i), &i))); ASSERT_TRUE(rv, "Failed to insert 4-byte %d\n", i); } @@ -205,7 +205,7 @@ leaf_hdr_tests(btree_config *cfg, btree_scratch *scratch, platform_heap_id hid) key tuple_key = btree_get_tuple_key(cfg, hdr, i); message msg = btree_get_tuple_message(cfg, hdr, i); cmp_rv = data_key_compare( - cfg->data_cfg, key_create(i % sizeof(i), &i), tuple_key); + cfg->data_cfg, key_create(FALSE, i % sizeof(i), &i), tuple_key); ASSERT_EQUAL(0, cmp_rv, "Bad 4-byte key %d\n", i); cmp_rv = message_lex_cmp( @@ -220,7 +220,7 @@ leaf_hdr_tests(btree_config *cfg, btree_scratch *scratch, platform_heap_id hid) cfg, hdr, i, - key_create(i % sizeof(i), &i), + key_create(FALSE, i % sizeof(i), &i), message_create(MESSAGE_TYPE_INSERT, slice_create(i % sizeof(i), &i))); ASSERT_TRUE(rv, "Failed to insert 8-byte %ld\n", i); } @@ -230,7 +230,7 @@ leaf_hdr_tests(btree_config *cfg, btree_scratch *scratch, platform_heap_id hid) key tuple_key = btree_get_tuple_key(cfg, hdr, i); message msg = btree_get_tuple_message(cfg, hdr, i); cmp_rv = data_key_compare( - cfg->data_cfg, key_create(i % sizeof(i), &i), tuple_key); + cfg->data_cfg, key_create(FALSE, i % sizeof(i), &i), tuple_key); ASSERT_EQUAL(0, cmp_rv, "Bad 4-byte key %d\n", i); cmp_rv = message_lex_cmp( @@ -246,7 +246,7 @@ leaf_hdr_tests(btree_config *cfg, btree_scratch *scratch, platform_heap_id hid) key tuple_key = btree_get_tuple_key(cfg, hdr, i); message msg = btree_get_tuple_message(cfg, hdr, i); cmp_rv = data_key_compare( - cfg->data_cfg, key_create(i % sizeof(i), &i), tuple_key); + cfg->data_cfg, key_create(FALSE, i % sizeof(i), &i), tuple_key); ASSERT_EQUAL(0, cmp_rv, "Bad 4-byte key %d\n", i); cmp_rv = message_lex_cmp( @@ -276,7 +276,7 @@ leaf_hdr_search_tests(btree_config *cfg, platform_heap_id hid) keybuf[0] = 17 * i; messagebuf[0] = i; - key tuple_key = key_create(1, &keybuf); + key tuple_key = key_create(FALSE, 1, &keybuf); message msg = message_create(MESSAGE_TYPE_INSERT, slice_create(i % 8, messagebuf)); @@ -292,7 +292,7 @@ leaf_hdr_search_tests(btree_config *cfg, platform_heap_id hid) key tuple_key = btree_get_tuple_key(cfg, hdr, i); uint8 ui = i; int cmp_rv = - data_key_compare(cfg->data_cfg, key_create(1, &ui), tuple_key); + data_key_compare(cfg->data_cfg, key_create(FALSE, 1, &ui), tuple_key); ASSERT_EQUAL(0, cmp_rv, "Bad 4-byte key %d\n", i); } @@ -318,7 +318,7 @@ index_hdr_tests(btree_config *cfg, btree_scratch *scratch, platform_heap_id hid) memset(&stats, 0, sizeof(stats)); for (uint32 i = 0; i < nkvs; i++) { rv = btree_set_index_entry( - cfg, hdr, i, key_create(i % sizeof(i), &i), i, stats); + cfg, hdr, i, key_create(FALSE, i % sizeof(i), &i), i, stats); ASSERT_TRUE(rv, "Failed to insert 4-byte %d\n", i); } @@ -326,7 +326,7 @@ index_hdr_tests(btree_config *cfg, btree_scratch *scratch, platform_heap_id hid) key pivot_key = btree_get_pivot(cfg, hdr, i); uint64 childaddr = btree_get_child_addr(cfg, hdr, i); cmp_rv = data_key_compare( - cfg->data_cfg, key_create(i % sizeof(i), &i), pivot_key); + cfg->data_cfg, key_create(FALSE, i % sizeof(i), &i), pivot_key); ASSERT_EQUAL(0, cmp_rv, "Bad 4-byte key %d\n", i); ASSERT_EQUAL(childaddr, i, "Bad childaddr %d\n", i); @@ -334,7 +334,7 @@ index_hdr_tests(btree_config *cfg, btree_scratch *scratch, platform_heap_id hid) for (uint64 i = 0; i < nkvs; i++) { rv = btree_set_index_entry( - cfg, hdr, i, key_create(i % sizeof(i), &i), i, stats); + cfg, hdr, i, key_create(FALSE, i % sizeof(i), &i), i, stats); ASSERT_TRUE(rv, "Failed to insert 8-byte %ld\n", i); } @@ -342,7 +342,7 @@ index_hdr_tests(btree_config *cfg, btree_scratch *scratch, platform_heap_id hid) key pivot_key = btree_get_pivot(cfg, hdr, i); uint64 childaddr = btree_get_child_addr(cfg, hdr, i); cmp_rv = data_key_compare( - cfg->data_cfg, key_create(i % sizeof(i), &i), pivot_key); + cfg->data_cfg, key_create(FALSE, i % sizeof(i), &i), pivot_key); ASSERT_EQUAL(0, cmp_rv, "Bad 4-byte key %d\n", i); ASSERT_EQUAL(childaddr, i, "Bad childaddr %d\n", i); @@ -354,7 +354,7 @@ index_hdr_tests(btree_config *cfg, btree_scratch *scratch, platform_heap_id hid) key pivot_key = btree_get_pivot(cfg, hdr, i); uint64 childaddr = btree_get_child_addr(cfg, hdr, i); cmp_rv = data_key_compare( - cfg->data_cfg, key_create(i % sizeof(i), &i), pivot_key); + cfg->data_cfg, key_create(FALSE, i % sizeof(i), &i), pivot_key); ASSERT_EQUAL(0, cmp_rv, "Bad 4-byte key %d\n", i); ASSERT_EQUAL(childaddr, i, "Bad childaddr %d\n", i); @@ -379,7 +379,7 @@ index_hdr_search_tests(btree_config *cfg, platform_heap_id hid) for (int i = 0; i < nkvs; i += 2) { uint8 keybuf[1]; keybuf[0] = i; - key pivot_key = key_create(1, &keybuf); + key pivot_key = key_create(FALSE, 1, &keybuf); rv = btree_set_index_entry(cfg, hdr, i / 2, pivot_key, i, stats); ASSERT_TRUE(rv, "Could not insert pivot %d\n", i); @@ -389,7 +389,7 @@ index_hdr_search_tests(btree_config *cfg, platform_heap_id hid) bool32 found; uint8 keybuf[1]; keybuf[0] = i; - key target = key_create(1, &keybuf); + key target = key_create(FALSE, 1, &keybuf); int64 idx = btree_find_pivot(cfg, hdr, target, &found); ASSERT_EQUAL( (i / 2), idx, "Bad pivot search result idx=%ld for i=%d\n", idx, i); @@ -427,7 +427,7 @@ leaf_split_tests(btree_config *cfg, uint8 keybuf[1]; keybuf[0] = 2 * realnkvs + 1; if (!btree_set_leaf_entry( - cfg, hdr, realnkvs, key_create(1, &keybuf), msg)) + cfg, hdr, realnkvs, key_create(FALSE, 1, &keybuf), msg)) { break; } @@ -438,7 +438,7 @@ leaf_split_tests(btree_config *cfg, uint64 generation; leaf_incorporate_spec spec; - key tuple_key = key_create(1, &i); + key tuple_key = key_create(FALSE, 1, &i); bool32 success = btree_leaf_incorporate_tuple( cfg, hid, hdr, tuple_key, bigger_msg, &spec, &generation); diff --git a/tests/unit/splinter_test.c b/tests/unit/splinter_test.c index 83aa471b..0680d71d 100644 --- a/tests/unit/splinter_test.c +++ b/tests/unit/splinter_test.c @@ -295,7 +295,7 @@ trunk_shadow_append(trunk_shadow *shadow, key tuple_key, message value) static key shadow_entry_key(const shadow_entry *entry, char *data) { - return key_create(entry->key_length, data + entry->key_offset); + return key_create(FALSE, entry->key_length, data + entry->key_offset); } static message diff --git a/tests/unit/splinterdb_quick_test.c b/tests/unit/splinterdb_quick_test.c index be3868f9..6e8f42a7 100644 --- a/tests/unit/splinterdb_quick_test.c +++ b/tests/unit/splinterdb_quick_test.c @@ -75,7 +75,7 @@ test_two_step_iterator(splinterdb *kvsb, int hop_i); static int -custom_key_comparator(const data_config *cfg, slice key1, slice key2); +custom_key_comparator(const data_config *cfg, user_key key1, user_key key2); typedef struct { data_config super; @@ -1135,12 +1135,12 @@ test_two_step_iterator(splinterdb *kvsb, // A user-specified spy comparator static int -custom_key_comparator(const data_config *cfg, slice key1, slice key2) +custom_key_comparator(const data_config *cfg, user_key key1, user_key key2) { - platform_assert(slice_data(key1) != NULL); - platform_assert(slice_data(key2) != NULL); + platform_assert(slice_data(key1.key) != NULL); + platform_assert(slice_data(key2.key) != NULL); - int r = slice_lex_cmp(key1, key2); + int r = slice_lex_cmp(key1.key, key2.key); // record that this spy was called comparison_counting_data_config *ccfg = From 0ddc6c3fb74a115c8031ef51ba078866688df270 Mon Sep 17 00:00:00 2001 From: Rob Johnson Date: Fri, 6 Mar 2026 13:05:36 -0800 Subject: [PATCH 2/4] optionally return most recently inserted matching key as part of a lookup --- include/splinterdb/data.h | 4 +- include/splinterdb/splinterdb.h | 6 ++- src/btree.c | 68 +++++++++++++++++++++------ src/btree.h | 22 ++++++--- src/core.c | 48 ++++++++++++++----- src/core.h | 6 ++- src/data_internal.h | 9 +++- src/default_data_config.c | 8 +++- src/routing_filter.c | 11 ++--- src/routing_filter.h | 4 +- src/splinterdb.c | 20 +++++++- src/trunk.c | 49 +++++++++++++++---- src/trunk.h | 2 + tests/functional/btree_test.c | 19 +++++--- tests/functional/filter_test.c | 4 +- tests/functional/splinter_test.c | 4 +- tests/functional/test_async.c | 1 + tests/functional/test_functionality.c | 2 +- tests/functional/ycsb_test.c | 2 +- tests/test_data.c | 8 +++- tests/unit/btree_stress_test.c | 6 ++- tests/unit/splinter_test.c | 4 +- 22 files changed, 233 insertions(+), 74 deletions(-) diff --git a/include/splinterdb/data.h b/include/splinterdb/data.h index 9ebfc557..10785003 100644 --- a/include/splinterdb/data.h +++ b/include/splinterdb/data.h @@ -114,7 +114,9 @@ typedef int (*key_compare_fn)(const data_config *cfg, user_key key1, user_key key2); -typedef uint32 (*key_hash_fn)(const void *input, size_t length, uint32 seed); +typedef uint32 (*key_hash_fn)(const data_config *cfg, + user_key key, + uint32 seed); // Given two messages, old_message and new_message, merge them // and return the result in new_message. diff --git a/include/splinterdb/splinterdb.h b/include/splinterdb/splinterdb.h index 7f0ada6d..ce55bfde 100644 --- a/include/splinterdb/splinterdb.h +++ b/include/splinterdb/splinterdb.h @@ -187,7 +187,7 @@ splinterdb_update(splinterdb *kvsb, slice key, slice delta); // Lookups // Size of opaque data required to hold a lookup result -#define SPLINTERDB_LOOKUP_BUFSIZE (10 * sizeof(void *)) +#define SPLINTERDB_LOOKUP_BUFSIZE (18 * sizeof(void *)) // A lookup result is stored and parsed from here // @@ -239,6 +239,10 @@ splinterdb_lookup_result_value(const splinterdb_lookup_result *result, // IN slice *value // OUT ); +int +splinterdb_lookup_result_key(const splinterdb_lookup_result *result, // IN + slice *key // OUT +); // Lookup the message for a given key // diff --git a/src/btree.c b/src/btree.c index 7aa871ec..27c339a5 100644 --- a/src/btree.c +++ b/src/btree.c @@ -2186,8 +2186,10 @@ btree_lookup_with_ref_async(btree_lookup_async_state *state, uint64 depth) int64 idx = btree_find_tuple( state->cfg, state->node.hdr, state->target, &state->found); if (state->found) { - state->msg = leaf_entry_message( - btree_get_leaf_entry(state->cfg, state->node.hdr, idx)); + leaf_entry *entry = + btree_get_leaf_entry(state->cfg, state->node.hdr, idx); + state->found_key = leaf_entry_key(entry); + state->msg = leaf_entry_message(entry); } else { btree_node_unget(state->cc, state->cfg, &state->node); } @@ -2195,24 +2197,28 @@ btree_lookup_with_ref_async(btree_lookup_async_state *state, uint64 depth) } -static inline void +static inline platform_status btree_lookup_with_ref(cache *cc, // IN const btree_config *cfg, // IN uint64 root_addr, // IN page_type type, // IN key target, // IN btree_node *node, // OUT + key *found_key, // OUT message *msg, // OUT bool32 *found) // OUT { + platform_status rc = STATUS_OK; btree_lookup_node(cc, cfg, root_addr, target, 0, type, node, NULL); int64 idx = btree_find_tuple(cfg, node->hdr, target, found); if (*found) { leaf_entry *entry = btree_get_leaf_entry(cfg, node->hdr, idx); + *found_key = leaf_entry_key(entry); *msg = leaf_entry_message(entry); } else { btree_node_unget(cc, cfg, node); } + return rc; } async_status @@ -2221,12 +2227,20 @@ btree_lookup_async(btree_lookup_async_state *state) async_begin(state, 0); async_await_subroutine(state, btree_lookup_with_ref_async); - bool32 success = TRUE; + + platform_status rc = STATUS_OK; if (state->found) { - success = merge_accumulator_copy_message(state->result, state->msg); + if (state->keybuf != NULL) { + rc = key_buffer_copy_key(state->keybuf, state->found_key); + } + bool32 success = + merge_accumulator_copy_message(state->result, state->msg); + if (!success) { + rc = STATUS_NO_MEMORY; + } btree_node_unget(state->cc, state->cfg, &state->node); } - async_return(state, success ? STATUS_OK : STATUS_NO_MEMORY); + async_return(state, rc); } @@ -2236,18 +2250,25 @@ btree_lookup(cache *cc, // IN uint64 root_addr, // IN page_type type, // IN key target, // IN + key_buffer *keybuf, // OUT merge_accumulator *result) // OUT { btree_node node; + key found_key; message data; platform_status rc = STATUS_OK; bool32 local_found; btree_lookup_with_ref( - cc, cfg, root_addr, type, target, &node, &data, &local_found); + cc, cfg, root_addr, type, target, &node, &found_key, &data, &local_found); if (local_found) { + if (keybuf != NULL) { + rc = key_buffer_copy_key(keybuf, found_key); + } bool32 success = merge_accumulator_copy_message(result, data); - rc = success ? STATUS_OK : STATUS_NO_MEMORY; + if (!success) { + rc = STATUS_NO_MEMORY; + } btree_node_unget(cc, cfg, &node); } return rc; @@ -2259,21 +2280,35 @@ btree_lookup_and_merge(cache *cc, // IN uint64 root_addr, // IN page_type type, // IN key target, // IN + key_buffer *keybuf, // OUT merge_accumulator *data, // OUT bool32 *local_found) // OUT { btree_node node; + key found_key; message local_data; platform_status rc = STATUS_OK; log_trace_key(target, "btree_lookup"); - btree_lookup_with_ref( - cc, cfg, root_addr, type, target, &node, &local_data, local_found); + btree_lookup_with_ref(cc, + cfg, + root_addr, + type, + target, + &node, + &found_key, + &local_data, + local_found); if (*local_found) { + if (keybuf != NULL) { + rc = key_buffer_copy_key(keybuf, found_key); + } if (merge_accumulator_is_null(data)) { bool32 success = merge_accumulator_copy_message(data, local_data); - rc = success ? STATUS_OK : STATUS_NO_MEMORY; + if (!success) { + rc = STATUS_NO_MEMORY; + } } else if (btree_merge_tuples(cfg, target, local_data, data)) { rc = STATUS_NO_MEMORY; } @@ -2313,10 +2348,15 @@ btree_lookup_and_merge_async(btree_lookup_async_state *state) platform_status rc = STATUS_OK; if (state->found) { + if (state->keybuf != NULL) { + rc = key_buffer_copy_key(state->keybuf, state->found_key); + } if (merge_accumulator_is_null(state->result)) { bool32 success = merge_accumulator_copy_message(state->result, state->msg); - rc = success ? STATUS_OK : STATUS_NO_MEMORY; + if (!success) { + rc = STATUS_NO_MEMORY; + } } else if (btree_merge_tuples( state->cfg, state->target, state->msg, state->result)) { @@ -3057,10 +3097,10 @@ btree_pack_loop(btree_pack_req *req, // IN/OUT log_trace_key(tuple_key, "btree_pack_loop (bottom)"); - if (req->hash) { + if (req->cfg->data_cfg->key_hash != NULL) { platform_assert(req->num_tuples < req->max_tuples); req->fingerprint_arr[req->num_tuples] = - req->hash(key_data(tuple_key), key_length(tuple_key), req->seed); + data_key_hash(req->cfg->data_cfg, tuple_key, req->seed); } req->num_tuples++; diff --git a/src/btree.h b/src/btree.h index 707fa69c..8f02bcef 100644 --- a/src/btree.h +++ b/src/btree.h @@ -154,7 +154,6 @@ typedef struct btree_pack_req { const btree_config *cfg; iterator *itor; // the itor which is being packed uint64 max_tuples; - hash_fn hash; // hash function used for calculating filter_hash unsigned int seed; // seed used for calculating filter_hash uint32 *fingerprint_arr; // IN/OUT: hashes of the keys in the tree @@ -208,6 +207,7 @@ btree_lookup(cache *cc, uint64 root_addr, page_type type, key target, + key_buffer *keybuf, merge_accumulator *result); static inline bool32 @@ -222,6 +222,7 @@ btree_lookup_and_merge(cache *cc, uint64 root_addr, page_type type, key target, + key_buffer *keybuf, merge_accumulator *data, bool32 *local_found); @@ -232,6 +233,7 @@ DEFINE_ASYNC_STATE(btree_lookup_async_state, 3, param, uint64, root_addr, param, page_type, type, param, key, target, + param, key_buffer *, keybuf, param, merge_accumulator *, result, param, async_callback_fn, callback, param, void *, callback_arg, @@ -242,6 +244,7 @@ DEFINE_ASYNC_STATE(btree_lookup_async_state, 3, local, btree_node, child_node, local, uint32, h, local, bool32, found, + local, key, found_key, local, message, msg, local, page_get_async_state_buffer, cache_get_state) // clang-format on @@ -253,12 +256,21 @@ btree_lookup_and_merge_async_state_init(btree_lookup_async_state *state, uint64 root_addr, page_type type, key target, + key_buffer *keybuf, merge_accumulator *result, async_callback_fn callback, void *callback_arg) { - btree_lookup_async_state_init( - state, cc, cfg, root_addr, type, target, result, callback, callback_arg); + btree_lookup_async_state_init(state, + cc, + cfg, + root_addr, + type, + target, + keybuf, + result, + callback, + callback_arg); } async_status @@ -289,7 +301,6 @@ btree_pack_req_init(btree_pack_req *req, const btree_config *cfg, iterator *itor, uint64 max_tuples, - hash_fn hash, unsigned int seed, platform_heap_id hid) { @@ -298,9 +309,8 @@ btree_pack_req_init(btree_pack_req *req, req->cfg = cfg; req->itor = itor; req->max_tuples = max_tuples; - req->hash = hash; req->seed = seed; - if (hash != NULL && max_tuples > 0) { + if (cfg->data_cfg->key_hash != NULL && max_tuples > 0) { req->fingerprint_arr = TYPED_ARRAY_ZALLOC(hid, req->fingerprint_arr, max_tuples); diff --git a/src/core.c b/src/core.c index 804673eb..67c0120e 100644 --- a/src/core.c +++ b/src/core.c @@ -446,7 +446,6 @@ core_memtable_compact(core_handle *spl, uint64 generation, const threadid tid) spl->cfg.btree_cfg, itor, rflimit, - rfcfg->hash, rfcfg->seed, spl->heap_id); uint64 pack_start; @@ -708,6 +707,7 @@ static platform_status core_memtable_lookup(core_handle *spl, uint64 generation, key target, + key_buffer *keybuf, merge_accumulator *data) { cache *const cc = spl->cc; @@ -721,7 +721,7 @@ core_memtable_lookup(core_handle *spl, bool32 local_found; rc = btree_lookup_and_merge( - cc, cfg, root_addr, type, target, data, &local_found); + cc, cfg, root_addr, type, target, keybuf, data, &local_found); return rc; } @@ -1286,7 +1286,10 @@ core_insert(core_handle *spl, key tuple_key, message data) // If any change is made in here, please make similar change in // core_lookup_async platform_status -core_lookup(core_handle *spl, key target, merge_accumulator *result) +core_lookup(core_handle *spl, + key target, + key_buffer *keybuf, + merge_accumulator *result) { // look in memtables @@ -1295,6 +1298,11 @@ core_lookup(core_handle *spl, key target, merge_accumulator *result) // 2. for gen = mt->generation; mt[gen % ...].gen == gen; gen --; // also handles switch to READY ^^^^^ + if (keybuf != NULL) { + platform_status rc = key_buffer_copy_key(keybuf, NULL_KEY); + platform_assert_status_ok(rc); // should never fail + } + merge_accumulator_set_to_null(result); memtable_begin_lookup(&spl->mt_ctxt); @@ -1304,8 +1312,14 @@ core_lookup(core_handle *spl, key target, merge_accumulator *result) for (uint64 mt_gen = mt_gen_start; mt_gen != mt_gen_end; mt_gen--) { platform_status rc; - rc = core_memtable_lookup(spl, mt_gen, target, result); + rc = core_memtable_lookup(spl, mt_gen, target, keybuf, result); platform_assert_status_ok(rc); + if (keybuf != NULL && !key_buffer_is_null(keybuf)) { + // Once we find a matching message, that is the key that we will + // return, so NULL out our reference to the key buffer so that we don't + // modify it further. + keybuf = NULL; + } if (merge_accumulator_is_definitive(result)) { memtable_end_lookup(&spl->mt_ctxt); goto found_final_answer_early; @@ -1323,7 +1337,7 @@ core_lookup(core_handle *spl, key target, merge_accumulator *result) rc = trunk_merge_lookup( - &spl->trunk_context, &root_handle, target, result, NULL); + &spl->trunk_context, &root_handle, target, keybuf, result, NULL); trunk_ondisk_node_handle_deinit(&root_handle); if (!SUCCESS(rc)) { return rc; @@ -1376,9 +1390,15 @@ core_lookup_async(core_lookup_async_state *state) for (uint64 mt_gen = mt_gen_start; mt_gen != mt_gen_end; mt_gen--) { platform_status rc; - rc = - core_memtable_lookup(state->spl, mt_gen, state->target, state->result); + rc = core_memtable_lookup( + state->spl, mt_gen, state->target, state->keybuf, state->result); platform_assert_status_ok(rc); + if (state->keybuf != NULL && !key_buffer_is_null(state->keybuf)) { + // Once we find a matching message, that is the key that we will + // return, so NULL out our reference to the key buffer so that we don't + // modify it further. + state->keybuf = NULL; + } if (merge_accumulator_is_definitive(state->result)) { memtable_end_lookup(&state->spl->mt_ctxt); goto found_final_answer_early; @@ -1399,6 +1419,7 @@ core_lookup_async(core_lookup_async_state *state) &state->spl->trunk_context, &state->root_handle, state->target, + state->keybuf, state->result, NULL, state->callback, @@ -1513,12 +1534,11 @@ core_mkfs(core_handle *spl, spl->log = log_create(cc, spl->cfg.log_cfg, spl->heap_id); } - // ALEX: For now we assume an init means destroying any present super blocks - core_set_super_block(spl, FALSE, FALSE, TRUE); - trunk_context_init( &spl->trunk_context, spl->cfg.trunk_node_cfg, hid, cc, al, ts, 0); + core_set_super_block(spl, FALSE, FALSE, TRUE); + if (spl->cfg.use_stats) { spl->stats = TYPED_ARRAY_ZALLOC(spl->heap_id, spl->stats, MAX_THREADS); platform_assert(spl->stats); @@ -1932,6 +1952,8 @@ core_print_lookup_stats(platform_log_handle *log_handle, core_handle *spl) void core_print_lookup(core_handle *spl, key target, platform_log_handle *log_handle) { + key_buffer keybuf; + key_buffer_init(&keybuf, PROCESS_PRIVATE_HEAP_ID); merge_accumulator data; merge_accumulator_init(&data, PROCESS_PRIVATE_HEAP_ID); @@ -1950,13 +1972,14 @@ core_print_lookup(core_handle *spl, key target, platform_log_handle *log_handle) root_addr, PAGE_TYPE_MEMTABLE, target, + &keybuf, &data); platform_assert_status_ok(rc); if (!merge_accumulator_is_null(&data)) { char key_str[128]; char message_str[128]; message msg = merge_accumulator_to_message(&data); - core_key_to_string(spl, target, key_str); + core_key_to_string(spl, key_buffer_key(&keybuf), key_str); core_message_to_string(spl, msg, message_str); platform_log_stream( &stream, @@ -1973,7 +1996,8 @@ core_print_lookup(core_handle *spl, key target, platform_log_handle *log_handle) trunk_ondisk_node_handle handle; trunk_init_root_handle(&spl->trunk_context, &handle); - trunk_merge_lookup(&spl->trunk_context, &handle, target, &data, log_handle); + trunk_merge_lookup( + &spl->trunk_context, &handle, target, &keybuf, &data, log_handle); trunk_ondisk_node_handle_deinit(&handle); } diff --git a/src/core.h b/src/core.h index 6c7de97e..731db148 100644 --- a/src/core.h +++ b/src/core.h @@ -150,7 +150,10 @@ platform_status core_insert(core_handle *spl, key tuple_key, message data); platform_status -core_lookup(core_handle *spl, key target, merge_accumulator *result); +core_lookup(core_handle *spl, + key target, + key_buffer *keybuf, + merge_accumulator *result); static inline bool32 core_lookup_found(merge_accumulator *result) @@ -162,6 +165,7 @@ core_lookup_found(merge_accumulator *result) DEFINE_ASYNC_STATE(core_lookup_async_state, 1, param, core_handle *, spl, param, key, target, + param, key_buffer *, keybuf, param, merge_accumulator *, result, param, async_callback_fn, callback, param, void *, callback_arg, diff --git a/src/data_internal.h b/src/data_internal.h index 44996f70..684f5144 100644 --- a/src/data_internal.h +++ b/src/data_internal.h @@ -163,6 +163,12 @@ key_buffer_key(const key_buffer *kb) } } +static inline bool32 +key_buffer_is_null(key_buffer *kb) +{ + return kb->kind == USER_KEY && writable_buffer_is_null(&kb->wb); +} + static inline void key_buffer_init(key_buffer *kb, platform_heap_id hid) { @@ -582,7 +588,8 @@ static inline uint32 data_key_hash(const data_config *cfg, key k, uint32 seed) { if (key_is_user_key(k)) { - return cfg->key_hash(key_data(k), key_length(k), seed); + user_key arg = {.key = k.user_slice, .is_query_key = k.is_query_key}; + return cfg->key_hash(cfg, arg, seed); } else { return seed * (uint32)k.kind; } diff --git a/src/default_data_config.c b/src/default_data_config.c index 4528d1ad..2b96ac72 100644 --- a/src/default_data_config.c +++ b/src/default_data_config.c @@ -27,6 +27,12 @@ key_compare(const data_config *cfg, user_key key1, user_key key2) return slice_lex_cmp(key1.key, key2.key); } +static uint32 +key_hash(const data_config *cfg, user_key key, uint32 seed) +{ + platform_assert(slice_data(key.key) != NULL); + return platform_hash32(slice_data(key.key), slice_length(key.key), seed); +} static void key_to_string(const data_config *cfg, user_key key, char *str, size_t max_len) @@ -56,7 +62,7 @@ default_data_config_init(const size_t max_key_size, // IN data_config cfg = { .max_key_size = max_key_size, .key_compare = key_compare, - .key_hash = platform_hash32, + .key_hash = key_hash, .merge_tuples = NULL, .merge_tuples_final = NULL, .key_to_string = key_to_string, diff --git a/src/routing_filter.c b/src/routing_filter.c index 49e6b502..62b2e894 100644 --- a/src/routing_filter.c +++ b/src/routing_filter.c @@ -897,8 +897,8 @@ routing_filter_lookup_async(routing_filter_lookup_async_state *state) async_return(state, STATUS_OK); } - state->fp = state->cfg->hash( - key_data(state->target), key_length(state->target), state->cfg->seed); + state->fp = + data_key_hash(state->cfg->data_cfg, state->target, state->cfg->seed); state->fp >>= 32 - state->cfg->fingerprint_size; uint32 log_num_buckets = 31 - __builtin_clz(state->filter.num_fingerprints); if (log_num_buckets < state->cfg->log_index_size) { @@ -998,11 +998,10 @@ routing_filter_lookup(cache *cc, return STATUS_OK; } - hash_fn hash = cfg->hash; - uint64 seed = cfg->seed; - uint64 index_size = cfg->index_size; + uint64 seed = cfg->seed; + uint64 index_size = cfg->index_size; - uint32 fp = hash(key_data(target), key_length(target), seed); + uint32 fp = data_key_hash(cfg->data_cfg, target, seed); fp >>= 32 - cfg->fingerprint_size; size_t value_size = filter->value_size; uint32 log_num_buckets = 31 - __builtin_clz(filter->num_fingerprints); diff --git a/src/routing_filter.h b/src/routing_filter.h index 70366c8b..aa21d90b 100644 --- a/src/routing_filter.h +++ b/src/routing_filter.h @@ -36,7 +36,6 @@ typedef struct routing_config { uint32 index_size; uint32 log_index_size; - hash_fn hash; unsigned int seed; } routing_config; @@ -46,7 +45,7 @@ routing_config_init(routing_config *cfg, data_config *data_cfg, uint32 fingerprint_size, uint32 log_index_size, - hash_fn hash, + key_hash_fn hash, unsigned int seed) { cfg->cache_cfg = cache_cfg; @@ -54,7 +53,6 @@ routing_config_init(routing_config *cfg, cfg->fingerprint_size = fingerprint_size; cfg->index_size = 1UL << log_index_size; cfg->log_index_size = log_index_size; - cfg->hash = hash; cfg->seed = seed; return STATUS_OK; } diff --git a/src/splinterdb.c b/src/splinterdb.c index 83603f7e..76a48122 100644 --- a/src/splinterdb.c +++ b/src/splinterdb.c @@ -524,6 +524,7 @@ splinterdb_update(splinterdb *kvsb, slice user_key, slice update) *----------------------------------------------------------------------------- */ typedef struct { + key_buffer keybuf; merge_accumulator value; } _splinterdb_lookup_result; @@ -543,8 +544,9 @@ splinterdb_lookup_result_init(const splinterdb *kvs, // IN ) { _splinterdb_lookup_result *_result = (_splinterdb_lookup_result *)result; + key_buffer_init(&_result->keybuf, PROCESS_PRIVATE_HEAP_ID); merge_accumulator_init_with_buffer(&_result->value, - NULL, + PROCESS_PRIVATE_HEAP_ID, buffer_len, buffer, WRITABLE_BUFFER_NULL_LENGTH, @@ -579,6 +581,20 @@ splinterdb_lookup_result_value(const splinterdb_lookup_result *result, // IN return 0; } +int +splinterdb_lookup_result_key(const splinterdb_lookup_result *result, // IN + slice *key) +{ + _splinterdb_lookup_result *_result = (_splinterdb_lookup_result *)result; + + if (!splinterdb_lookup_found(result)) { + return EINVAL; + } + + *key = key_slice(key_buffer_key(&_result->keybuf)); + return 0; +} + /* *----------------------------------------------------------------------------- * splinterdb_lookup -- @@ -609,7 +625,7 @@ splinterdb_lookup(splinterdb *kvs, // IN key target = key_create_from_slice(TRUE, user_key); platform_assert(kvs != NULL); - status = core_lookup(&kvs->spl, target, &_result->value); + status = core_lookup(&kvs->spl, target, &_result->keybuf, &_result->value); return platform_status_to_int(status); } diff --git a/src/trunk.c b/src/trunk.c index 7a5106ce..7000943f 100644 --- a/src/trunk.c +++ b/src/trunk.c @@ -3469,7 +3469,6 @@ bundle_compaction_task(task *arg) context->cfg->btree_cfg, &merger.merge_itor->super, tuple_bound, - context->cfg->filter_cfg->hash, context->cfg->filter_cfg->seed, context->hid); @@ -5123,6 +5122,7 @@ trunk_ondisk_bundle_merge_lookup(trunk_context *context, uint64 height, trunk_ondisk_bundle *bndl, key tgt, + key_buffer *keybuf, merge_accumulator *result, platform_log_handle *log) { @@ -5169,6 +5169,7 @@ trunk_ondisk_bundle_merge_lookup(trunk_context *context, branch_ref_addr(bndl->branches[idx]), trunk_ondisk_bundle_branch_type(bndl), tgt, + keybuf, result, &local_found); if (!SUCCESS(rc)) { @@ -5185,6 +5186,9 @@ trunk_ondisk_bundle_merge_lookup(trunk_context *context, } } + if (keybuf != NULL && !key_buffer_is_null(keybuf)) { + keybuf = NULL; + } if (!log && merge_accumulator_is_definitive(result)) { return STATUS_OK; @@ -5192,12 +5196,15 @@ trunk_ondisk_bundle_merge_lookup(trunk_context *context, if (log) { merge_accumulator ma; + key_buffer kb; + key_buffer_init(&kb, PROCESS_PRIVATE_HEAP_ID); merge_accumulator_init(&ma, PROCESS_PRIVATE_HEAP_ID); rc = btree_lookup_and_merge(context->cc, context->cfg->btree_cfg, branch_ref_addr(bndl->branches[idx]), trunk_ondisk_bundle_branch_type(bndl), tgt, + &kb, &ma, &local_found); platform_log(log, @@ -5205,11 +5212,15 @@ trunk_ondisk_bundle_merge_lookup(trunk_context *context, branch_ref_addr(bndl->branches[idx]), local_found); if (local_found) { + key ky = key_buffer_key(&kb); message msg = merge_accumulator_to_message(&ma); - platform_log( - log, "msg: %s\n", message_string(context->cfg->data_cfg, msg)); + platform_log(log, + "key: %s msg: %s\n", + key_string(context->cfg->data_cfg, ky), + message_string(context->cfg->data_cfg, msg)); } merge_accumulator_deinit(&ma); + key_buffer_deinit(&kb); } } @@ -5272,6 +5283,7 @@ trunk_ondisk_bundle_merge_lookup_async(trunk_merge_lookup_async_state *state, branch_ref_addr(state->bndl->branches[state->idx]), trunk_ondisk_bundle_branch_type(state->bndl), state->tgt, + state->keybuf, state->result, state->callback, state->callback_arg); @@ -5290,14 +5302,18 @@ trunk_ondisk_bundle_merge_lookup_async(trunk_merge_lookup_async_state *state, } } - + if (state->keybuf != NULL && !key_buffer_is_null(state->keybuf)) { + state->keybuf = NULL; + } if (!state->log && merge_accumulator_is_definitive(state->result)) { async_return(state); } if (state->log) { merge_accumulator ma; - merge_accumulator_init(&ma, state->context->hid); + key_buffer kb; + key_buffer_init(&kb, PROCESS_PRIVATE_HEAP_ID); + merge_accumulator_init(&ma, PROCESS_PRIVATE_HEAP_ID); // Not bothering to make the logging paths async platform_status rc = btree_lookup_and_merge( state->context->cc, @@ -5305,6 +5321,7 @@ trunk_ondisk_bundle_merge_lookup_async(trunk_merge_lookup_async_state *state, branch_ref_addr(state->bndl->branches[state->idx]), trunk_ondisk_bundle_branch_type(state->bndl), state->tgt, + &kb, &ma, &state->btree_state.found); platform_assert_status_ok(rc); @@ -5313,12 +5330,15 @@ trunk_ondisk_bundle_merge_lookup_async(trunk_merge_lookup_async_state *state, branch_ref_addr(state->bndl->branches[state->idx]), state->btree_state.found); if (state->btree_state.found) { + key ky = key_buffer_key(&kb); message msg = merge_accumulator_to_message(&ma); platform_log(state->log, - "msg: %s\n", + "key: %s msg: %s\n", + key_string(state->context->cfg->data_cfg, ky), message_string(state->context->cfg->data_cfg, msg)); } merge_accumulator_deinit(&ma); + key_buffer_deinit(&kb); } } @@ -5329,6 +5349,7 @@ platform_status trunk_merge_lookup(trunk_context *context, trunk_ondisk_node_handle *inhandle, key tgt, + key_buffer *keybuf, merge_accumulator *result, platform_log_handle *log) { @@ -5384,13 +5405,16 @@ trunk_merge_lookup(trunk_context *context, } for (uint64 i = 0; i < pivot->num_live_inflight_bundles; i++) { rc = trunk_ondisk_bundle_merge_lookup( - context, height, bndl, tgt, result, log); + context, height, bndl, tgt, keybuf, result, log); if (!SUCCESS(rc)) { platform_error_log("trunk_merge_lookup: " "ondisk_bundle_merge_lookup failed: %d\n", rc.r); goto cleanup; } + if (keybuf != NULL && !key_buffer_is_null(keybuf)) { + keybuf = NULL; + } if (merge_accumulator_is_definitive(result)) { goto cleanup; } @@ -5402,13 +5426,16 @@ trunk_merge_lookup(trunk_context *context, // Search the pivot bundle bndl = trunk_ondisk_pivot_bundle(pivot); rc = trunk_ondisk_bundle_merge_lookup( - context, height, bndl, tgt, result, log); + context, height, bndl, tgt, keybuf, result, log); if (!SUCCESS(rc)) { platform_error_log("trunk_merge_lookup: " "ondisk_bundle_merge_lookup failed: %d\n", rc.r); goto cleanup; } + if (keybuf != NULL && !key_buffer_is_null(keybuf)) { + keybuf = NULL; + } if (!log && merge_accumulator_is_definitive(result)) { goto cleanup; } @@ -5510,6 +5537,9 @@ trunk_merge_lookup_async(trunk_merge_lookup_async_state *state) state->rc.r); goto cleanup; } + if (state->keybuf != NULL && !key_buffer_is_null(state->keybuf)) { + state->keybuf = NULL; + } if (merge_accumulator_is_definitive(state->result)) { goto cleanup; } @@ -5537,6 +5567,9 @@ trunk_merge_lookup_async(trunk_merge_lookup_async_state *state) state->rc.r); goto cleanup; } + if (state->keybuf != NULL && !key_buffer_is_null(state->keybuf)) { + state->keybuf = NULL; + } if (!state->log && merge_accumulator_is_definitive(state->result)) { goto cleanup; } diff --git a/src/trunk.h b/src/trunk.h index f7e12575..7a3fb26e 100644 --- a/src/trunk.h +++ b/src/trunk.h @@ -286,6 +286,7 @@ platform_status trunk_merge_lookup(trunk_context *context, trunk_ondisk_node_handle *handle, key tgt, + key_buffer *keybuf, merge_accumulator *result, platform_log_handle *log); @@ -313,6 +314,7 @@ DEFINE_ASYNC_STATE(trunk_merge_lookup_async_state, 4, param, trunk_context *, context, param, trunk_ondisk_node_handle *, inhandle, param, key, tgt, + param, key_buffer *, keybuf, param, merge_accumulator *, result, param, platform_log_handle *, log, param, async_callback_fn, callback, diff --git a/tests/functional/btree_test.c b/tests/functional/btree_test.c index 945edba9..b499b95f 100644 --- a/tests/functional/btree_test.c +++ b/tests/functional/btree_test.c @@ -124,7 +124,7 @@ test_btree_lookup(cache *cc, merge_accumulator_init(&result, hid); - rc = btree_lookup(cc, cfg, root_addr, type, target, &result); + rc = btree_lookup(cc, cfg, root_addr, type, target, NULL, &result); platform_assert_status_ok(rc); message data = merge_accumulator_to_message(&result); @@ -488,6 +488,7 @@ test_btree_async_lookup(cache *cc, root_addr, type, target, + NULL, &async_ctxt->result, btree_test_async_callback, async_ctxt); @@ -658,7 +659,7 @@ test_btree_basic(cache *cc, platform_timestamp_elapsed(start_time)); btree_pack_req req; rc = btree_pack_req_init( - &req, cc, btree_cfg, (iterator *)&itor, UINT64_MAX, NULL, 0, NULL); + &req, cc, btree_cfg, (iterator *)&itor, UINT64_MAX, 0, NULL); platform_assert_status_ok(rc); btree_print_tree_stats(Platform_default_log_handle, @@ -676,6 +677,7 @@ test_btree_basic(cache *cc, platform_default_log("btree itor/pack time per tuple %luns\n", platform_timestamp_elapsed(start_time) / num_inserts); cache_assert_free(cc); + btree_pack_req_deinit(&req, hid); num_async = 0; start_time = platform_get_timestamp(); @@ -839,13 +841,14 @@ test_btree_create_packed_trees(cache *cc, btree_pack_req req; rc = btree_pack_req_init( - &req, cc, btree_cfg, &itor.super, UINT64_MAX, NULL, 0, hid); + &req, cc, btree_cfg, &itor.super, UINT64_MAX, 0, hid); platform_assert_status_ok(rc); rc = btree_pack(&req); platform_assert_status_ok(rc); btree_iterator_deinit(&itor); root_addr[tree_no] = req.root_addr; + btree_pack_req_deinit(&req, hid); } merge_accumulator_deinit(&data); @@ -1075,10 +1078,11 @@ test_btree_merge_basic(cache *cc, btree_pack_req req; rc = btree_pack_req_init( - &req, cc, btree_cfg, &merge_itor->super, UINT64_MAX, NULL, 0, hid); + &req, cc, btree_cfg, &merge_itor->super, UINT64_MAX, 0, hid); platform_assert_status_ok(rc); btree_pack(&req); output_addr[pivot_no] = req.root_addr; + btree_pack_req_deinit(&req, hid); for (uint64 tree_no = 0; tree_no < arity; tree_no++) { btree_iterator_deinit(&btree_itor_arr[tree_no]); @@ -1102,7 +1106,7 @@ test_btree_merge_basic(cache *cc, uint64 output_count = 0; rc = test_count_tuples_in_range(cc, btree_cfg, - &req.root_addr, + &output_addr[pivot_no], PAGE_TYPE_BRANCH, 1, lo, @@ -1120,7 +1124,7 @@ test_btree_merge_basic(cache *cc, cc, btree_cfg, root_addr, PAGE_TYPE_BRANCH, arity, lo, hi); platform_default_log("****\n"); test_btree_print_all_keys( - cc, btree_cfg, &req.root_addr, PAGE_TYPE_BRANCH, 1, lo, hi); + cc, btree_cfg, &output_addr[pivot_no], PAGE_TYPE_BRANCH, 1, lo, hi); platform_default_log( "test_btree_merge_basic: input and output counts do not match\n"); platform_default_log( @@ -1466,11 +1470,12 @@ test_btree_merge_perf(cache *cc, btree_pack_req req; rc = btree_pack_req_init( - &req, cc, btree_cfg, &merge_itor->super, UINT64_MAX, NULL, 0, hid); + &req, cc, btree_cfg, &merge_itor->super, UINT64_MAX, 0, hid); platform_assert_status_ok(rc); btree_pack(&req); output_addr[merge_no * num_merges + pivot_no] = req.root_addr; + btree_pack_req_deinit(&req, hid); for (uint64 tree_no = 0; tree_no < arity; tree_no++) { btree_iterator_deinit(&btree_itor_arr[tree_no]); } diff --git a/tests/functional/filter_test.c b/tests/functional/filter_test.c index 907b2dc5..caaefc6c 100644 --- a/tests/functional/filter_test.c +++ b/tests/functional/filter_test.c @@ -60,7 +60,7 @@ test_filter_basic(cache *cc, num_input_keys[i]++; } *keybuf = (i + 1) * j; - fp_arr[i][j] = cfg->hash(keybuf, key_size, cfg->seed); + fp_arr[i][j] = data_key_hash(cfg->data_cfg, target, cfg->seed); } } @@ -171,7 +171,7 @@ test_filter_perf(cache *cc, for (uint64 i = 0; i < num_values * num_fingerprints; i++) { uint64 idx = k * num_values * num_fingerprints + i; *keybuf = idx; - fp_arr[idx] = cfg->hash(keybuf, key_size, cfg->seed); + fp_arr[idx] = data_key_hash(cfg->data_cfg, target, cfg->seed); } } diff --git a/tests/functional/splinter_test.c b/tests/functional/splinter_test.c index ca938514..a014717b 100644 --- a/tests/functional/splinter_test.c +++ b/tests/functional/splinter_test.c @@ -293,7 +293,7 @@ test_trunk_lookup_thread(void *arg) core_max_key_size(spl), test_cfg[spl_idx].period); ts = platform_get_timestamp(); - rc = core_lookup(spl, key_buffer_key(&keybuf), &data); + rc = core_lookup(spl, key_buffer_key(&keybuf), NULL, &data); ts = platform_timestamp_elapsed(ts); if (ts > params->lookup_stats[SYNC_LU].latency_max) { params->lookup_stats[SYNC_LU].latency_max = ts; @@ -622,7 +622,7 @@ do_operation(test_splinter_thread_params *params, core_max_key_size(spl), test_cfg[spl_idx].period); ts = platform_get_timestamp(); - rc = core_lookup(spl, key_buffer_key(&keybuf), &msg); + rc = core_lookup(spl, key_buffer_key(&keybuf), NULL, &msg); platform_assert(SUCCESS(rc)); ts = platform_timestamp_elapsed(ts); if (ts > params->lookup_stats[SYNC_LU].latency_max) { diff --git a/tests/functional/test_async.c b/tests/functional/test_async.c index d4c4f934..b900bfd9 100644 --- a/tests/functional/test_async.c +++ b/tests/functional/test_async.c @@ -149,6 +149,7 @@ async_ctxt_submit(core_handle *spl, core_lookup_async_state_init(&ctxt->state, spl, key_buffer_key(&ctxt->key), + NULL, &ctxt->data, test_async_callback, ctxt); diff --git a/tests/functional/test_functionality.c b/tests/functional/test_functionality.c index 948737ba..418c9e89 100644 --- a/tests/functional/test_functionality.c +++ b/tests/functional/test_functionality.c @@ -181,7 +181,7 @@ verify_against_shadow(core_handle *spl, if (ctxt == NULL) { test_int_to_key(&keybuf, keynum, key_size); key target = key_buffer_key(&keybuf); - rc = core_lookup(spl, target, &merge_acc); + rc = core_lookup(spl, target, NULL, &merge_acc); if (!SUCCESS(rc)) { return rc; } diff --git a/tests/functional/ycsb_test.c b/tests/functional/ycsb_test.c index 904da21c..9f09c033 100644 --- a/tests/functional/ycsb_test.c +++ b/tests/functional/ycsb_test.c @@ -340,7 +340,7 @@ ycsb_thread(void *arg) case 'r': { rc = core_lookup( - spl, key_create(TRUE, YCSB_KEY_SIZE, ops->key), &value); + spl, key_create(TRUE, YCSB_KEY_SIZE, ops->key), NULL, &value); platform_assert_status_ok(rc); // if (!ops->found) { // char key_str[128]; diff --git a/tests/test_data.c b/tests/test_data.c index 1cc9d23a..ae90eb18 100644 --- a/tests/test_data.c +++ b/tests/test_data.c @@ -15,6 +15,12 @@ test_data_key_cmp(const data_config *cfg, user_key key1, user_key key2) return slice_lex_cmp(key1.key, key2.key); } +static uint32 +test_data_key_hash(const data_config *cfg, user_key key, uint32 seed) +{ + return platform_hash32(slice_data(key.key), slice_length(key.key), seed); +} + void test_data_generate_message(const data_config *cfg, message_type type, @@ -128,7 +134,7 @@ static data_test_config data_test_config_internal = { { .max_key_size = 24, .key_compare = test_data_key_cmp, - .key_hash = platform_hash32, + .key_hash = test_data_key_hash, .key_to_string = test_data_key_to_string, .message_to_string = test_data_message_to_string, .merge_tuples = test_data_merge_tuples, diff --git a/tests/unit/btree_stress_test.c b/tests/unit/btree_stress_test.c index 5ffddc99..9104e4b1 100644 --- a/tests/unit/btree_stress_test.c +++ b/tests/unit/btree_stress_test.c @@ -467,6 +467,7 @@ query_tests(cache *cc, root_addr, type, gen_key(cfg, i, keybuf, bt_page_size), + NULL, &result); if (!btree_found(&result) || message_lex_cmp(merge_accumulator_to_message(&result), @@ -665,7 +666,7 @@ pack_tests(cache *cc, platform_status rc = STATUS_TEST_FAILED; btree_pack_req req; - rc = btree_pack_req_init(&req, cc, cfg, iter, nkvs, NULL, 0, hid); + rc = btree_pack_req_init(&req, cc, cfg, iter, nkvs, 0, hid); ASSERT_TRUE(SUCCESS(rc)); if (!SUCCESS(btree_pack(&req))) { @@ -674,7 +675,8 @@ pack_tests(cache *cc, CTEST_LOG_INFO("Packed %lu items ", req.num_tuples); } + uint64 packed_root_addr = req.root_addr; btree_pack_req_deinit(&req, hid); - return req.root_addr; + return packed_root_addr; } diff --git a/tests/unit/splinter_test.c b/tests/unit/splinter_test.c index 0680d71d..8e3bcb2e 100644 --- a/tests/unit/splinter_test.c +++ b/tests/unit/splinter_test.c @@ -439,7 +439,7 @@ CTEST2(splinter, test_lookups) test_key(&keybuf, TEST_RANDOM, insert_num, 0, 0, key_size, 0); merge_accumulator_set_to_null(&qdata); - rc = core_lookup(&spl, key_buffer_key(&keybuf), &qdata); + rc = core_lookup(&spl, key_buffer_key(&keybuf), NULL, &qdata); ASSERT_TRUE(SUCCESS(rc), "trunk_lookup() FAILURE, insert_num=%lu: %s\n", insert_num, @@ -476,7 +476,7 @@ CTEST2(splinter, test_lookups) test_key(&keybuf, TEST_RANDOM, insert_num, 0, 0, key_size, 0); - rc = core_lookup(&spl, key_buffer_key(&keybuf), &qdata); + rc = core_lookup(&spl, key_buffer_key(&keybuf), NULL, &qdata); ASSERT_TRUE(SUCCESS(rc), "trunk_lookup() FAILURE, insert_num=%lu: %s\n", insert_num, From d103b415908f06e3de6c3198bd4c599111495298 Mon Sep 17 00:00:00 2001 From: Rob Johnson Date: Mon, 9 Mar 2026 12:30:01 -0700 Subject: [PATCH 3/4] change default file perms Signed-off-by: Rob Johnson --- src/platform_linux/platform_io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/platform_linux/platform_io.h b/src/platform_linux/platform_io.h index 72467cd3..b7f0122e 100644 --- a/src/platform_linux/platform_io.h +++ b/src/platform_linux/platform_io.h @@ -35,7 +35,7 @@ struct iovec; (IO_DEFAULT_PAGES_PER_EXTENT * IO_DEFAULT_PAGE_SIZE) #define IO_DEFAULT_FLAGS (O_RDWR | O_CREAT) -#define IO_DEFAULT_PERMS (0755) +#define IO_DEFAULT_PERMS (0600) #define IO_DEFAULT_KERNEL_QUEUE_SIZE (256) #define IO_DEFAULT_ASYNC_QUEUE_DEPTH (256) From f394343247825128e3a89ae8b53eba8c3201ff9c Mon Sep 17 00:00:00 2001 From: Rob Johnson Date: Mon, 9 Mar 2026 13:59:09 -0700 Subject: [PATCH 4/4] fix example Signed-off-by: Rob Johnson --- .../splinterdb_custom_ipv4_addr_sortcmp_example.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/splinterdb_custom_ipv4_addr_sortcmp_example.c b/examples/splinterdb_custom_ipv4_addr_sortcmp_example.c index c534f514..04e42b34 100644 --- a/examples/splinterdb_custom_ipv4_addr_sortcmp_example.c +++ b/examples/splinterdb_custom_ipv4_addr_sortcmp_example.c @@ -100,7 +100,7 @@ configure_splinter_instance(splinterdb_config *splinterdb_cfg, uint64 cache_size); int -custom_key_compare(const data_config *cfg, slice key1, slice key2); +custom_key_compare(const data_config *cfg, user_key key1, user_key key2); int ip4_ipaddr_keycmp(const char *key1, @@ -208,12 +208,12 @@ configure_splinter_instance(splinterdb_config *splinterdb_cfg, * ----------------------------------------------------------------------------- */ int -custom_key_compare(const data_config *cfg, slice key1, slice key2) +custom_key_compare(const data_config *cfg, user_key key1, user_key key2) { - return ip4_ipaddr_keycmp((const char *)slice_data(key1), - slice_length(key1), - (const char *)slice_data(key2), - slice_length(key2)); + return ip4_ipaddr_keycmp((const char *)slice_data(key1.key), + slice_length(key1.key), + (const char *)slice_data(key2.key), + slice_length(key2.key)); } /*