Skip to content

Commit 920bf12

Browse files
authored
Optimize skiplist query efficiency by embedding the skiplist header (#2867)
Embedding the skiplist header reduces memory jumps, thus optimizing sorted set query speed. ``` // Before typedef struct zskiplist { struct zskiplistNode *header, *tail; unsigned long length; int level; /* Height of the skiplist. */ } zskiplist; // After typedef struct zskiplist { unsigned long length; struct zskiplistNode *tail; /* Since the span at level 0 is always 1 or 0 , * this field is instead used for storing the height of the skiplist. */ struct zskiplistLevel { struct zskiplistNode *forward; unsigned long span; } level[ZSKIPLIST_MAXLEVEL]; } zskiplist; ``` --------- Signed-off-by: chzhoo <[email protected]>
1 parent 1fd6d71 commit 920bf12

File tree

7 files changed

+171
-127
lines changed

7 files changed

+171
-127
lines changed

src/defrag.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -239,16 +239,16 @@ robj *activeDefragStringOb(robj *ob) {
239239
/* Internal function used by zslDefrag */
240240
static void zslUpdateNode(zskiplist *zsl, zskiplistNode *oldnode, zskiplistNode *newnode, zskiplistNode **update) {
241241
int i;
242-
for (i = 0; i < zsl->level; i++) {
242+
for (i = 0; i < zslGetHeight(zsl); i++) {
243243
if (update[i]->level[i].forward == oldnode) update[i]->level[i].forward = newnode;
244244
}
245-
serverAssert(zsl->header != oldnode);
245+
serverAssert(zslGetHeader(zsl) != oldnode);
246246
if (newnode->level[0].forward) {
247247
serverAssert(newnode->level[0].forward->backward == oldnode);
248248
newnode->level[0].forward->backward = newnode;
249249
} else {
250-
serverAssert(zsl->tail == oldnode);
251-
zsl->tail = newnode;
250+
serverAssert(zslGetTail(zsl) == oldnode);
251+
zslSetTail(zsl, newnode);
252252
}
253253
}
254254

@@ -269,8 +269,8 @@ static void activeDefragZsetNode(void *privdata, void *entry_ref) {
269269
/* find skiplist pointers that need to be updated if we end up moving the
270270
* skiplist node. */
271271
zskiplistNode *update[ZSKIPLIST_MAXLEVEL];
272-
zskiplistNode *x = zsl->header;
273-
for (int i = zsl->level - 1; i >= 0; i--) {
272+
zskiplistNode *x = zslGetHeader(zsl);
273+
for (int i = zslGetHeight(zsl) - 1; i >= 0; i--) {
274274
/* stop when we've reached the end of this level or the next node comes
275275
* after our target in sorted order */
276276
zskiplistNode *next = x->level[i].forward;
@@ -453,13 +453,11 @@ static void defragZsetSkiplist(robj *ob) {
453453

454454
zset *newzs;
455455
zskiplist *newzsl;
456-
struct zskiplistNode *newheader;
457456
if ((newzs = activeDefragAlloc(zs))) {
458457
objectSetVal(ob, newzs);
459458
zs = newzs;
460459
}
461460
if ((newzsl = activeDefragAlloc(zs->zsl))) zs->zsl = newzsl;
462-
if ((newheader = activeDefragAlloc(zs->zsl->header))) zs->zsl->header = newheader;
463461

464462
hashtable *newtable;
465463
if ((newtable = hashtableDefragTables(zs->ht, activeDefragAlloc))) zs->ht = newtable;

src/lazyfree.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ size_t lazyfreeGetFreeEffort(robj *key, robj *obj, int dbid) {
134134
return hashtableSize(ht);
135135
} else if (obj->type == OBJ_ZSET && obj->encoding == OBJ_ENCODING_SKIPLIST) {
136136
zset *zs = objectGetVal(obj);
137-
return zs->zsl->length;
137+
return zslGetLength(zs->zsl);
138138
} else if (obj->type == OBJ_HASH && obj->encoding == OBJ_ENCODING_HASHTABLE) {
139139
hashtable *ht = objectGetVal(obj);
140140
return hashtableSize(ht);

src/object.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -720,11 +720,11 @@ void dismissZsetObject(robj *o, size_t size_hint) {
720720
if (o->encoding == OBJ_ENCODING_SKIPLIST) {
721721
zset *zs = objectGetVal(o);
722722
zskiplist *zsl = zs->zsl;
723-
serverAssert(zsl->length != 0);
723+
serverAssert(zslGetLength(zsl) != 0);
724724
/* We iterate all nodes only when average member size is bigger than a
725725
* page size, and there's a high chance we'll actually dismiss something. */
726-
if (size_hint / zsl->length >= server.page_size) {
727-
zskiplistNode *zn = zsl->tail;
726+
if (size_hint / zslGetLength(zsl) >= server.page_size) {
727+
zskiplistNode *zn = zslGetTail(zsl);
728728
while (zn != NULL) {
729729
zskiplistNode *next = zn->backward;
730730
dismissMemory(zn, 0);
@@ -1244,9 +1244,9 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
12441244
} else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
12451245
hashtable *ht = ((zset *)objectGetVal(o))->ht;
12461246
zskiplist *zsl = ((zset *)objectGetVal(o))->zsl;
1247-
zskiplistNode *znode = zsl->header->level[0].forward;
1248-
asize += sizeof(zset) + sizeof(zskiplist) +
1249-
hashtableMemUsage(ht) + zmalloc_size(zsl->header);
1247+
zskiplistNode *zheader = zslGetHeader(zsl);
1248+
zskiplistNode *znode = zheader->level[0].forward;
1249+
asize += sizeof(zset) + zslGetAllocSize() + hashtableMemUsage(ht);
12501250
while (znode != NULL && samples < sample_size) {
12511251
elesize += zmalloc_size(znode);
12521252
samples++;

src/rdb.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -953,7 +953,7 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid, unsigned char rdbt
953953
zset *zs = objectGetVal(o);
954954
zskiplist *zsl = zs->zsl;
955955

956-
if ((n = rdbSaveLen(rdb, zsl->length)) == -1) return -1;
956+
if ((n = rdbSaveLen(rdb, zslGetLength(zsl))) == -1) return -1;
957957
nwritten += n;
958958

959959
/* We save the skiplist elements from the greatest to the smallest
@@ -962,7 +962,7 @@ ssize_t rdbSaveObject(rio *rdb, robj *o, robj *key, int dbid, unsigned char rdbt
962962
* element will always be the smaller, so adding to the skiplist
963963
* will always immediately stop at the head, making the insertion
964964
* O(1) instead of O(log(N)). */
965-
zskiplistNode *zn = zsl->tail;
965+
zskiplistNode *zn = zslGetTail(zsl);
966966
while (zn != NULL) {
967967
sds ele = zslGetNodeElement(zn);
968968
if ((n = rdbSaveRawString(rdb, (unsigned char *)ele, sdslen(ele))) == -1) {

src/server.h

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1482,23 +1482,34 @@ struct sharedObjectsStruct {
14821482

14831483
/* ZSETs use a specialized version of Skiplists */
14841484
typedef struct zskiplistNode {
1485-
double score;
1486-
struct zskiplistNode *backward;
1485+
union {
1486+
double score; /* Sorting score for node ordering. */
1487+
unsigned long length; /* Number of elements in the skiplist. */
1488+
};
1489+
union {
1490+
struct zskiplistNode *backward; /* Pointer to previous node for reverse traversal. */
1491+
struct zskiplistNode *tail; /* Tail element of the skiplist. */
1492+
};
14871493
struct zskiplistLevel {
14881494
struct zskiplistNode *forward;
14891495
/* At each level we keep the span, which is the number of elements which are on the "subtree"
14901496
* from this node at this level to the next node at the same level.
14911497
* One exception is the value at level 0. In level 0 the span can only be 1 or 0 (in case the last elements in the list)
14921498
* So we use it in order to hold the height of the node, which is the number of levels. */
14931499
unsigned long span;
1494-
} level[];
1495-
/* After the level[], sds header length (1 byte) and an embedded sds element are stored. */
1500+
} level[1]; /* Flexible array member - actual levels determined at node creation. */
1501+
/* For non-header nodes, after the level[], sds header length (1 byte) and an embedded sds element are stored. */
14961502
} zskiplistNode;
14971503

1504+
/* The header node does not store actual data (no score, no backward pointer,
1505+
* and its node height is fixed at ZSKIPLIST_MAXLEVEL).
1506+
* To save memory, we reuse the memory space of these fields in the header node to store:
1507+
* - skiplist length (number of elements)
1508+
* - tail pointer to the last element
1509+
* - maximum current level of the skiplist
1510+
* For detailed memory layout, refer to the zskiplistNode struct definition. */
14981511
typedef struct zskiplist {
1499-
struct zskiplistNode *header, *tail;
1500-
unsigned long length;
1501-
int level;
1512+
zskiplistNode header;
15021513
} zskiplist;
15031514

15041515
typedef struct zset {
@@ -3345,6 +3356,12 @@ typedef struct {
33453356
#define ERROR_COMMAND_FAILED (1 << 1) /* Indicate to update the command failed stats */
33463357

33473358
zskiplist *zslCreate(void);
3359+
int zslGetHeight(const zskiplist *zsl);
3360+
zskiplistNode *zslGetTail(const zskiplist *zsl);
3361+
void zslSetTail(zskiplist *zsl, zskiplistNode *tail);
3362+
unsigned long zslGetLength(const zskiplist *zsl);
3363+
zskiplistNode *zslGetHeader(zskiplist *zsl);
3364+
size_t zslGetAllocSize(void);
33483365
void zslFree(zskiplist *zsl);
33493366
zskiplistNode *zslInsert(zskiplist *zsl, double score, const_sds ele);
33503367
zskiplistNode *zslNthInRange(zskiplist *zsl, zrangespec *range, long n, long *rank);

src/sort.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -426,10 +426,11 @@ void sortCommandGeneric(client *c, int readonly) {
426426
if (desc) {
427427
long zsetlen = hashtableSize(((zset *)objectGetVal(sortval))->ht);
428428

429-
ln = zsl->tail;
429+
ln = zslGetTail(zsl);
430430
if (start > 0) ln = zslGetElementByRank(zsl, zsetlen - start);
431431
} else {
432-
ln = zsl->header->level[0].forward;
432+
zskiplistNode *zheader = zslGetHeader(zsl);
433+
ln = zheader->level[0].forward;
433434
if (start > 0) ln = zslGetElementByRank(zsl, start + 1);
434435
}
435436

0 commit comments

Comments
 (0)