redis-dict字典

redis dict

  redis 数据库结构是通过dict 字典来完成的。该dict数据结构如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
typedef struct dictEntry { void*key;                                                                                                         union {                                                              
void *val;
uint64_t u64;
int64_t s64;
double d;
}v;
struct dictEntry *next;
} dictEntry;

typedef struct dictht {
dictEntry **table; //数组存放指针
unsigned long size;
unsigned long sizemask;
unsigned long used;
} dictht;
typedef struct dict {
dictType *type;
void *privdata;
dictht ht[2];
long rehashidx; /* rehashing not in progress if rehashidx == -1 */
int iterators; /* number of iterators currently running */
} dict;

从上面的数据结构的定义可以发现,ht[0].table ht[1].table 是用来实际存储数据的,它存储的逻辑也是根据hash函数计算出来的值然后与ht[0]的size中相与来决定具体的位置。这里为什么使用ht[2]呢?我们都知道在hash种免不了碰撞的key,所以随着数据的增长,每个bucket中的碰撞key也会越来越多,当达到一定的量级之后将会影响查询效率。这个时候另外一个ht就可以排上用场了。
  当正在使用的ht[x]key数达到一定的阈值之后,将会出发redis的rehash操作,该操作主要是新分配一个更大的或者更小的dictht然后,假定目前正在使用的是ht[0] 那么新分配的应该是位于ht[1]。然后循序渐进的讲数据从ht[0]慢慢的迁移到ht[1]:计算新的hash、与size与得到key的index。重复操作之后该dict的数据将趋于平衡。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
/* Performs N steps of incremental rehashing. Returns 1 if there are still
* keys to move from the old to the new hash table, otherwise 0 is returned.
* Note that a rehashing step consists in moving a bucket (that may have more
* than one key as we use chaining) from the old to the new hash table. */
int dictRehash(dict *d, int n) {
if (!dictIsRehashing(d)) return 0;

while(n--) {
dictEntry *de, *nextde;

/* Check if we already rehashed the whole table... */
if (d->ht[0].used == 0) {
zfree(d->ht[0].table);
d->ht[0] = d->ht[1];
_dictReset(&d->ht[1]);
d->rehashidx = -1;
return 0;
}

/* Note that rehashidx can't overflow as we are sure there are more
* elements because ht[0].used != 0 */
assert(d->ht[0].size > (unsigned long)d->rehashidx);
while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
de = d->ht[0].table[d->rehashidx];
/* Move all the keys in this bucket from the old to the new hash HT */
while(de) {
unsigned int h;

nextde = de->next;
/* Get the index in the new hash table */
h = dictHashKey(d, de->key) & d->ht[1].sizemask;
de->next = d->ht[1].table[h];
d->ht[1].table[h] = de;
d->ht[0].used--;
d->ht[1].used++;
de = nextde;
}
d->ht[0].table[d->rehashidx] = NULL;
d->rehashidx++;
}
return 1;
}
1 /* Expand or create the hash table */
2 int dictExpand(dict *d, unsigned long size)
3 {
4 dictht n; /* the new hash table */
5 unsigned long realsize = _dictNextPower(size);
6
7 /* the size is invalid if it is smaller than the number of
8 ¦* elements already inside the hash table */
9 if (dictIsRehashing(d) || d->ht[0].used > size)
10 ¦ return DICT_ERR;
  11
12 /* Allocate the new hash table and initialize all pointers to NULL */
13 n.size = realsize;
14 n.sizemask = realsize-1;
15 n.table = zcalloc(realsize*sizeof(dictEntry*));
16 n.used = 0;
   17
18 /* Is this the first initialization? If so it's not really a rehashing
19 ¦* we just set the first hash table so that it can accept keys. */
20 if (d->ht[0].table == NULL) {
21 ¦ d->ht[0] = n;
22 ¦ return DICT_OK;
23 }
  24
25 /* Prepare a second hash table for incremental rehashing */
26 d->ht[1] = n;
27 d->rehashidx = 0;
28 return DICT_OK;
29 }

查找key

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
dictEntry *dictFind(dict *d, const void *key)
{
dictEntry *he;
unsigned int h, idx, table;

if (d->ht[0].size == 0) return NULL; /* We don't have a table at all */
if (dictIsRehashing(d)) _dictRehashStep(d);
h = dictHashKey(d, key);
for (table = 0; table <= 1; table++) {
idx = h & d->ht[table].sizemask;
he = d->ht[table].table[idx];
while(he) {
if (dictCompareKeys(d, key, he->key))
return he;
he = he->next;
}
if (!dictIsRehashing(d)) return NULL;
}
return NULL;
}
坚持原创技术分享,您的支持奖鼓励我继续创作!