[redis设计与实现][3]基本数据结构——字典

Redis字典采用哈希表实现。
哈希表:
[cce lang=”c”]
typedef struct dictht {
//哈希表数组
dictEntry **table;
//哈希表大小
unsigned long size;
//哈希表掩码,用于计算索引值,总是等于size – 1
unsigned long size mask;
//已有的节点数量
unsigned long used;
} dictht;
[/cce]

哈希表节点:
[cce lang=”c”]
typedef struct dictEntry {
//键
void *key;
//值
union {
void *val;
uint64_t u64;
int64_t s64;
double d;
} v;
//下一个哈希表节点
struct dictEntry *next;
} dictEntry;
[/cce]

字典:
[cce lang=”c”]
typedef struct dict {
//类型特定的函数
dictType *type;
//私有数据
void *privdata;
//哈希表
dictht ht[2];
long rehashidx; /* rehashing not in progress if rehashidx == -1 */
int iterators; /* number of iterators currently running */
} dict;
[/cce]

* type属性是一个指向dictType结构的指针,每个dictType结构保存了一簇用于操作特定类型键值对的函数
* privdata保存了需要传给类型特定函数的可选参数

[cce lang=”c”]
typedef struct dictType {
//计算哈希值的函数
unsigned int (*hashFunction)(const void *key);
//复制键的函数
void *(*keyDup)(void *privdata, const void *key);
//复制值的函数
void *(*valDup)(void *privdata, const void *obj);
//对比键的函数
int (*keyCompare)(void *privdata, const void *key1, const void *key2);
//销毁键的函数
void (*keyDestructor)(void *privdata, void *key);
//销毁值的函数
void (*valDestructor)(void *privdata, void *obj);
} dictType;
[/cce]

ht属性包含两个项的数组。一般情况下只使用ht[0]哈希表,ht[1]哈希表只在对ht[0]进行rehash的时候才会使用。

哈希算法:
计算:
hash = dict->type->hashFunction(key)
index = hash & dict->ht[x].sizemask
当字典被用作数据库的底层实现,或者哈希键的底层实现时,Redis使用MurmurHash2(https://code.google.com/p/smhasher/wiki/MurmurHash2)算法计算键的哈希值。

int dictAdd(dict *d, void *key, void *val);
[cce lang=”c”]
int dictAdd(dict *d, void *key, void *val)
{
dictEntry *entry = dictAddRaw(d,key);

if (!entry) return DICT_ERR;
dictSetVal(d, entry, val);
return DICT_OK;
}
dictEntry *dictAddRaw(dict *d, void *key)
{
int index;
dictEntry *entry;
dictht *ht;
//#define dictIsRehashing(d) ((d)->rehashidx != -1)
if (dictIsRehashing(d)) _dictRehashStep(d);

/* Get the index of the new element, or -1 if
* the element already exists. */
if ((index = _dictKeyIndex(d, key)) == -1)
return NULL;

/* Allocate the memory and store the new entry */
ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
entry = zmalloc(sizeof(*entry));
entry->next = ht->table[index];
ht->table[index] = entry;
ht->used++;

/* Set the hash entry fields. */
dictSetKey(d, entry, key);
return entry;
}
static int _dictKeyIndex(dict *d, const void *key)
{
unsigned int h, idx, table;
dictEntry *he;

/* Expand the hash table if needed */
if (_dictExpandIfNeeded(d) == DICT_ERR)
return -1;
/* Compute the key hash value */
//#define dictHashKey(d, key) (d)->type->hashFunction(key)
h = dictHashKey(d, key);
for (table = 0; table <= 1; table++) {
idx = h & d->ht[table].sizemask;
/* Search if this slot does not already contain the given key */
he = d->ht[table].table[idx];
while(he) {
//比较key是否已经存在,已经存在返回-1
if (dictCompareKeys(d, key, he->key))
return -1;
he = he->next;
}
if (!dictIsRehashing(d)) break;
}
return idx;
}
static int _dictExpandIfNeeded(dict *d)
{
/* Incremental rehashing already in progress. Return. */
if (dictIsRehashing(d)) return DICT_OK;

/* If the hash table is empty expand it to the initial size. */
//#define DICT_HT_INITIAL_SIZE 4
if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);

/* If we reached the 1:1 ratio, and we are allowed to resize the hash
* table (global setting) or we should avoid it but the ratio between
* elements/buckets is over the “safe” threshold, we resize doubling
* the number of buckets. */
//static unsigned int dict_force_resize_ratio = 5;
/*
dict_can_resize设置:
void updateDictResizePolicy(void) {
if (server.rdb_child_pid == -1 && server.aof_child_pid == -1)
dictEnableResize();
else
dictDisableResize();
}
当有同步硬盘进程的时候改成不能扩充
*/
if (d->ht[0].used >= d->ht[0].size &&
(dict_can_resize ||
d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
{
return dictExpand(d, d->ht[0].used*2);
}
return DICT_OK;
}
int dictExpand(dict *d, unsigned long size)
{
dictht n; /* the new hash table */
unsigned long realsize = _dictNextPower(size);

/* the size is invalid if it is smaller than the number of
* elements already inside the hash table */
if (dictIsRehashing(d) || d->ht[0].used > size)
return DICT_ERR;

/* Allocate the new hash table and initialize all pointers to NULL */
n.size = realsize;
n.sizemask = realsize-1;
n.table = zcalloc(realsize*sizeof(dictEntry*));
n.used = 0;

/* Is this the first initialization? If so it’s not really a rehashing
* we just set the first hash table so that it can accept keys. */
if (d->ht[0].table == NULL) {
d->ht[0] = n;
return DICT_OK;
}

/* Prepare a second hash table for incremental rehashing */
d->ht[1] = n;
d->rehashidx = 0;
return DICT_OK;
}
int dictReplace(dict *d, void *key, void *val);

/* Add an element, discarding the old if the key already exists.
* Return 1 if the key was added from scratch, 0 if there was already an
* element with such key and dictReplace() just performed a value update
* operation. */
int dictReplace(dict *d, void *key, void *val)
{
dictEntry *entry, auxentry;

/* Try to add the element. If the key
* does not exists dictAdd will suceed. */
if (dictAdd(d, key, val) == DICT_OK)
return 1;
/* It already exists, get the entry */
entry = dictFind(d, key);
/* Set the new value and free the old one. Note that it is important
* to do that in this order, as the value may just be exactly the same
* as the previous one. In this context, think to reference counting,
* you want to increment (set), and then decrement (free), and not the
* reverse. */
auxentry = *entry;
dictSetVal(d, entry, val);
dictFreeVal(d, &auxentry);
return 0;
}
dictEntry *dictFind(dict *d, const void *key)
{
dictEntry *he;
unsigned int h, idx, table;

if (d->ht[0].size == 0) return NULL; /* We don’t have a table at all */
if (dictIsRehashing(d)) _dictRehashStep(d);
h = dictHashKey(d, key);
for (table = 0; table <= 1; table++) {
idx = h & d->ht[table].sizemask;
he = d->ht[table].table[idx];
while(he) {
if (dictCompareKeys(d, key, he->key))
return he;
he = he->next;
}
if (!dictIsRehashing(d)) return NULL;
}
return NULL;
}
[/cce]

int dictRehash(dict *d, int n);
[cce lang=”c”]
int dictRehash(dict *d, int n) {
if (!dictIsRehashing(d)) return 0;

while(n–) {
dictEntry *de, *nextde;

/* Check if we already rehashed the whole table… */
//已经完成hash,释放ht[0]。将ht[0]指向ht[1]
if (d->ht[0].used == 0) {
zfree(d->ht[0].table);
d->ht[0] = d->ht[1];
_dictReset(&d->ht[1]);
d->rehashidx = -1;
return 0;
}

/* Note that rehashidx can’t overflow as we are sure there are more
* elements because ht[0].used != 0 */
assert(d->ht[0].size > (unsigned long)d->rehashed);
//如果rehash索引为空,跳过
while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
de = d->ht[0].table[d->rehashidx];
/* Move all the keys in this bucket from the old to the new hash HT */
//移动一个桶里面的所有key到新的哈希表
while(de) {
unsigned int h;

nextde = de->next;
/* Get the index in the new hash table */
h = dictHashKey(d, de->key) & d->ht[1].sizemask;
de->next = d->ht[1].table[h];
d->ht[1].table[h] = de;
d->ht[0].used–;
d->ht[1].used++;
de = nextde;
}
d->ht[0].table[d->rehashidx] = NULL;
d->rehashidx++;
}
return 1;
}

//为了防止占用太多的CPU时间,限制一次rehash的CPU时间
int dictRehashMilliseconds(dict *d, int ms) {
long long start = timeInMilliseconds();
int rehashes = 0;

while(dictRehash(d,100)) {
rehashes += 100;
if (timeInMilliseconds()-start > ms) break;
}
return rehashes;
}
[/cce]

调用者(redis.c):每次尝试渐进式rehash执行1ms
[cce lang=”c”]
int incrementallyRehash(int dbid) {
/* Keys dictionary */
if (dictIsRehashing(server.db[dbid].dict)) {
dictRehashMilliseconds(server.db[dbid].dict,1);
return 1; /* already used our millisecond for this loop… */
}
/* Expires */
if (dictIsRehashing(server.db[dbid].expires)) {
dictRehashMilliseconds(server.db[dbid].expires,1);
return 1; /* already used our millisecond for this loop… */
}
return 0;
}
[/cce]

转载自:https://coolex.info/blog/439.html

时间: 2024-09-10 06:06:57

[redis设计与实现][3]基本数据结构——字典的相关文章

[redis设计与实现][7]基本数据结构——对象

Redis对基础数据类型进行了封装,构建出上层的对象系统,这个系统包含:字符串对象.列表对象.哈希对象.集合对象和有序集合对象. Redis对象结构: [cce lang="c"] typedef struct redisObject { //类型 unsigned type:4; //编码 unsigned encoding:4; //LRU时间 unsigned lru:REDIS_LRU_BITS; /* lru time (relative to server.lruclock

[redis设计与实现][4]基本数据结构——跳跃表

Redis使用跳跃表和字典共同来实现有序集合键(sorted set). 定义: 跳跃表节点: [cce lang="c"] typedef struct zskiplistNode { //成员对象 robj *obj; //分值 double score; //后退指针 struct zskiplistNode *backward; //层结构 struct zskiplistLevel { //前进指针 struct zskiplistNode *forward; //跨度 un

[redis设计与实现][1]基本数据结构——sds

SDS(Simple Dynamic String):对C字符串的封装,可修改.可自动伸缩的字符串实现.Redis默认的字符串实现. SDS定义:(sds.h) [cce lang="c"] struct sdshdr { unsigned int len; unsigned int free; char buf[]; }; [/cce] 与C字符串的区别: * 常数复杂度获取字符串长度(字符串长度已经记录在结构体中) * 杜绝缓冲区溢出(每次操作前都会检查空间是否充足,自动扩张和收缩

[redis设计与实现][6]基本数据结构——压缩列表

压缩列表(ziplist)是列表键和哈希键的底层实现之一. 压缩列表结构: 属性 类型 长度 用途 zlbytes uint32_t 4字节 记录整个压缩列表占用的内存字节数:在对压缩列表进行内存重分配,或者计算zlend的位置时使用 zltail uint32_t 4字节 记录压缩列表尾节点记录压缩列表的起始地址便宜:用于快速定位尾节点 zllen uint16_t 2字节  记录压缩列表包含的节点数量:当这个值小于UINT64_MAX(65535)时,这个属性的值就是压缩列表包含的节点数量:

[redis设计与实现][5]基本数据结构——整数集合

整数集合(intset)用于集合键.当一个集合只包含整数值元素,并且数量不多的时候,会使用整数集合作为集合键的底层实现.相对于直接保存字符串,整数集合能够很好地节约内存,但是由于是数组保存,需要特别关注数组长度. 定义:(intset.h) [cce lang="c"] typedef struct intset { //编码方式 uint32_t encoding; //集合包含的元素数量 uint32_t length; //保存元素的数组 int8_t contents[]; }

《Redis设计与实现》阅读:Redis底层研究之简单动态字符串SDS

        除仅用于字符串字面量的情况外,对于可以被修改值的字符串的表示,Redis底层并没有采用C语言传统的字符串表示,即以空字符结尾的字符数组,而是采用专门为其设计的简单动态字符串作为其默认字符串表示,其英文全称为Simple Dynamic String,简称SDS.除了用于保存数据库中字符串值外,SDS也可以用于缓冲区buffer,比如AOF中的缓冲区.客户端输入缓冲区等.本文,我们将详细研究简单动态字符串SDS的实现及其在性能等方面的独特之处.             注:内容总结

[redis设计与实现][10]sentinel——简介和启动

Sentinel(Redis 3.0.0-rc1) Sentinel是Redis HA方案,一个或多个Sentinel实例组成的Sentinel系统,可以监视任意多个主服务器(master), 以及这些主服务器属下的所有从服务器(slave),并在被监视的主服务器进入下线状态时,自动在将被下线主服务器属下的某个从服务器升级为新的主服务器, 然后由新的主服务器代替已下线的主服务器继续处理命令请求. 基础数据结构 typedef struct sentinelRedisInstance { // 当

《Redis设计与实现》阅读:Redis底层研究之哈希表hashtable

        字典是一种存储键值对的抽象数据结构,其又被称为符号表(symbol table).关联数组(associative array)或映射(map).Redis使用字典存储键值对,而Redis在底层是通过自定义的哈希表来实现字典这一数据结构的.本文,我们将研究Redis中哈希表的实现.         结构         一个哈希表包含多个哈希表节点,每个哈希表节点保存了字典中的一个键值对.在Redis中,哈希表用dictht表示,其结构如下:         其中,      

[redis设计与实现][8]数据库

Redis数据库定义: typedef struct redisDb { dict *dict; /* The keyspace for this DB */ dict *expires; /* Timeout of keys with a timeout set */ dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */ dict *ready_keys; /* Blocked keys that recei