redis源码分析——5、dict实现

编程入门 行业动态 更新时间:2024-10-06 18:26:09

redis<a href=https://www.elefans.com/category/jswz/34/1770099.html style=源码分析——5、dict实现"/>

redis源码分析——5、dict实现

dics是一种常用的数据结构,而C语言没有提供内置类型,本文一起看看redis中dict的实现

1. 怎么样自己实现一个dict

​ 不像C++、java等高级语言内置了map,C语言并没有提供dict库,所以如果想使用dict就需要自己实现。那么实现一个dict有方法呢?

  1. 数组法
    • 这也是最容易实现的一种方法,简单说就是开辟一个长度为N的大数组(通常N是一个质数),然后通过一个哈希函数计算key的哈希值d,然后用d%N得到key对于的数组下边。设想一下,如果数组开的足够大,而且哈希函数足够散列,我们可以保证不同的key的对应的下标绝不会冲突,那么这样就可以实现一个dict了。
    • 上述介绍实现dict的方法在实际中有很大的限制,比如我们不知道数组开大多大合适,即使开了一百万,那么随着业务的增长,这一百万也不够。其次没有一个哈希函数能够保证不同key的哈希值不重复。
    • 鉴于此,我们在用数组实现dict的时候数组长度一般是动态的,根据元素个数/数组长度这一指标来判断扩缩容,其次我们找一个尽量能打散的哈希函数。最后,对于冲突的key,我们可以简单的使用拉链法解决冲突。
  2. 红黑树法
    • C++中的map底层实现就是红黑树。红黑树的核心就是尽量保证二叉树是左右平衡的,所以红黑树的实现较为复杂,需要区分各种情况旋转。不需要哈希函数,只需要实现key的比较函数就可以了,所以也就不存在key冲突。
  3. 跳表法
    • 如果想实现红黑树的效果,但是又不想实现红黑树,那么跳表是个不错的选择,它的效率几乎和红黑树等价。redis中zset底层就使用的了调表,具体实现后续再说。

2. redis中dict的实现

  • redis中dict的实现用了上述第一种方法也就是数组拉链法,对于哈希函数,redis选用了Murmurhash2。下面看看结构定义。

    typedef struct dictEntry {void *key;union {void *val;uint64_t u64;int64_t s64;double d;} v;struct dictEntry *next;
    } dictEntry; /* entry可以认为是一个节点,除了存放value以外还存放了key和next,这是因为如果发生了冲突,我们需要进一步比较key */typedef struct dictType {uint64_t (*hashFunction)(const void *key);void *(*keyDup)(void *privdata, const void *key);void *(*valDup)(void *privdata, const void *obj);int (*keyCompare)(void *privdata, const void *key1, const void *key2);void (*keyDestructor)(void *privdata, void *key);void (*valDestructor)(void *privdata, void *obj);
    } dictType; /* 这里定义一些接口,可以实现不同的处理函数达,这样就达到了接口的效果 *//* This is our hash table structure. Every dictionary has two of this as we* implement incremental rehashing, for the old to the new table. */
    typedef struct dictht {dictEntry **table; /* 这儿为什么是指针的指针? 其实是指针数组,每个元素存放的entry链表的头指针 */unsigned long size;unsigned long sizemask;unsigned long used;
    } dictht; /* 定义了数组,table其实是个指针数组,根据下标可以访问到key*/typedef struct dict {dictType *type;	/* 自定义各种字典的实现接口 */void *privdata; /* 自定义数据字段 */dictht ht[2]; /* 2个table,主要是在rehash时候用*/long rehashidx; /* rehashing not in progress if rehashidx == -1 */unsigned long iterators; /* number of iterators currently running */
    } dict;
    

3. 基本操作

  • 公共操作

    • 根据key查找数组中的index

      /* Returns the index of a free slot that can be populated with* a hash entry for the given 'key'.* If the key already exists, -1 is returned* and the optional output parameter may be filled.** Note that if we are in the process of rehashing the hash table, the* index is always returned in the context of the second (new) hash table. */
      /* 根据key计算其所应该在的槽位,如果key已经存在,则返回-1 * 函数同时传入key和哈希值两个参数,这是因为可能出现冲突,仅仅靠哈希值还不能判断是否存在,必须还要对比key是否相同* 如果传入的existing不为空,则existing返回已经存在的结点地址*/
      static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)
      {unsigned long idx, table;dictEntry *he;if (existing) *existing = NULL;/* Expand the hash table if needed */if (_dictExpandIfNeeded(d) == DICT_ERR)return -1;for (table = 0; table <= 1; table++) {idx = hash & d->ht[table].sizemask;/* Search if this slot does not already contain the given key */he = d->ht[table].table[idx];while(he) {if (key==he->key || dictCompareKeys(d, key, he->key)) {if (existing) *existing = he;return -1;}he = he->next;}if (!dictIsRehashing(d)) break;}return idx;
      }
      
    • 扩容

      /* Expand the hash table if needed */
      static int _dictExpandIfNeeded(dict *d)
      {/* Incremental rehashing already in progress. Return. */if (dictIsRehashing(d)) return DICT_OK;/* If the hash table is empty expand it to the initial size. *//* 如果数组还是空,则直接扩容到DICT_HT_INITIAL_SIZE,默认是4 */if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);/* If we reached the 1:1 ratio, and we are allowed to resize the hash* table (global setting) or we should avoid it but the ratio between* elements/buckets is over the "safe" threshold, we resize doubling* the number of buckets. *//* 触发扩容有几个条件* 如果“填充率”>=1,也就是used>=size时应该扩容,当然也要看当前容不容许扩容* 如果“填充率”>dict_force_resize_ratio(默认是5),也就是说平均冲突率大于5的时候就强制扩容*/if (d->ht[0].used >= d->ht[0].size &&(dict_can_resize ||d->ht[0].used/d->ht[0].size > dict_force_resize_ratio)){return dictExpand(d, d->ht[0].used*2);}return DICT_OK;
      }
      
    • rehash

      /* 几乎所有的dict操作函数中都有_dictRehashStep调用,这其实是将扩容过程分摊了,否组扩容可能会消耗很长时间 */
      static void _dictRehashStep(dict *d) {if (d->iterators == 0) dictRehash(d,1);
      }/* Performs N steps of incremental rehashing. Returns 1 if there are still* keys to move from the old to the new hash table, otherwise 0 is returned.** Note that a rehashing step consists in moving a bucket (that may have more* than one key as we use chaining) from the old to the new hash table, however* since part of the hash table may be composed of empty spaces, it is not* guaranteed that this function will rehash even a single bucket, since it* will visit at max N*10 empty buckets in total, otherwise the amount of* work it does would be unbound and the function may block for a long time. */
      int dictRehash(dict *d, int n) {int empty_visits = n*10; /* Max number of empty buckets to visit. */if (!dictIsRehashing(d)) return 0;while(n-- && d->ht[0].used != 0) {dictEntry *de, *nextde;/* Note that rehashidx can't overflow as we are sure there are more* elements because ht[0].used != 0 */assert(d->ht[0].size > (unsigned long)d->rehashidx);while(d->ht[0].table[d->rehashidx] == NULL) { /* 找到非空结点 */d->rehashidx++;if (--empty_visits == 0) return 1; /* 这一行在3.0是没有的,这么写是防止while卡在空结点上 */}de = d->ht[0].table[d->rehashidx];/* Move all the keys in this bucket from the old to the new hash HT */while(de) {uint64_t h;nextde = de->next;/* Get the index in the new hash table */h = dictHashKey(d, de->key) & d->ht[1].sizemask; /* 将该槽下的所有结点插入到新table的对应槽位 */de->next = d->ht[1].table[h];d->ht[1].table[h] = de;d->ht[0].used--;d->ht[1].used++;de = nextde;}d->ht[0].table[d->rehashidx] = NULL;d->rehashidx++;}/* Check if we already rehashed the whole table... *//* 如果h[0]的元素为空,这说明rehash完成,则将h[0]释放,且h[0]接管h[1]*/if (d->ht[0].used == 0) {zfree(d->ht[0].table);d->ht[0] = d->ht[1];_dictReset(&d->ht[1]);d->rehashidx = -1;return 0;}/* More to rehash... */return 1;
      }
  • 创建

    /* Create a new hash table */
    dict *dictCreate(dictType *type,void *privDataPtr)
    {dict *d = zmalloc(sizeof(*d));_dictInit(d,type,privDataPtr);return d;
    }/* Initialize the hash table */
    int _dictInit(dict *d, dictType *type,void *privDataPtr)
    {_dictReset(&d->ht[0]); /* 所有成员置为0或者null*/_dictReset(&d->ht[1]);d->type = type;d->privdata = privDataPtr;d->rehashidx = -1;d->iterators = 0;return DICT_OK;
    }
    
  • 插入

    /* 根据key新建一个entry,如果key已经存在,则返回null */
    dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
    {long index;dictEntry *entry;dictht *ht;if (dictIsRehashing(d)) _dictRehashStep(d);/* Get the index of the new element, or -1 if* the element already exists. *//* 对于新加结点,如果key已经存在,则直接返回NULL*/if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -1)return NULL;/* Allocate the memory and store the new entry.* Insert the element in top, with the assumption that in a database* system it is more likely that recently added entries are accessed* more frequently. *//* 根据就近访问原则,新节点插入到头 */ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];entry = zmalloc(sizeof(*entry));entry->next = ht->table[index];ht->table[index] = entry;ht->used++;/* Set the hash entry fields. *//* 设置结点的key */dictSetKey(d, entry, key);return entry;
    }/* Add an element to the target hash table */
    int dictAdd(dict *d, void *key, void *val)
    {/* 首先根据key新建了一个entry */dictEntry *entry = dictAddRaw(d,key,NULL);if (!entry) return DICT_ERR;/* 给该entry设置value */dictSetVal(d, entry, val);return DICT_OK;
    }
    
  • 删除

    /* Remove an element, returning DICT_OK on success or DICT_ERR if the* element was not found. */
    int dictDelete(dict *ht, const void *key) {return dictGenericDelete(ht,key,0) ? DICT_OK : DICT_ERR;
    }/* Search and remove an element. This is an helper function for* dictDelete() and dictUnlink(), please check the top comment* of those functions. */
    /* 根据key删除元素 */
    static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {uint64_t h, idx;dictEntry *he, *prevHe;int table;if (d->ht[0].used == 0 && d->ht[1].used == 0) return NULL; /* dict为空,短路返回*/if (dictIsRehashing(d)) _dictRehashStep(d);h = dictHashKey(d, key); /* 计算hash值 */for (table = 0; table <= 1; table++) { /* 分别变量h[0]和h[1] */idx = h & d->ht[table].sizemask; /* 根据key的hash值计算出key在hash tale中的槽位 */he = d->ht[table].table[idx]; /* 得到同hash 值的entry列表 */prevHe = NULL;while(he) {if (key==he->key || dictCompareKeys(d, key, he->key)) { /* 找到了key *//* Unlink the element from the list */if (prevHe)prevHe->next = he->next; /* 链表的删除逻辑 */elsed->ht[table].table[idx] = he->next;if (!nofree) {dictFreeKey(d, he);dictFreeVal(d, he);zfree(he);}d->ht[table].used--;return he;}prevHe = he;he = he->next;}if (!dictIsRehashing(d)) break;}return NULL; /* not found */
    }
    

原文出处:.html

更多推荐

redis源码分析——5、dict实现

本文发布于:2024-02-13 16:43:06,感谢您对本站的认可!
本文链接:https://www.elefans.com/category/jswz/34/1759712.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文标签:源码   redis   dict

发布评论

评论列表 (有 0 条评论)
草根站长

>www.elefans.com

编程频道|电子爱好者 - 技术资讯及电子产品介绍!