Redis字典

  1. 重点

    1. 数据结构

      1.  1
         2
         3
         4
         5
         6
         7
         8
         9
        10
        11
        12
        13
        14
        15
        16
        17
        18
        19
        20
        21
        
        struct dictEntry {
          //key
            void *key;
          //使用了union类型,节约内存空间
            union {
                void *val;
                uint64_t u64;
                int64_t s64;
                double d;
            } v;
          //这里指向了下一个节点
          //可以看得出来这里使用了头插法进行链表构建
          //针对新的节点由头部进行插入
          //新的节点就在同一个hash桶中
          //TODO 为什么在一个hash桶呢
            struct dictEntry *next;     /* Next entry in the same hash bucket. */
          //任意数量的字节(?)
            void *metadata[];           /* An arbitrary number of bytes (starting at a
                                         * pointer-aligned address) of size as returned
                                         * by dictType's dictEntryMetadataBytes(). */
        };
        
      2.  1
         2
         3
         4
         5
         6
         7
         8
         9
        10
        11
        12
        13
        14
        15
        16
        17
        18
        19
        20
        21
        22
        23
        24
        25
        26
        27
        28
        29
        30
        31
        
        typedef struct dictType {
          //dict相关的操作函数,以函数指针的方式存在
            uint64_t (*hashFunction)(const void *key);
            void *(*keyDup)(dict *d, const void *key);
            void *(*valDup)(dict *d, const void *obj);
            int (*keyCompare)(dict *d, const void *key1, const void *key2);
            void (*keyDestructor)(dict *d, void *key);
            void (*valDestructor)(dict *d, void *obj);
            int (*expandAllowed)(size_t moreMem, double usedRatio);
            /* Flags */
          //如果no_value 设置 设置当前flag,则无法进行访问
            /* The 'no_value' flag, if set, indicates that values are not used, i.e. the
             * dict is a set. When this flag is set, it's not possible to access the
             * value of a dictEntry and it's also impossible to use dictSetKey(). Entry
             * metadata can also not be used. */
            unsigned int no_value:1;
            /* If no_value = 1 and all keys are odd (LSB=1), setting keys_are_odd = 1
             * enables one more optimization: to store a key without an allocated
             * dictEntry. */
            unsigned int keys_are_odd:1;
            /* TODO: Add a 'keys_are_even' flag and use a similar optimization if that
             * flag is set. */
        
            /* Allow each dict and dictEntry to carry extra caller-defined metadata. The
             * extra memory is initialized to 0 when allocated. */
            size_t (*dictEntryMetadataBytes)(dict *d);
            size_t (*dictMetadataBytes)(void);
            /* Optional callback called after an entry has been reallocated (due to
             * active defrag). Only called if the entry has metadata. */
            void (*afterReplaceEntry)(dict *d, dictEntry *entry);
        } dictType;
        
      3. 1
        2
        3
        4
        5
        6
        7
        8
        
        typedef struct dict {
            dictEntry **table;
            dictType *type;
            unsigned long size;//使用的数量
            unsigned long sizemask;//mask计算索引值
            unsigned long used;//表示已经使用的个数
            void *privdata; //按照变量名称认为为私有数据指针,但是从代码目前看来。并没有被实际使用
        } dict;
        
      4. 1
        2
        3
        4
        5
        6
        
        
        typedef struct dictIterator {
            dict *ht;
            int index;
            dictEntry *entry, *nextEntry;
        } dictIterator;
        
      5.  1
         2
         3
         4
         5
         6
         7
         8
         9
        10
        11
        12
        13
        14
        15
        16
        17
        
        struct dict {
            dictType *type;
        
            dictEntry **ht_table[2];
            unsigned long ht_used[2];
        
            long rehashidx; /* rehashing not in progress if rehashidx == -1 */
        
            /* Keep small vars at end for optimal (minimal) struct padding */
            int16_t pauserehash; /* If >0 rehashing is paused (<0 indicates coding error) */
            signed char ht_size_exp[2]; /* exponent of size. (size = 1<<exp) */
          //新的dictht和旧的dictht,一般只会使用0,ht[1]哈希表只会对ht[0]哈希表进行rehash操作
            //如果rehashidx==-1表示不会进行rehash
            void *metadata[];           /* An arbitrary number of bytes (starting at a
                                         * pointer-aligned address) of size as defined
                                         * by dictType's dictEntryBytes. */
        };
        
      6.  1
         2
         3
         4
         5
         6
         7
         8
         9
        10
        11
        
        /* Our hash table capability is a power of two */
        static unsigned long _dictNextPower(unsigned long size) {
            unsigned long i = DICT_HT_INITIAL_SIZE;
        // dictht中的size每次都是<=2^s来进行分配
            if (size >= LONG_MAX) return LONG_MAX;
            while(1) {
                if (i >= size)
                    return i;
                i *= 2;
            }
        }
        
      7. 1
        2
        3
        
        #define dictIsRehashing(d) ((d)->rehashidx != -1)
        // - dict中的rehashindex=-1|
        // 标记是否在进行rehash,默认-1表示未进行,#表示当前dt[0]中第#个tb实例
        
      8.  1
         2
         3
         4
         5
         6
         7
         8
         9
        10
        11
        12
        13
        14
        15
        16
        17
        18
        19
        20
        21
        22
        23
        24
        25
        26
        27
        28
        29
        30
        31
        32
        33
        34
        35
        36
        37
        38
        39
        40
        41
        42
        43
        44
        45
        46
        47
        48
        49
        50
        51
        52
        53
        54
        55
        56
        57
        58
        59
        60
        61
        62
        63
        64
        65
        66
        67
        68
        69
        70
        71
        72
        73
        74
        75
        76
        77
        78
        79
        80
        81
        82
        83
        84
        85
        86
        
        int dictRehash(dict *d, int n) {
            int empty_visits = n*10; /* Max number of empty buckets to visit. */
            if (dict_can_resize == DICT_RESIZE_FORBID || !dictIsRehashing(d)) return 0;
            if (dict_can_resize == DICT_RESIZE_AVOID && 
                (DICTHT_SIZE(d->ht_size_exp[1]) / DICTHT_SIZE(d->ht_size_exp[0]) < dict_force_resize_ratio))
            {
                return 0;
            }
        
            while(n-- && d->ht_used[0] != 0) {
                dictEntry *de, *nextde;
        
                /* Note that rehashidx can't overflow as we are sure there are more
                 * elements because ht[0].used != 0 */
                assert(DICTHT_SIZE(d->ht_size_exp[0]) > (unsigned long)d->rehashidx);
              	//如果出现了空桶的话,则直接进行跳过,如果超过了10*n个桶,此时也直接切换以防长时间占用
                while(d->ht_table[0][d->rehashidx] == NULL) {
                    d->rehashidx++;
                    if (--empty_visits == 0) return 1;
                }
              //表明当前桶不为空
                de = d->ht_table[0][d->rehashidx];
                /* Move all the keys in this bucket from the old to the new hash HT */
                while(de) {
                    uint64_t h;
        
                    nextde = dictGetNext(de);
                    void *key = dictGetKey(de);
                    /* Get the index in the new hash table */
                    if (d->ht_size_exp[1] > d->ht_size_exp[0]) {
                        h = dictHashKey(d, key) & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
                    } else {
                        /* We're shrinking the table. The tables sizes are powers of
                         * two, so we simply mask the bucket index in the larger table
                         * to get the bucket index in the smaller table. */
                        h = d->rehashidx & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
                    }
                    if (d->type->no_value) {
                        if (d->type->keys_are_odd && !d->ht_table[1][h]) {
                            /* Destination bucket is empty and we can store the key
                             * directly without an allocated entry. Free the old entry
                             * if it's an allocated entry.
                             *
                             * TODO: Add a flag 'keys_are_even' and if set, we can use
                             * this optimization for these dicts too. We can set the LSB
                             * bit when stored as a dict entry and clear it again when
                             * we need the key back. */
                            assert(entryIsKey(key));
                            if (!entryIsKey(de)) zfree(decodeMaskedPtr(de));
                            de = key;
                        } else if (entryIsKey(de)) {
                            /* We don't have an allocated entry but we need one. */
                            de = createEntryNoValue(key, d->ht_table[1][h]);
                        } else {
                            /* Just move the existing entry to the destination table and
                             * update the 'next' field. */
                            assert(entryIsNoValue(de));
                            dictSetNext(de, d->ht_table[1][h]);
                        }
                    } else {
                        dictSetNext(de, d->ht_table[1][h]);
                    }
                    d->ht_table[1][h] = de;
                    d->ht_used[0]--;
                    d->ht_used[1]++;
                    de = nextde;
                }
                d->ht_table[0][d->rehashidx] = NULL;
                d->rehashidx++;
            }
        
            /* Check if we already rehashed the whole table... */
            if (d->ht_used[0] == 0) {
                zfree(d->ht_table[0]);
                /* Copy the new ht onto the old one */
                d->ht_table[0] = d->ht_table[1];
                d->ht_used[0] = d->ht_used[1];
                d->ht_size_exp[0] = d->ht_size_exp[1];
                _dictReset(d, 1);
                d->rehashidx = -1;
                return 0;
            }
        
            /* More to rehash... */
            return 1;
        }
        
      9. dictht中的sizemask用于计算当前的key位于哪一个dictEntry。其大小等于size-1,index = hash & dict->ht .sizemask;使用&运算要比%性能更高

    2. 哈希算法

      在redis中hash默认使用的是siphash算法(当然也可以自定义)。计算哈希值和索引值的方法如下:

      1. 使用字典设置的哈希函数,计算键 key 的哈希值

      hash = dict->type->hashFunction(key);

      1. 使用哈希表的sizemask属性和第一步得到的哈希值,计算索引值

      index = hash & dict->ht[x].sizemask;相当于取模运算

      • 哈希冲突

      redis解决哈希冲突的方法是链地址法,而且采用的是头插式

      • 哈希扩容

      采用链地址法来解决冲突,如果数据量太大,大量的冲突会导致某个桶上的链表非常长,不利于数据查询,因此需要根据负载因子(负载因子用来评估键冲突的概率)来判断当前是否需要进行扩容,见函数_dictExpandIfNeeded。其中resize还与是否正在进行持久化有关:

      1
      2
      3
      4
      5
      6
      
      void updateDictResizePolicy(void) {
          if (server.rdb_child_pid == -1 && server.aof_child_pid == -1)
              dictEnableResize();
          else
              dictDisableResize();
      }
      

      使用 dictEnableResize() / dictDisableResize() 我们可以根据需要启用/禁用哈希表的大小调整。 这对 Redis 来说非常重要,因为我们使用写时复制,并且不想在有子执行保存操作时移动太多内存。

      所以resize 与当前正在进行的持久化相关

    3. rehash优化

  2. 迭代器

    1. 数据结构
    2. 迭代器类型
    3. 迭代器选择
  3. 线段跳表