radix tree,又稱做基數樹,是一種適合于構建key(index)與value(item)相關聯的數據結構。內核中使用非常廣泛。本文主要聚焦linux內核基數樹的代碼實現,大量注釋過的代碼。
radix-tree組織結構如下:
1、數據結構
/** The bottom two bits of the slot determine how the remaining bits in the slot are interpreted:** 00 - data pointer (item就是data pointer)* 01 - internal entry (指向radix-tree下一級,一個sibling entry或則指示slot中的entry被移動到其他位置的指針)* 10 - exceptional entry (存放swap entry被標記為異常項,區別于執行struct page的指針)* 11 - this bit combination is currently unused/reserved** The internal entry may be a pointer to the next level in the tree, a* sibling entry, or an indicator that the entry in this slot has been moved* to another location in the tree and the lookup should be restarted. While* NULL fits the 'data pointer' pattern, it means that there is no entry in* the tree for this index (no matter what level of the tree it is found at).* This means that you cannot store NULL in the tree as a value for the index.*/
#define RADIX_TREE_ENTRY_MASK 3UL
#define RADIX_TREE_INTERNAL_NODE 1UL/** Most users of the radix tree store pointers but shmem/tmpfs stores swap* entries in the same tree. They are marked as exceptional entries to* distinguish them from pointers to struct page.* EXCEPTIONAL_ENTRY tests the bit, EXCEPTIONAL_SHIFT shifts content past it.* 基數樹的大多數用戶存儲指針,但shmem/tmpfs在同一棵樹中存儲交換項,它們被標記為異常項,以區別于指向結構頁(struct page)的指針**/
#define RADIX_TREE_EXCEPTIONAL_ENTRY 2
#define RADIX_TREE_EXCEPTIONAL_SHIFT 2下面函數用于檢查對齊情況
/*** radix_tree_exceptional_entry - radix_tree_deref_slot gave exceptional entry?* @arg: value returned by radix_tree_deref_slot* Returns: 0 if well-aligned pointer, non-0 if exceptional entry.*/
static inline int radix_tree_exceptional_entry(void *arg)
{/* Not unlikely because radix_tree_exception often tested first */return (unsigned long)arg & RADIX_TREE_EXCEPTIONAL_ENTRY;
}/*** radix_tree_exception - radix_tree_deref_slot returned either exception?* @arg: value returned by radix_tree_deref_slot* Returns: 0 if well-aligned pointer, non-0 if either kind of exception.*/
static inline int radix_tree_exception(void *arg)
{return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK);
}/*config文件配置*/
CONFIG_BASE_SMALL=0 #define RADIX_TREE_MAX_TAGS 3/*shift值為基礎步長,每次增長單位為6*/
#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) //CONFIG_BASE_SMALL=0,6/*節點slots[]元素個數*/
#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) //1<<6,64
#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) //64-1= 0x3f/*RADIX_TREE_MAP_SIZE 需要 long 對齊,向上取整,1*/
#define RADIX_TREE_TAG_LONGS ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) //8×8=64/*64/6向上取整 11,radix-tree的深度,index為64bit,每層占據6bit,64/6向上取整為11,故而radix-tree樹深度為11*/
#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, RADIX_TREE_MAP_SHIFT))/* The top bits of gfp_mask are used to store the root tags and the IDR flag */
#define ROOT_IS_IDR ((__force gfp_t)(1 << __GFP_BITS_SHIFT)) //1<<25
#define ROOT_TAG_SHIFT (__GFP_BITS_SHIFT + 1) //25+1=26Linux radix-tree的存儲鍵值index是無符號長整型,如果每次右移一個固定步長后,樹的深度加一,則樹的深度也是固定的(比特位數/步長)。
而 RADIX_TREE_MAP_SHIFT 就是bit位的步長,Linux默認值為6,其含義是每右移6 bit為一個單位。數據結構:
/** @count is the count of every non-NULL element in the ->slots array* whether that is an exceptional entry, a retry entry, a user pointer,* a sibling entry or a pointer to the next level of the tree.* @exceptional is the count of every element in ->slots which is* either radix_tree_exceptional_entry() or is a sibling entry for an* exceptional entry.*/
/*radix-tree 成員節點*/
struct radix_tree_node {/*該層級上的shift值,最底層節點shift值為0,其父節點shift值為6,依次網上類推*/unsigned char shift; /* Bits remaining in each slot *//*該節點在父節點slots[]中的偏移位置*/unsigned char offset; /* Slot offset in parent *//*slots[]中非NULL元素個數*/unsigned char count; /* Total entry count */unsigned char exceptional; /* Exceptional entry count *//*父節點*/struct radix_tree_node *parent; /* Used when ascending tree *//*所屬radix-tree 根*/struct radix_tree_root *root; /* The tree we belong to */union {struct list_head private_list; /* For tree user */struct rcu_head rcu_head; /* Used when freeing node */};/*slots[]存放子節點*/void __rcu *slots[RADIX_TREE_MAP_SIZE]; //64/*tags作用??*/unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; //tags[3][1]
};如果用1個bit標記1個slots[]的狀態,64個slots[]剛好需要1個64bit的變量。
一個slot如果有3個狀態,當然需要3個64bit的變量。這就是unsignedlong tags[3]的來歷。在page cache radix tree中,三個狀態定義如下:/** Radix-tree tags, for tagging dirty and writeback pages within the pagecache radix trees*/
#define PAGECACHE_TAG_DIRTY 0
#define PAGECACHE_TAG_WRITEBACK 1
#define PAGECACHE_TAG_TOWRITE 2
tags[0][0]的64bit用來標記page 的dirty狀態,bit0-63的值分別對應slots[]0-63的狀態
tags[1][0]的64bit用來標記page 的WRITEBACK狀態,bit0-63的值分別對應slots[]0-63的狀態
tags[2][0]的64bit用來標記page 的TOWRITE狀態,bit0-63的值分別對應slots[]0-63的狀態/*radix-tree 根節點*/
struct radix_tree_root {gfp_t gfp_mask;struct radix_tree_node __rcu *rnode;
};
2、接口實現
2.1、準備工作
linux內核通過kmem_cache_create為radix-tree提前分配了一塊slab內存空間(radix_tree_node_cachep)用于快速分配struct radix_tree_node節點。
又通過radix_tree_node_cachep提前為每個CPU分配了一些struct radix_tree_node節點,通過struct radix_tree_preload將其串聯起來進行管理。
void __init radix_tree_init(void)
{int ret;BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);radix_tree_node_cachep = kmem_cache_create("radix_tree_node",sizeof(struct radix_tree_node), 0,SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,radix_tree_node_ctor);/*計算不同height時radix-tree中節點的總數,不包括root,保存在height_to_maxnodes[]中*/radix_tree_init_maxnodes();ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead",NULL, radix_tree_cpu_dead);WARN_ON(ret < 0);
}/*根據height值計算radix-tree中index的最大值*/
static __init unsigned long __maxindex(unsigned int height)
{unsigned int width = height * RADIX_TREE_MAP_SHIFT; /*height * 6*/int shift = RADIX_TREE_INDEX_BITS - width; /*64 - 6*height*/if (shift < 0)return ~0UL;if (shift >= BITS_PER_LONG)return 0UL;return ~0UL >> shift;
}/*計算不同height時radix-tree中節點的總數,不包括root*/
static __init void radix_tree_init_maxnodes(void)
{unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1]; //12unsigned int i, j;for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)height_to_maxindex[i] = __maxindex(i); /*0,64-1,4096-1,....height為不同值時,radix-tree樹中能夠存放的index的最大值*/for (i = 0; i < ARRAY_SIZE(height_to_maxnodes); i++) {for (j = i; j > 0; j--)height_to_maxnodes[i] += height_to_maxindex[j - 1] + 1;}
}/** Per-cpu pool of preloaded nodes*/
struct radix_tree_preload {unsigned nr;/* nodes->parent points to next preallocated node */struct radix_tree_node *nodes;
};
/*per-cpu變量 radix_tree_preloads*/
static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };/*每個CPU上預先分配21個struct radix_tree_node節點*/
#define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1) //11*2-1=21/** Load up this CPU's radix_tree_node buffer with sufficient objects to* ensure that the addition of a single element in the tree cannot fail. On* success, return zero, with preemption disabled. On error, return -ENOMEM* with preemption not disabled.** To make use of this facility, the radix tree must be initialised without* __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().*/
int radix_tree_preload(gfp_t gfp_mask)
{/* Warn on non-sensical(無意義) use... */WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
}/** Load up this CPU's radix_tree_node buffer with sufficient objects to* ensure that the addition of a single element in the tree cannot fail. On* success, return zero, with preemption disabled. On error, return -ENOMEM* with preemption not disabled.** To make use of this facility, the radix tree must be initialised without* __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().*/
static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
{struct radix_tree_preload *rtp;struct radix_tree_node *node;int ret = -ENOMEM;/** Nodes preloaded by one cgroup can be be used by another cgroup, so* they should never be accounted to any particular memory cgroup.*/gfp_mask &= ~__GFP_ACCOUNT;preempt_disable(); /*因為要訪問percpu變量,這里需要禁止搶占,防止訪問percpu變量過程中,執行線程遷移到其他cpu上運行*/rtp = this_cpu_ptr(&radix_tree_preloads);while (rtp->nr < nr) {preempt_enable(); /*分配內存過程中,可能出現阻塞,所以在調用內存分配函數之前,使能搶占 *//*預先分配radix-tree節點*/node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);if (node == NULL)goto out;preempt_disable(); /*分配內存完成,需要重新禁止搶占,重新獲取percpu變量,也需要重新判斷percpu內存池的內存對象是否充足*/rtp = this_cpu_ptr(&radix_tree_preloads);if (rtp->nr < nr) {/*預先分配的radix-tree節點串聯起來*/node->parent = rtp->nodes;rtp->nodes = node;rtp->nr++;} else {kmem_cache_free(radix_tree_node_cachep, node);}}ret = 0;
out:return ret;
}/*radix_tree_preload和radix_tree_preload_end配對使用*/
static inline void radix_tree_preload_end(void)
{preempt_enable();
}如下示例:
radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
spin_lock(&im->ino_lock);
...
spin_unlock(&im->ino_lock);
radix_tree_preload_end();
2.2、初始化radix-tree root
#define RADIX_TREE_INIT(mask) { \.gfp_mask = (mask), \.rnode = NULL, \}/*初始化radix_tree root節點*/#define RADIX_TREE(name, mask) \struct radix_tree_root name = RADIX_TREE_INIT(mask)#define INIT_RADIX_TREE(root, mask) \do { \(root)->gfp_mask = (mask); \(root)->rnode = NULL; \} while (0)
2.3、往radix-tree中插入節點
/*向radix-tree中插入節點,index為鍵值,entry為index存儲的對應的指針即item*/
static inline int radix_tree_insert(struct radix_tree_root *root,unsigned long index, void *entry)
{return __radix_tree_insert(root, index, 0, entry); /*order參數為0*/
}/*** __radix_tree_insert - insert into a radix tree* @root: radix tree root* @index: index key* @order: key covers the 2^order indices(指數) around index,Key覆蓋了index周圍的2^order指數的index,連續多個index(從index起始)指向同一個item* @item: item to insert** Insert an item into the radix tree at position @index.*/
int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, unsigned order, void *item)
{struct radix_tree_node *node;void __rcu **slot;int error;/*item為radix-tree的內部節點則bug*/BUG_ON(radix_tree_is_internal_node(item));/*創建radix-tree節點,做的事情很多,終歸是將inex對應的節點創建到radix-tree中nodep指向radix-tree中index對應的最底層的節點;slotp指向radix-tree中index對應的最底層節點的slots[];*/error = __radix_tree_create(root, index, order, &node, &slot);if (error)return error;/*將item數據插入到radix-tree 最低層節點slots[]對應位置中*/error = insert_entries(node, slot, item, order, false);if (error < 0)return error;if (node) {unsigned offset = get_slot_offset(node, slot);BUG_ON(tag_get(node, 0, offset));BUG_ON(tag_get(node, 1, offset));BUG_ON(tag_get(node, 2, offset));} else {BUG_ON(root_tags_get(root));}return 0;
}/*** __radix_tree_create - create a slot in a radix tree* @root: radix tree root* @index: index key* @order: index occupies 2^order aligned slots* @nodep: returns node* @slotp: returns slot** Create, if necessary, and return the node and slot for an item* at position @index in the radix tree @root.** Until there is more than one item in the tree, no nodes are* allocated and @root->rnode is used as a direct slot instead of* pointing to a node, in which case *@nodep will be NULL.** Returns -ENOMEM, or 0 for success.*/
int __radix_tree_create(struct radix_tree_root *root, unsigned long index,unsigned order, struct radix_tree_node **nodep,void __rcu ***slotp)
{struct radix_tree_node *node = NULL, *child;void __rcu **slot = (void __rcu **)&root->rnode; /*如果root為剛剛聲明的radix-tree,那么root->rnode則為NULL*/unsigned long maxindex;unsigned int shift, offset = 0;unsigned long max = index | ((1UL << order) - 1); /*order用于計算覆蓋范圍*/gfp_t gfp = root_gfp_mask(root);/*如果radix-tree root為剛剛聲明的radix-tree,那么maxindex=0,child=NULL,shift=0否則返回radix-tree樹能夠存儲的最大index值(maxindex),root->rnode(child))以及返回shift(root當前shift+RADIX_TREE_MAP_SHIFT)值*/shift = radix_tree_load_root(root, &child, &maxindex);/* Make sure the tree is high enough. */if (order > 0 && max == ((1UL << order) - 1)) /*index已經占滿*/max++;if (max > maxindex) {/*max > maxindex 且root->rnode存在則需要對radix-tree進行擴展,返回當前radix-tree shift+RADIX_TREE_MAP_SHIFT的值如果root->rnode不存在則直接返回shift+RADIX_TREE_MAP_SHIFT的值,后續創建節點來完成擴展*/int error = radix_tree_extend(root, gfp, max, shift); /*shift值比實際大RADIX_TREE_MAP_SHIFT*/if (error < 0)return error;shift = error;child = rcu_dereference_raw(root->rnode);}while (shift > order) {shift -= RADIX_TREE_MAP_SHIFT; /*前面傳出來shift+RADIX_TREE_MAP_SHIFT值,這里進行-RADIX_TREE_MAP_SHIFT*/if (child == NULL) {/* Have to add a child node. */child = radix_tree_node_alloc(gfp, node, root, shift,offset, 0, 0);if (!child)return -ENOMEM;rcu_assign_pointer(*slot, node_to_entry(child)); /*slot指向新節點,netry值*/if (node)node->count++;} else if (!radix_tree_is_internal_node(child))break;/* Go a level down */node = entry_to_node(child);/*計算出index在parent slots[]中的偏移位置(返回值)以及slots[offset]值(nodep保存)*/offset = radix_tree_descend(node, &child, index);slot = &node->slots[offset]; /*index在父節點對應的slots[]*/}if (nodep) /*nodep指向radix-tree中index對應的最底層的節點*/*nodep = node;if (slotp)*slotp = slot; /*slotp指向radix-tree中index對應的最底層節點的slots[]*/return 0;
}/*
獲取radix-tree樹當前能夠存檔的maxindex,以及返回當前shift+RADIX_TREE_MAP_SHIFT
maxindex:radix-tree 能夠存放的maxindex
nodep:指向root->rnode
*/
static unsigned radix_tree_load_root(const struct radix_tree_root *root,struct radix_tree_node **nodep, unsigned long *maxindex)
{struct radix_tree_node *node = rcu_dereference_raw(root->rnode);*nodep = node;/*root->rnode為內部節點情況,說明root->rnode存在指向內容*/if (likely(radix_tree_is_internal_node(node))) {node = entry_to_node(node);/*radix-tree樹當前能夠存儲的最大鍵值*/*maxindex = node_maxindex(node);return node->shift + RADIX_TREE_MAP_SHIFT; /*為什么要+RADIX_TREE_MAP_SHIFT???*/}*maxindex = 0;return 0;
}static inline bool radix_tree_is_internal_node(void *ptr)
{return ((unsigned long)ptr & RADIX_TREE_ENTRY_MASK) == RADIX_TREE_INTERNAL_NODE;
} /*清除內部節點屬性*/
static inline struct radix_tree_node *entry_to_node(void *ptr)
{return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
}/*添加內部節點屬性*/
static inline void *node_to_entry(void *ptr)
{return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
}#define RADIX_TREE_RETRY node_to_entry(NULL) /** The maximum index which can be stored in a radix tree*/
static inline unsigned long shift_maxindex(unsigned int shift)
{/*shift值必然是 RADIX_TREE_MAP_SHIFT 的整數倍,因為是以RADIX_TREE_MAP_SHIFT 為步長進行增加(100 0000 << shift)-1,計算出node當前能夠存儲的最大index值*/return (RADIX_TREE_MAP_SIZE << shift) - 1;
}/*node->shift標識node的shift值,計算出node當前能夠存儲的最大index值*/
static inline unsigned long node_maxindex(const struct radix_tree_node *node)
{return shift_maxindex(node->shift);
}/** Extend a radix tree so it can store key @index.*/
static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,unsigned long index, unsigned int shift)
{void *entry;unsigned int maxshift;int tag;/* Figure out what the shift should be. */maxshift = shift;/*計算出index對應的shift值*/while (index > shift_maxindex(maxshift))maxshift += RADIX_TREE_MAP_SHIFT; /*shift每次以RADIX_TREE_MAP_SHIFT為步長進行增加*/entry = rcu_dereference_raw(root->rnode);if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE)))goto out;/*root->rnode都不存在且(非idr或則root->gfp_mask IDR_FREE置位),退出,直接根據shift值創建新節點無須進行擴展處理*//*由于需要對radix-tree進行擴展,需要增加新節點以及深度,必然會對之前已經建立關聯的radxir-tree進行挪動;挪動其實很簡單只需要讓老radix-tree root->rnode指向新節點中的slots[0],同時root->rnode指向新節點;深度從shift擴展到maxshift;*/do {/*因為radix-tree root->rnode會指向新節點中的slots[0],所以新node中的count設置為1*/struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL/*parent*/,root, shift, 0/*offset*/, 1/*count*/, 0/*exceptional*/);if (!node)return -ENOMEM;if (is_idr(root)) { /*!!(root->gfp_mask & ROOT_IS_IDR);*/all_tag_set(node, IDR_FREE); /*node->tags[IDR_FREE]全部置位*/if (!root_tag_get(root, IDR_FREE)) { /*root->gfp_mask & (1 << (IDR_FREE + ROOT_TAG_SHIFT))*/tag_clear(node, IDR_FREE, 0); /*/*清除node->tags[IDR_FREE] bit 0,擴展意味著之前radix-tree中無剩余空間*/*/root_tag_set(root, IDR_FREE); /*root設置IDR_FREE標志*/}} else { /*非idr*//* Propagate the aggregated tag info to the new child */for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {if (root_tag_get(root, tag)) /*root->gfp_mask & (1 << (tag + ROOT_TAG_SHIFT))*/tag_set(node, tag, 0); /*將node->tags[tag] bit 0 值位*/}}BUG_ON(shift > BITS_PER_LONG);if (radix_tree_is_internal_node(entry)) {entry_to_node(entry)->parent = node;} else if (radix_tree_exceptional_entry(entry)) {/* Moving an exceptional root->rnode to a node */node->exceptional = 1;}/** entry was already in the radix tree, so we do not need* rcu_assign_pointer here*/node->slots[0] = (void __rcu *)entry; /*擴展后,之前radix-tree指向擴展節點的slots[0],注意slots[0]值是entry*/entry = node_to_entry(node);rcu_assign_pointer(root->rnode, entry); /*radix-tree root->rnode指向新節點即可,也是entry*/shift += RADIX_TREE_MAP_SHIFT; /*繼續進行擴展*/} while (shift <= maxshift);
out:return maxshift + RADIX_TREE_MAP_SHIFT; /*為什么返回的shift值都+RADIX_TREE_MAP_SHIFT??*/
}/*計算出index在parent slots[]中的偏移位置(返回值)以及slots[offset](nodep保存)*/
static unsigned int radix_tree_descend(const struct radix_tree_node *parent,struct radix_tree_node **nodep, unsigned long index)
{/*計算出index在parent slosts[]中的offset偏移值*/unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;/*對應的slots[]中的值*/void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);#ifdef CONFIG_RADIX_TREE_MULTIORDER/*multiorder情況下,slots[]中存放的未必是值*/if (radix_tree_is_internal_node(entry)) {if (is_sibling_entry(parent, entry)) { /*兄弟節點存放最開始slots[]地址轉為entry的地址*/void __rcu **sibentry;sibentry = (void __rcu **) entry_to_node(entry);offset = get_slot_offset(parent, sibentry);entry = rcu_dereference_raw(*sibentry); }}
#endif*nodep = (void *)entry; /*slots[offset]中的內容*/return offset;
}/** This assumes that the caller has performed appropriate preallocation, and* that the caller has pinned this thread of control to the current CPU.*/
static struct radix_tree_node * radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,struct radix_tree_root *root,unsigned int shift, unsigned int offset,unsigned int count, unsigned int exceptional)
{struct radix_tree_node *ret = NULL;/** Preload code isn't irq safe and it doesn't make sense to use* preloading during an interrupt anyway as all the allocations have* to be atomic. So just do normal allocation when in interrupt.*//*不支持blocking且不在中斷上下文中*/if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {struct radix_tree_preload *rtp;/** Even if the caller has preloaded, try to allocate from the* cache first for the new node to get accounted to the memory* cgroup.*/ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask | __GFP_NOWARN);if (ret)goto out;/** Provided the caller has preloaded here, we will always* succeed in getting a node here (and never reach* kmem_cache_alloc)*//*radix_tree_preloads為per-cpu變量,管理預先分配的radix-tree節點*/rtp = this_cpu_ptr(&radix_tree_preloads);if (rtp->nr) {/*從預先分配的radix-tree pool中取一個節點*/ret = rtp->nodes;rtp->nodes = ret->parent;rtp->nr--;}/** Update the allocation stack trace as this is more useful* for debugging.*/kmemleak_update_trace(ret);goto out;}ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
out:BUG_ON(radix_tree_is_internal_node(ret));if (ret) {/*賦值*/ret->shift = shift;ret->offset = offset;ret->count = count;ret->exceptional = exceptional;ret->parent = parent;ret->root = root;}return ret;
}#ifdef CONFIG_RADIX_TREE_MULTIORDER
/*
CONFIG_RADIX_TREE_MULTIORDER=y 成立
MULTIORDER指index周圍2^order范圍內的slots[]存放同一個item值
*/
static inline int insert_entries(struct radix_tree_node *node,void __rcu **slot, void *item, unsigned order, bool replace)
{struct radix_tree_node *child;unsigned i, n, tag, offset, tags = 0;/*n記錄index周圍2^order范圍內的數量*/if (node) { /*最底層節點存在,node->shift為0*/if (order > node->shift) /*order:index周圍2^order范圍內的數量*/n = 1 << (order - node->shift);elsen = 1;offset = get_slot_offset(node, slot); /*slots[]內部偏移*/} else { /*最底層節點不存在,直接放在root->rnode中slots[0]中*/n = 1;offset = 0;}if (n > 1) { /*index覆蓋多個*/offset = offset & ~(n - 1); /*offset進行對齊*/slot = &node->slots[offset]; /*開始位置對應的slots*/}child = node_to_entry(slot); /*注意,slots[]由node轉為entry*/for (i = 0; i < n; i++) {if (slot[i]) { /*slots[i]中已經有item存在*/if (replace) { /*replace為TRUE,進行替換*/node->count--; /*統計計算*/for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)if (tag_get(node, tag, offset + i)) //node->tags[tag]的offset+1 bit是否置位tags |= 1 << tag;} elsereturn -EEXIST; /*不替換則返回存在錯誤值:-EEXIST*/}}for (i = 0; i < n; i++) {struct radix_tree_node *old = rcu_dereference_raw(slot[i]);/*index周圍2^order范圍內的slots[]中,只有最開始的slots[]中存放item數據,其他 sibling slots[]中存放最開始slots[]的地址*/if (i) { // i!=0rcu_assign_pointer(slot[i], child); /*注意,child為entry,將最開始的slots[]地址存入slots[i]中,將在multiorder情況下,sibling slots[]中存放開始slots[]的地址且為內部節點*/for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)if (tags & (1 << tag))tag_clear(node, tag, offset + i); /*清除tag值*/} else { //i為0rcu_assign_pointer(slot[i], item); /*直接將item存入slots[i]中*/for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)if (tags & (1 << tag))tag_set(node, tag, offset); /*設置tag值*/}if (radix_tree_is_internal_node(old) &&!is_sibling_entry(node, old) &&(old != RADIX_TREE_RETRY))radix_tree_free_nodes(old); /*old中的內容已經無用,調用radix_tree_free_nodes()采用深度優先的方法將old下面的所有節點都釋放掉*/if (radix_tree_exceptional_entry(old))node->exceptional--;}if (node) {node->count += n; /*增加計數值*/if (radix_tree_exceptional_entry(item))node->exceptional += n;}return n;
}
#else
static inline int insert_entries(struct radix_tree_node *node,void __rcu **slot, void *item, unsigned order, bool replace)
{if (*slot)return -EEXIST;rcu_assign_pointer(*slot, item); /*radix-tree index對應的最底層的節點的slots[]存放item數據 */if (node) {node->count++;if (radix_tree_exceptional_entry(item))node->exceptional++;}return 1;
}
#endif/** Free any nodes below this node.釋放此節點以下的所有節點。* The tree is presumed to not need shrinking, and any user data in the tree is presumed to not need a destructor called on it.* 假定樹不需要收縮,并且假定樹中的任何用戶數據都不需要對其調用析構函數* If we need to add a destructor, we can add that functionality later.* Note that we may not clear tags or slots from the tree as an RCU walker may still have a pointer into this subtree.* We could replace the entries with RADIX_TREE_RETRY,* but we'll still have to clear those in rcu_free.*/
/*radix_tree_free_nodes是一個非常粗暴的方式直接將node下的所有節點直接釋放掉*/
static void radix_tree_free_nodes(struct radix_tree_node *node)
{unsigned offset = 0;struct radix_tree_node *child = entry_to_node(node);/*深度優先的方法將node下面的所有節點都釋放掉*/for (;;) {void *entry = rcu_dereference_raw(child->slots[offset]);/*只有multiorder下才存在sibling節點,sibling節點不用釋放其下的節點由第一個sibling節點釋放即可;radix_tree_is_internal_node(entry):確保entry節點有效,無效則offset++,判斷下個slots[];!is_sibling_entry(child, entry):確保entry不為sibling即其存在子節點;*/if (radix_tree_is_internal_node(entry) && !is_sibling_entry(child, entry)) {child = entry_to_node(entry); /*處理entry的子節點*/offset = 0;continue;}offset++; /*如果entry無效或則entry為sibling即其無子節點則offset++,判斷下一個slots[]*/while (offset == RADIX_TREE_MAP_SIZE) { /*child節點的所有slots[]都已經處理完,child本身可以釋放掉了*/struct radix_tree_node *old = child;offset = child->offset + 1;child = child->parent; /*返回到父節點從父節點slots[] child->offset + 1位置繼續判斷*/WARN_ON_ONCE(!list_empty(&old->private_list));/*通過call_rcu 注冊一個回調函數,當所有現存的讀訪問完成后,調用這個回調函數注銷舊數據*/radix_tree_node_free(old);if (old == entry_to_node(node)) /*回到了node自身,node下面所有子節點都處理完畢,退出*/return;}}
}
2.4、從radix-tree中刪除index
/*** radix_tree_delete - delete an entry from a radix tree* @root: radix tree root* @index: index key** Remove the entry at @index from the radix tree rooted at @root.** Return: The deleted entry, or %NULL if it was not present.*/
/*刪除index鍵值對應的item,radix-tree內部根據情況判斷是否刪除index索引路徑上的節點以及進行radix-tree shrink操作*/
void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
{return radix_tree_delete_item(root, index, NULL);
}/*** radix_tree_delete_item - delete an item from a radix tree* @root: radix tree root* @index: index key* @item: expected item** Remove @item at @index from the radix tree rooted at @root.** Return: the deleted entry, or %NULL if it was not present* or the entry at the given @index was not @item.*/
void *radix_tree_delete_item(struct radix_tree_root *root,unsigned long index, void *item)
{struct radix_tree_node *node = NULL;void __rcu **slot = NULL;void *entry;/*計算出index在parent slosts[]中的offset偏移值以及最底層的節點node,并返回對應的slots[]中的值(返回值)*/entry = __radix_tree_lookup(root, index, &node, &slot);if (!slot)return NULL;if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,get_slot_offset(node, slot))))return NULL;if (item && entry != item)return NULL;/*1.將index對應slots[]中的itemi設置為NULL;2.根據情況處理node節點:如果node->count > 0,說明node中還存儲有其他item,不能直接刪除node節點。如果node->count =0,則可以直接刪除node節點,假設如果node是parent的唯一子節點的話,node被刪除了,parent其實也是可以刪除的,同時從下往上嘗試刪除其他無用的節點。如果node為root->rnode時可以對radix-tree進行shrink操作。
*/__radix_tree_delete(root, node, slot);return entry;
}/*** __radix_tree_lookup -lookup an item in a radix tree* @root: radix tree root* @index: index key* @nodep: returns node* @slotp: returns slot** Lookup and return the item at position @index in the radix* tree @root.** Until there is more than one item in the tree, no nodes are* allocated and @root->rnode is used as a direct slot instead of* pointing to a node, in which case *@nodep will be NULL.*/
void *__radix_tree_lookup(const struct radix_tree_root *root,unsigned long index, struct radix_tree_node **nodep,void __rcu ***slotp)
{struct radix_tree_node *node, *parent;unsigned long maxindex;void __rcu **slot;restart:parent = NULL;slot = (void __rcu **)&root->rnode;/*radixr-tree maxindex以及root->rnode節點*/radix_tree_load_root(root, &node, &maxindex);if (index > maxindex)return NULL;/*一層接一層地往下找到index在radix-tree中最底層的節點以及對應的slots[]*/while (radix_tree_is_internal_node(node)) {unsigned offset;if (node == RADIX_TREE_RETRY)goto restart;parent = entry_to_node(node);/*計算出index在parent slosts[]中的offset偏移值以及對應的slots[]中的值存在node中*/offset = radix_tree_descend(parent, &node, index);slot = parent->slots + offset;}if (nodep)*nodep = parent;if (slotp)*slotp = slot;return node;
}/*1.將index對應slots[]中的itemi設置為NULL;2.根據情況處理node節點:如果node->count > 0,說明node中還存儲有其他item,不能直接刪除node節點。如果node->count =0,則可以直接刪除node節點,假設如果node是parent的唯一子節點的話,node被刪除了,parent其實也是可以刪除的,同時從下往上嘗試刪除其他無用的節點。如果node為root->rnode時可以對radix-tree進行shrink操作。
*/
static bool __radix_tree_delete(struct radix_tree_root *root,struct radix_tree_node *node, void __rcu **slot)
{void *old = rcu_dereference_raw(*slot);int exceptional = radix_tree_exceptional_entry(old) ? -1 : 0;unsigned offset = get_slot_offset(node, slot);int tag;if (is_idr(root)) /*idr情況*/node_tag_set(root, node, IDR_FREE, offset); /*如果node!=NULL,則遞歸設置node及其父節點tags[tag] offset位否則設置root gfp_mask 該tag位*/elsefor (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)node_tag_clear(root, node, tag, offset);/*處理index對應的item*/replace_slot(slot, NULL, node, -1, exceptional); /*count為-1,設置slots[] 為NULL*//*如果node->count > 0,說明node中還存儲有其他item,不能直接刪除node節點。如果node為root->rnode時可以對radix-tree進行shrink操作。如果node->count =0,則可以直接刪除node節點,假設如果node是parent的唯一子節點的話,node被刪除了,parent其實也是可以刪除的,同時從下往上嘗試刪除其他無用的節點。*/return node && delete_node(root, node, NULL, NULL);
}static void replace_slot(void __rcu **slot, void *item,struct radix_tree_node *node, int count, int exceptional)
{if (WARN_ON_ONCE(radix_tree_is_internal_node(item)))return;/*node存在且count或exception為true*/if (node && (count || exceptional)) {node->count += count;node->exceptional += exceptional;/*count < 0,將sibling的slots[]設置為NULL否則不做處理*/replace_sibling_entries(node, slot, count, exceptional);}/*將slots[]設置為item(NULL)*/rcu_assign_pointer(*slot, item);
}static inline void replace_sibling_entries(struct radix_tree_node *node,void __rcu **slot, int count, int exceptional)
{
#ifdef CONFIG_RADIX_TREE_MULTIORDERvoid *ptr = node_to_entry(slot);unsigned offset = get_slot_offset(node, slot) + 1;while (offset < RADIX_TREE_MAP_SIZE) {if (rcu_dereference_raw(node->slots[offset]) != ptr) /*非sibling時則break*/break;if (count < 0) {node->slots[offset] = NULL; /*將sibling的slots[]設置為NULL*/node->count--;}node->exceptional += exceptional;offset++;}
#endif
}/*
如果node->count > 0,說明node中還存儲有其他item,不能直接刪除node節點。如果node為root->rnode時可以對radix-tree進行shrink操作。
如果node->count =0,則可以直接刪除node節點,假設如果node是parent的唯一子節點的話,node被刪除了,parent其實也是可以刪除的,同時從下往上嘗試刪除其他無用的節點。*/
static bool delete_node(struct radix_tree_root *root,struct radix_tree_node *node,radix_tree_update_node_t update_node, void *private)
{bool deleted = false;do {struct radix_tree_node *parent;if (node->count) { /*因為node中還存儲有其他item,不能直接將node連根拔起,可以對radix-tree進行shrink*/if (node_to_entry(node) == rcu_dereference_raw(root->rnode)) /*只有刪除的node為root->rnode時才能進行shrink操作*/deleted |= radix_tree_shrink(root, update_node,private); /*進行shrink操作*/return deleted;}/*node中沒有存儲其他item,可以刪除node*/parent = node->parent;if (parent) {/*處理其父節點*/parent->slots[node->offset] = NULL;parent->count--;} else {/** Shouldn't the tags already have all been cleared* by the caller?*/if (!is_idr(root))root_tag_clear_all(root);root->rnode = NULL;}WARN_ON_ONCE(!list_empty(&node->private_list));radix_tree_node_free(node); /*直接釋放node*/deleted = true;node = parent; /*繼續處理,假設如果node是parent的唯一子節點的話,node被刪除了,parent其實也是可以刪除的*/} while (node);return deleted;
}/*** radix_tree_shrink - shrink radix tree to minimum height* @root radix tree root*/
static inline bool radix_tree_shrink(struct radix_tree_root *root, radix_tree_update_node_t update_node, void *private)
{bool shrunk = false;for (;;) {/*shrink只能在root->rnode這一級上進行處理,這一級上只存在一個節點好處理*/struct radix_tree_node *node = rcu_dereference_raw(root->rnode);struct radix_tree_node *child;if (!radix_tree_is_internal_node(node))break;node = entry_to_node(node);/** The candidate node has more than one child, or its child* is not at the leftmost slot, or the child is a multiorder* entry, we cannot shrink.*/if (node->count != 1) /*The candidate node has more than one child*/break;/*在最左邊意味者存在slots[0]位置且是node的唯一子節點,對于child而言其高位為0所以存放在slots[0]位置;所以將node刪除,對一繼續尋找child是沒有影響的。*/child = rcu_dereference_raw(node->slots[0]); /*its child is not at the leftmost slot*/if (!child)break;if (!radix_tree_is_internal_node(child) && node->shift) /*child is a multiorder entry,*/break;if (radix_tree_is_internal_node(child))entry_to_node(child)->parent = NULL;/** We don't need rcu_assign_pointer(), since we are simply* moving the node from one part of the tree to another: if it* was safe to dereference the old pointer to it* (node->slots[0]), it will be safe to dereference the new* one (root->rnode) as far as dependent read barriers go.*/root->rnode = (void __rcu *)child; /*root指向child,這個和擴展時操作是逆操作*/if (is_idr(root) && !tag_get(node, IDR_FREE, 0))root_tag_clear(root, IDR_FREE);/** We have a dilemma here. The node's slot[0] must not be* NULLed in case there are concurrent lookups expecting to* find the item. However if this was a bottom-level node,* then it may be subject to the slot pointer being visible* to callers dereferencing it. If item corresponding to* slot[0] is subsequently deleted, these callers would expect* their slot to become empty sooner or later.** For example, lockless pagecache will look up a slot, deref* the page pointer, and if the page has 0 refcount it means it* was concurrently deleted from pagecache so try the deref* again. Fortunately there is already a requirement for logic* to retry the entire slot lookup -- the indirect pointer* problem (replacing direct root node with an indirect pointer* also results in a stale slot). So tag the slot as indirect* to force callers to retry.*/node->count = 0; /*node->count置為0*/if (!radix_tree_is_internal_node(child)) {node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;if (update_node)update_node(node, private);}WARN_ON_ONCE(!list_empty(&node->private_list));radix_tree_node_free(node); /*直接釋放node*/shrunk = true;}return shrunk;
}static void radix_tree_node_rcu_free(struct rcu_head *head)
{struct radix_tree_node *node =container_of(head, struct radix_tree_node, rcu_head);/** Must only free zeroed nodes into the slab. We can be left with* non-NULL entries by radix_tree_free_nodes, so clear the entries* and tags here.*/memset(node->slots, 0, sizeof(node->slots));memset(node->tags, 0, sizeof(node->tags));INIT_LIST_HEAD(&node->private_list);kmem_cache_free(radix_tree_node_cachep, node);
}static inline void radix_tree_node_free(struct radix_tree_node *node)
{/*注冊一個回調函數,當所有現存的讀訪問完成后,調用這個回調函數注銷舊數據*/call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
}注意區分radix_tree_node_free和radix_tree_free_nodes的差異:
radix_tree_node_free釋放該節點的radix_tree_node節點內存空間;
radix_tree_free_nodes是一個非常粗暴的方式直接將node下的所有節點直接釋放掉;
2.5、從radix-tree中查找index,返回其item
/*** radix_tree_lookup - perform lookup operation on a radix tree* @root: radix tree root* @index: index key** Lookup the item at the position @index in the radix tree @root.** This function can be called under rcu_read_lock, however the caller* must manage lifetimes of leaf nodes (eg. RCU may also be used to free* them safely). No RCU barriers are required to access or modify the* returned item, however.*/
/*在radix-tree中查找index,返回index在radix-tree中的item值*/
void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
{/*一層接一層地往下找到index在radix-tree中最底層的節點,對應的slots[],以及slots[]中的值*/return __radix_tree_lookup(root, index, NULL, NULL);
}/*** __radix_tree_lookup - lookup an item in a radix tree* @root: radix tree root* @index: index key* @nodep: returns node* @slotp: returns slot** Lookup and return the item at position @index in the radix* tree @root.** Until there is more than one item in the tree, no nodes are* allocated and @root->rnode is used as a direct slot instead of* pointing to a node, in which case *@nodep will be NULL.*/
void *__radix_tree_lookup(const struct radix_tree_root *root,unsigned long index, struct radix_tree_node **nodep,void __rcu ***slotp)
{struct radix_tree_node *node, *parent;unsigned long maxindex;void __rcu **slot;restart:parent = NULL;slot = (void __rcu **)&root->rnode;/*radixr-tree maxindex以及root->rnode節點*/radix_tree_load_root(root, &node, &maxindex);if (index > maxindex)return NULL;/*一層接一層地往下找到index在radix-tree中最底層的節點以及對應的slots[]*/while (radix_tree_is_internal_node(node)) {unsigned offset;if (node == RADIX_TREE_RETRY)goto restart;parent = entry_to_node(node);/*計算出index在parent slosts[]中的offset偏移值以及對應的slots[]中的值存在node中*/offset = radix_tree_descend(parent, &node, index);slot = parent->slots + offset;}if (nodep)*nodep = parent;if (slotp)*slotp = slot;return node;
}
2.8、遍歷radix-tree中的index/*** radix_tree_for_each_slot - iterate over non-empty slots** @slot: the void** variable for pointer to slot* @root: the struct radix_tree_root pointer* @iter: the struct radix_tree_iter pointer* @start: iteration starting index** @slot points to radix tree slot, @iter->index contains its index.*/
/*
radix_tree_next_chunk:在root中尋找滿足flags條件的有效節點(slots[offset]!=NULL),最終找到的index允許 >= 參數index
radix_tree_next_slot:在radix_tree_next_chunk設置的迭代器(iter)index~next_index chunk范圍內查找滿足條件的slot
以參數start作為開始index查找整個radix-tree中的有效且無tagged slots[]
*/
#define radix_tree_for_each_slot(slot, root, iter, start) \for (slot = radix_tree_iter_init(iter, start) ; \slot || (slot = radix_tree_next_chunk(root, iter, 0)) ; \slot = radix_tree_next_slot(slot, iter, 0))/*** radix_tree_for_each_contig - iterate over contiguous slots** @slot: the void** variable for pointer to slot* @root: the struct radix_tree_root pointer* @iter: the struct radix_tree_iter pointer* @start: iteration starting index** @slot points to radix tree slot, @iter->index contains its index.*/
/*
RADIX_TREE_ITER_CONTIG:stop at first hole
radix_tree_for_each_contig和radix_tree_for_each_slot功能類似,只不過如果在chunk內出現無效slots[]則退出,不再繼續查找
*/
#define radix_tree_for_each_contig(slot, root, iter, start) \for (slot = radix_tree_iter_init(iter, start) ; \slot || (slot = radix_tree_next_chunk(root, iter, \RADIX_TREE_ITER_CONTIG)) ; \slot = radix_tree_next_slot(slot, iter, \RADIX_TREE_ITER_CONTIG))/*** radix_tree_for_each_tagged - iterate over tagged slots** @slot: the void** variable for pointer to slot* @root: the struct radix_tree_root pointer* @iter: the struct radix_tree_iter pointer* @start: iteration starting index* @tag: tag index** @slot points to radix tree slot, @iter->index contains its index.*/
/*
RADIX_TREE_ITER_TAGGED:lookup tagged slots
radix_tree_for_each_tagged和radix_tree_for_each_slot功能類似,其查詢對象是tagged slots[],參數tag為用戶關心的tag值
*/
#define radix_tree_for_each_tagged(slot, root, iter, start, tag) \for (slot = radix_tree_iter_init(iter, start) ; \slot || (slot = radix_tree_next_chunk(root, iter, \RADIX_TREE_ITER_TAGGED | tag)) ; \slot = radix_tree_next_slot(slot, iter, \RADIX_TREE_ITER_TAGGED | tag))radix_tree_next_chunk和radix_tree_next_slot 配合使用。/*** radix_tree_next_chunk - find next chunk of slots for iteration** @root: radix tree root* @iter: iterator state* @flags: RADIX_TREE_ITER_* flags and tag index* Returns: pointer to chunk first slot, or NULL if there no more left** This function looks up the next chunk in the radix tree starting from @iter->next_index.* It returns a pointer to the chunk's first slot.* Also it fills @iter with data about chunk: position in the tree (index),* its end (next_index), and constructs a bit mask for tagged iterating (tags).*/
/*在idr中尋找滿足flags條件的有效節點(slots[offset]!=NULL),最終找到的index允許 >= iter->next_index*/
void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,struct radix_tree_iter *iter, unsigned flags)
{unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; /* tag index in lower nybble */struct radix_tree_node *node, *child;unsigned long index, offset, maxindex;if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))return NULL;/** Catch next_index overflow after ~0UL. iter->index never overflows* during iterating; it can be zero only at the beginning.* And we cannot overflow iter->next_index in a single step,* because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.** This condition also used by radix_tree_next_slot() to stop* contiguous iterating, and forbid switching to the next chunk.*/index = iter->next_index;if (!index && iter->index)return NULL;restart:radix_tree_load_root(root, &child, &maxindex);if (index > maxindex)return NULL;if (!child)return NULL;if (!radix_tree_is_internal_node(child)) { /*radix-tree 只存在一個item*//* Single-slot tree */iter->index = index;iter->next_index = maxindex + 1;iter->tags = 1;iter->node = NULL;__set_iter_shift(iter, 0);return (void __rcu **)&root->rnode;}/*從上往下一層一層的尋找滿足flags條件的index,最終找到的滿足條件的index >= 參數index*/do {node = entry_to_node(child);/*index在當前層級的offset,slots[]*/offset = radix_tree_descend(node, &child, index);/*lookup tagged slots查詢tagged slots,但是node->tags[tag] offset bit未置位情況,借用相鄰slots[]不查詢tagged slots,但是child為NULL情況,借用相鄰slots[]*/if ((flags & RADIX_TREE_ITER_TAGGED) ? !tag_get(node, tag, offset) : !child) {/* Hole detected */if (flags & RADIX_TREE_ITER_CONTIG) /* stop at first hole */return NULL;if (flags & RADIX_TREE_ITER_TAGGED) /*查詢tagged slots,但是node->tags[tag] offset bit未置位情況*/offset = radix_tree_find_next_bit(node, tag,offset + 1); /*從node->tags[tag] offset+1位置開始查找firset set bit,找slots[]中相鄰節點中tagged的節點*/elsewhile (++offset < RADIX_TREE_MAP_SIZE) { /*不查詢tagged slots,但是child為NULL情況,找slots[]中相鄰節點*/void *slot = rcu_dereference_raw(node->slots[offset]);if (is_sibling_entry(node, slot))continue;if (slot) /*相鄰節點不為NULL*/break;}/*上面操作導致index值在當前層級需要修正*/index &= ~node_maxindex(node);index += offset << node->shift; /*修改index*//* Overflow after ~0UL */if (!index)return NULL;if (offset == RADIX_TREE_MAP_SIZE) /*在slots[]中沒能找到滿足條件的相鄰節點*/goto restart; /*最終index > maxinde從而退出*/child = rcu_dereference_raw(node->slots[offset]);}if (!child)goto restart;if (child == RADIX_TREE_RETRY)break;} while (radix_tree_is_internal_node(child));/* Update the iterator state *//*找到滿足條件的index*/iter->index = (index &~ node_maxindex(node)) | (offset << node->shift); /*index設置為在當前node內對應的值*/iter->next_index = (index | node_maxindex(node)) + 1; /*next_index設置為當前node內能夠存儲的最大值+1*/iter->node = node; /*滿足條件的index在最底層的節點*/__set_iter_shift(iter, node->shift);if (flags & RADIX_TREE_ITER_TAGGED) /* lookup tagged slots */set_iter_tags(iter, node, offset, tag); /*設置iter->tags值*/return node->slots + offset; /*返回index對應的item*/
}/*** radix_tree_next_slot - find next slot in chunk** @slot: pointer to current slot* @iter: pointer to interator state* @flags: RADIX_TREE_ITER_*, should be constant* Returns: pointer to next slot, or NULL if there no more left** This function updates @iter->index in the case of a successful lookup.* For tagged lookup it also eats @iter->tags.** There are several cases where 'slot' can be passed in as NULL to this* function. These cases result from the use of radix_tree_iter_resume() or* radix_tree_iter_retry(). In these cases we don't end up dereferencing* 'slot' because either:* a) we are doing tagged iteration and iter->tags has been set to 0, or* b) we are doing non-tagged iteration, and iter->index and iter->next_index* have been set up so that radix_tree_chunk_size() returns 1 or 0.*/
static __always_inline void __rcu **radix_tree_next_slot(void __rcu **slot, struct radix_tree_iter *iter, unsigned flags)
{if (flags & RADIX_TREE_ITER_TAGGED) { /* lookup tagged slots */iter->tags >>= 1; /*next slot*/if (unlikely(!iter->tags))return NULL; /*當前剩余slots[]中無tagged,返回NULL*/if (likely(iter->tags & 1ul)) { /*相鄰slots[]滿足條件*/iter->index = __radix_tree_iter_add(iter, 1);slot++;goto found;}/*相鄰slots[]不滿足條件*/if (!(flags & RADIX_TREE_ITER_CONTIG)) { /*first hole不stop,繼續查找 */unsigned offset = __ffs(iter->tags);iter->tags >>= offset++;iter->index = __radix_tree_iter_add(iter, offset);slot += offset;goto found;}/*如果沒有找到返回NULL*/} else { /*不要求為tagged slots*/long count = radix_tree_chunk_size(iter);/*老老實實地一個一個地找*/while (--count > 0) {slot++;iter->index = __radix_tree_iter_add(iter, 1);if (likely(*slot))goto found;if (flags & RADIX_TREE_ITER_CONTIG) { /* stop at first hole *//* forbid switching to the next chunk */iter->next_index = 0;break;}}}return NULL;found:if (unlikely(radix_tree_is_internal_node(rcu_dereference_raw(*slot))))return __radix_tree_next_slot(slot, iter, flags);return slot;
}
感興趣的讀者可以自己手動來嘗試下將下面的數據插入基數樹過程中的數據結構變化過程。