blk-mq編程,主要要調用兩個函數進行初始化工作,blk_mq_init_queue這是第二個。該函數先是申請了struct request_queue結構,這個請求隊列后面用于賦值給磁盤那個結構體的相應成員。
struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
{struct request_queue *uninit_q, *q;//分配struct request_queue并初始化uninit_q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node, NULL);if (!uninit_q)return ERR_PTR(-ENOMEM);/*1:分配每個cpu專屬的軟件隊列并初始化2:分配硬件隊列,并初始化3:建立軟件隊列和硬件隊列的聯系*/q = blk_mq_init_allocated_queue(set, uninit_q);if (IS_ERR(q))blk_cleanup_queue(uninit_q);return q;
}
EXPORT_SYMBOL(blk_mq_init_queue);
blk_mq_init_allocated_queue函數分析
一眼望過去,確實有點復雜,不過,多看幾遍就好了,
這里面,主要就是給struct request_queue *q結構體里面的成員變量賦值的,簡單的變量賦值就不分析了,看看它調用的函數進行分析。
struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q)
{/* mark the queue as mq asap */q->mq_ops = set->ops;q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn, blk_mq_poll_stats_bkt, BLK_MQ_POLL_STATS_BKTS, q);if (!q->poll_cb)goto err_exit;q->queue_ctx = alloc_percpu(struct blk_mq_ctx);//percpu變量 軟件隊列if (!q->queue_ctx)goto err_exit;/* init q->mq_kobj and sw queues' kobjects */blk_mq_sysfs_init(q); //主要是初始化kobject變量//二級指針 硬件隊列q->queue_hw_ctx = kcalloc_node(nr_cpu_ids, sizeof(*(q->queue_hw_ctx)), GFP_KERNEL, set->numa_node);if (!q->queue_hw_ctx)goto err_percpu;//賦值q->mq_map,這個數組保存了每個CPU對應的硬件隊列編號q->mq_map = set->mq_map;blk_mq_realloc_hw_ctxs(set, q);if (!q->nr_hw_queues)goto err_hctxs;/*定時器初始化,設置超時時間*/INIT_WORK(&q->timeout_work, blk_mq_timeout_work);blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);q->nr_queues = nr_cpu_ids;q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;if (!(set->flags & BLK_MQ_F_SG_MERGE))queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);q->sg_reserved_size = INT_MAX;INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);INIT_LIST_HEAD(&q->requeue_list);spin_lock_init(&q->requeue_lock);blk_queue_make_request(q, blk_mq_make_request);if (q->mq_ops->poll)q->poll_fn = blk_mq_poll;/** Do this after blk_queue_make_request() overrides it...*/q->nr_requests = set->queue_depth; //防止被覆蓋/** Default to classic polling*/q->poll_nsec = -1;if (set->ops->complete)blk_queue_softirq_done(q, set->ops->complete);blk_mq_init_cpu_queues(q, set->nr_hw_queues);blk_mq_add_queue_tag_set(set, q);blk_mq_map_swqueue(q);if (!(set->flags & BLK_MQ_F_NO_SCHED)) {int ret;ret = elevator_init_mq(q);if (ret)return ERR_PTR(ret);}return q;
err_hctxs:kfree(q->queue_hw_ctx);
err_percpu:free_percpu(q->queue_ctx);
err_exit:q->mq_ops = NULL;return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(blk_mq_init_allocated_queue);
blk_stat_alloc_callback
這個函數一看也沒什么分析的,主要也是給poll_cb進行賦值,采用了很多的默認函數進行賦值。
struct blk_stat_callback *
blk_stat_alloc_callback(void (*timer_fn)(struct blk_stat_callback *),int (*bucket_fn)(const struct request *), unsigned int buckets, void *data)
{struct blk_stat_callback *cb;cb = kmalloc(sizeof(*cb), GFP_KERNEL);if (!cb)return NULL;cb->stat = kmalloc_array(buckets, sizeof(struct blk_rq_stat), GFP_KERNEL);if (!cb->stat) {kfree(cb);return NULL;}cb->cpu_stat = __alloc_percpu(buckets * sizeof(struct blk_rq_stat), __alignof__(struct blk_rq_stat));if (!cb->cpu_stat) {kfree(cb->stat);kfree(cb);return NULL;}cb->timer_fn = timer_fn;cb->bucket_fn = bucket_fn;cb->data = data;cb->buckets = buckets;timer_setup(&cb->timer, blk_stat_timer_fn, 0);return cb;
}
EXPORT_SYMBOL_GPL(blk_stat_alloc_callback);
blk_mq_sysfs_init
接著到這個函數,也是變量的初始化工作,struct request_queue隊列里面的kobject變量初始化,以及取出在每一個cpu上q->queue_ctx結構體,然后對齊成員kobject變量進行初始化。
void blk_mq_sysfs_init(struct request_queue *q)
{struct blk_mq_ctx *ctx;int cpu;kobject_init(&q->mq_kobj, &blk_mq_ktype);for_each_possible_cpu(cpu) {ctx = per_cpu_ptr(q->queue_ctx, cpu);//返回每個cpu上的q->queue_ctx變量的首地址kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);}
}
blk_mq_realloc_hw_ctxs
這個函數相對來說比較重要。后面再補充。
在這里插入代碼片
blk_queue_make_request
這個函數也是給q的其成員賦值的,先大概熟悉一下,如果有實際的調試分析,需要了解某個參數的值,到時再回來看吧。
void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
{/** set defaults*/q->nr_requests = BLKDEV_MAX_RQ; //這個值后面會重新賦值進行覆蓋q->make_request_fn = mfn;blk_queue_dma_alignment(q, 511);blk_queue_congestion_threshold(q);q->nr_batching = BLK_BATCH_REQ;blk_set_default_limits(&q->limits);
}
EXPORT_SYMBOL(blk_queue_make_request);
blk_queue_softirq_done
也是賦值,IO操作完成時,會調用這個回調函數。
void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
{q->softirq_done_fn = fn;
}
EXPORT_SYMBOL(blk_queue_softirq_done);
blk_mq_init_cpu_queues
static void blk_mq_init_cpu_queues(struct request_queue *q, unsigned int nr_hw_queues)
{unsigned int i;for_each_possible_cpu(i) { //硬件隊列數/*返回這個變量在編號為i的cpu上的起始地址*/struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);struct blk_mq_hw_ctx *hctx;//其成員做一些賦值操作__ctx->cpu = i;spin_lock_init(&__ctx->lock);INIT_LIST_HEAD(&__ctx->rq_list);__ctx->queue = q;/** Set local node, IFF we have more than one hw queue. If* not, we remain on the home node of the device*/hctx = blk_mq_map_queue(q, i); //取出每個cpu上的硬件隊列if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)hctx->numa_node = local_memory_node(cpu_to_node(i));}
}
blk_mq_add_queue_tag_set
blk_mq_map_swqueue