proc diskstats - ceragon/LinuxDoc GitHub Wiki

proc diskstats

数据样例

$ cat /proc/diskstats 
   7       0 loop0 14 0 34 1 0 0 0 0 0 20 1 0 0 0 0 0 0
   7       1 loop1 45 0 702 46 0 0 0 0 0 72 46 0 0 0 0 0 0
   7       2 loop2 1123 0 48088 263 0 0 0 0 0 2676 263 0 0 0 0 0 0
   7       3 loop3 61 0 2208 45 0 0 0 0 0 64 45 0 0 0 0 0 0
   7       4 loop4 3465 0 106416 634 0 0 0 0 0 6288 634 0 0 0 0 0 0
   7       5 loop5 2926 0 356756 907 0 0 0 0 0 4128 907 0 0 0 0 0 0
   7       6 loop6 3153 0 16442 137 0 0 0 0 0 1120 137 0 0 0 0 0 0
   7       7 loop7 43 0 700 14 0 0 0 0 0 28 14 0 0 0 0 0 0
   8       0 sda 795 24 11305 143450 108 15 154792 4470 0 13432 148450 0 0 0 0 30 529
   8       1 sda1 370 24 8714 133638 108 15 154792 4470 0 4092 138108 0 0 0 0 0 0
   8      16 sdb 321538 85929 15917616 133118 96261 489238 21298746 418453 0 568976 579014 0 0 0 0 13306 27442
   8      17 sdb1 26 0 208 364 0 0 0 0 0 384 364 0 0 0 0 0 0
   8      18 sdb2 147 23 10992 486 2 0 2 0 0 544 486 0 0 0 0 0 0
   8      19 sdb3 320822 85906 15900258 131753 96259 489238 21298744 418453 0 568312 550207 0 0 0 0 0 0
  11       0 sr0 10 0 4 7 0 0 0 0 0 28 7 0 0 0 0 0 0
   7       8 loop8 1102 0 28980 216 0 0 0 0 0 1984 216 0 0 0 0 0 0
   7      11 loop11 43 0 694 11 0 0 0 0 0 44 11 0 0 0 0 0 0
   7       9 loop9 145 0 8316 49 0 0 0 0 0 376 49 0 0 0 0 0 0
   7      10 loop10 29 0 378 7 0 0 0 0 0 60 7 0 0 0 0 0 0
   7      12 loop12 73 0 2216 3 0 0 0 0 0 48 3 0 0 0 0 0 0

源码

显示

/* Partition descriptor (abridged): only the per-CPU statistics pointer is shown. */
struct hd_struct {
    struct disk_stats __percpu *dkstats;
};
/* I/O counters with one slot per stat_group value (abridged to two fields). */
struct disk_stats {
    unsigned long sectors[NR_STAT_GROUPS]; /* accumulated sector counts */
	unsigned long ios[NR_STAT_GROUPS]; /* number of accounted requests */
};
/* Index into the per-operation arrays of struct disk_stats. */
enum stat_group {
	STAT_READ,
	STAT_WRITE,
	STAT_DISCARD,

	NR_STAT_GROUPS /* number of groups; used as the array length */
};
/*
 * seq_file show callback: prints one line of counters for every partition
 * returned by disk_part_iter_next() (abridged excerpt). Always returns 0.
 *
 * NOTE(review): the excerpt is not complete as written -- piter is never
 * initialised, buf is not declared, the format string has more conversions
 * than there are arguments, and the argument list ends with a trailing
 * comma. Presumably the elided lines supply the missing pieces; confirm
 * against the kernel source before reusing this snippet.
 */
static int diskstats_show(struct seq_file *seqf, void *v) {
    struct gendisk *gp = v;
    struct disk_part_iter piter;
    struct hd_struct *hd;
    while ((hd = disk_part_iter_next(&piter))) {
        seq_printf(seqf, "%4d %7d %s "
			   "%lu %lu %lu %u "
			   "%lu %lu %lu %u "
			   "%u %u %u "
			   "%lu %lu %lu %u\n",
               disk_name(gp, hd->partno, buf),
               /* request count and sector count for each stat group */
               part_stat_read(hd, ios[STAT_READ]),
               part_stat_read(hd, sectors[STAT_READ]),
               part_stat_read(hd, ios[STAT_WRITE]),
               part_stat_read(hd, sectors[STAT_WRITE]),
               part_stat_read(hd, ios[STAT_DISCARD]),
               part_stat_read(hd, sectors[STAT_DISCARD]),
            );
    }
    return 0;
}

宏定义

/* Increment a statistics field by one; shorthand for part_stat_add(..., 1). */
#define part_stat_inc(cpu, gendiskp, field)				\
	part_stat_add(cpu, gendiskp, field, 1)

/*
 * Add addnd to a field of the partition's counters for the given CPU. When
 * the partition number is non-zero, the same amount is also added to part0
 * of the owning disk, so the whole-disk entry includes its partitions.
 */
#define part_stat_add(cpu, part, field, addnd)	do {			\
	__part_stat_add((cpu), (part), field, addnd);			\
	if ((part)->partno)						\
		__part_stat_add((cpu), &part_to_disk((part))->part0,	\
				field, addnd);				\
} while (0)

/* Raw update: add addnd to field in the partition's per-CPU disk_stats for cpu. */
#define __part_stat_add(cpu, part, field, addnd)			\
	(per_cpu_ptr((part)->dkstats, (cpu))->field += (addnd))

ios 的赋值

不用太深究下面方法的调用,只需要知道每个 IO 请求执行完成时都会将 ios 计数 +1

/*
 * Account one finished request: increment the ios counter of the partition
 * the request belongs to. Nothing is counted when blk_do_io_stat(req) is
 * false or when the request has REQ_FLUSH_SEQ set.
 *
 * NOTE(review): part_stat_lock() has no matching unlock in this excerpt --
 * presumably elided; confirm against the kernel source.
 */
static void blk_account_io_done(struct request *req) {
    if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) {
        // Bit 0 of cmd_flags selects the ios[] slot; this looks like a
        // hand-expanded rq_data_dir(req) -- TODO confirm.
        const int rw = (req)->cmd_flags & 1;
        struct hd_struct *part;
        int cpu;
        cpu = part_stat_lock();
        part = req->part;
        // ios[rw] += 1 (part_stat_inc also updates part0 when partno != 0)
        part_stat_inc(cpu, part, ios[rw]);
    }
}
/*
 * Finish a request (abridged): only the I/O accounting call is shown.
 * The error argument is not used in this excerpt.
 */
static void blk_finish_request(struct request *req, int error) {
    blk_account_io_done(req);
}
/*
 * Complete nr_bytes of rq (and bidi_bytes of its paired request).
 *
 * If blk_update_bidi_request() returns true, that result is propagated and
 * the request is not finished here. Otherwise the request is finished via
 * blk_finish_request(), which performs the ios accounting, and false is
 * returned.
 */
static bool blk_end_bidi_request(struct request *rq, int error,
				 unsigned int nr_bytes, unsigned int bidi_bytes) {
    if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
		return true;
    blk_finish_request(rq, error);
    /* Explicit result: callers test this value, so it must not be left undefined. */
    return false;
}

sectors 赋值

/*
 * Account completed data for a request: adds the completed size, converted
 * to sectors, to the sectors counter of the request's partition. Nothing is
 * counted when blk_do_io_stat(req) is false.
 *
 * NOTE(review): part_stat_lock() has no matching unlock in this excerpt --
 * presumably elided; confirm against the kernel source.
 */
static void blk_account_io_completion(struct request *req, unsigned int bytes) {
    if (blk_do_io_stat(req)) {
        const int rw = rq_data_dir(req);
		struct hd_struct *part;
		int cpu;
        
		cpu = part_stat_lock();
		part = req->part;
        // Add this completion's size in 512-byte sectors (bytes >> 9), not in bytes
		part_stat_add(cpu, part, sectors[rw], bytes >> 9);
    }
}
/*
 * Update a request after nr_bytes have completed (abridged): only the
 * sectors accounting call is shown.
 *
 * NOTE(review): declared bool but no return statement appears in this
 * excerpt; the real return logic is elided -- confirm against the kernel source.
 */
bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) {
    blk_account_io_completion(req, nr_bytes);
}
/*
 * Apply a completion to rq and, for a bidirectional request, to its paired
 * request rq->next_rq.
 *
 * Returns true as soon as either blk_update_request() call returns true;
 * returns false when neither does.
 */
static bool blk_update_bidi_request(struct request *rq, int error,
				    unsigned int nr_bytes,
				    unsigned int bidi_bytes) {
    if (blk_update_request(rq, error, nr_bytes))
		return true;
    /* The paired request is only updated when rq is a bidi request. */
    if (unlikely(blk_bidi_rq(rq)) &&
	    blk_update_request(rq->next_rq, error, bidi_bytes))
		return true;
    /* Explicit result: callers branch on this value, so it must be defined. */
    return false;
}
/*
 * Complete nr_bytes of rq (and bidi_bytes of its paired request).
 *
 * If blk_update_bidi_request() returns true, that result is propagated and
 * the request is not finished here. Otherwise the request is finished via
 * blk_finish_request() and false is returned.
 */
static bool blk_end_bidi_request(struct request *rq, int error,
				 unsigned int nr_bytes, unsigned int bidi_bytes) {
    if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
		return true;
    blk_finish_request(rq, error);
    /* Explicit result: callers test this value, so it must not be left undefined. */
    return false;
}

综上

每当一个 IO 请求执行结束时,会将对应分区(以及所属磁盘)的 ios 值 +1,并将本次请求完成的数据量(以 512 字节扇区为单位,即 bytes >> 9)累加到 sectors 上。

request 的产生

req的调用路径

/*
 * Block softirq handler: takes over this CPU's blk_cpu_done list and calls
 * the owning queue's softirq_done_fn callback for every request on it.
 */
static void blk_done_softirq(struct softirq_action *h) {
    struct list_head *cpu_list, local_list;
    // Get this CPU's blk_cpu_done list (a per-CPU variable)
    cpu_list = &__get_cpu_var(blk_cpu_done);
    // Move its entries onto local_list, leaving the per-CPU list re-initialised
    list_replace_init(cpu_list, &local_list);
    while (!list_empty(&local_list)) {
        struct request *rq;
        // Take the next completed I/O request
        rq = list_entry(local_list.next, struct request, csd.list);
        // Unlink it from the list
        list_del_init(&rq->csd.list);
        // Invoke the queue's softirq completion callback for this request
        rq->q->softirq_done_fn(rq);
    }
}
/*
 * SCSI completion callback for a request (abridged): looks up the command
 * stored in rq->special and, when the disposition is SUCCESS, finishes it.
 * Other dispositions are not shown in this excerpt.
 *
 * NOTE(review): disposition is not declared in the excerpt.
 */
static void scsi_softirq_done(struct request *rq) {
    struct scsi_cmnd *cmd = rq->special;
    disposition = scsi_decide_disposition(cmd);
    switch (disposition) {
        case SUCCESS:
			scsi_finish_command(cmd);
			break;
    }
}
/*
 * Finish a command (abridged): passes the value of scsi_bufflen(cmd) to
 * scsi_io_completion() as the number of good bytes.
 */
void scsi_finish_command(struct scsi_cmnd *cmd) {
    unsigned int good_bytes;
    good_bytes = scsi_bufflen(cmd);
    scsi_io_completion(cmd, good_bytes);
}
/*
 * I/O completion for a command (abridged): ends good_bytes of the request
 * through scsi_end_request(). In this excerpt error is always 0, and the
 * requeue argument is true only when cmd->result is 0.
 */
void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) {
    int result = cmd->result;
    int error = 0;
    // A NULL result ends processing here; the non-NULL path is elided
    if (scsi_end_request(cmd, error, good_bytes, result == 0) == NULL)
		return;
}
/*
 * End `bytes` of the command's request via blk_end_request() (abridged).
 * When blk_end_request() returns true and there is an error that
 * scsi_noretry_cmd() says must not be retried, the whole request is ended
 * with blk_end_request_all().
 *
 * NOTE(review): req is not declared and no value is returned in this
 * excerpt; both are presumably elided -- confirm against the kernel source.
 */
static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int error,
					  int bytes, int requeue) {
    if (blk_end_request(req, error, bytes)) {
        if (error && scsi_noretry_cmd(cmd))
			blk_end_request_all(req, error);
    }
}
/*
 * End nr_bytes of a request: forwards to blk_end_bidi_request() with
 * bidi_bytes set to 0 and returns its result.
 */
bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes) {
	return blk_end_bidi_request(rq, error, nr_bytes, 0);
}

软中断的产生路径

系统初始化的时候,注册了一个软中断的回调

/* Softirq numbers (abridged to the block entry; the full list appears further below). */
enum {
    BLOCK_SOFTIRQ,
};

/*
 * Init-time hook: registers blk_done_softirq as the handler for
 * BLOCK_SOFTIRQ. Always returns 0.
 */
static __init int blk_softirq_init(void) {
    open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
    return 0;
}

/* Registers blk_softirq_init to be called as a subsys initcall. */
subsys_initcall(blk_softirq_init);
/* Install action as the handler stored in slot nr of softirq_vec. */
void open_softirq(int nr, void (*action)(struct softirq_action *)) {
	softirq_vec[nr].action = action;
}
/*
 * Softirq numbers. Each value is an index into softirq_vec;
 * BLOCK_SOFTIRQ is the fifth entry (value 4).
 */
enum {
	HI_SOFTIRQ=0,
	TIMER_SOFTIRQ,
	NET_TX_SOFTIRQ,
	NET_RX_SOFTIRQ,
	BLOCK_SOFTIRQ,
	BLOCK_IOPOLL_SOFTIRQ,
	TASKLET_SOFTIRQ,
	SCHED_SOFTIRQ,
	HRTIMER_SOFTIRQ,
	RCU_SOFTIRQ,	/* Preferable RCU should always be the last softirq */

	NR_SOFTIRQS
};
/*
 * Run pending softirq handlers (abridged): walks the pending bitmask from
 * the lowest bit upward, advancing through softirq_vec in step, and calls
 * the handler of every slot whose bit is set.
 */
asmlinkage void __do_softirq(void) {
    struct softirq_action *h;
    __u32 pending;
    pending = local_softirq_pending();
    h = softirq_vec;
    // Each bit of pending corresponds to one entry of the softirq enum.
    // BLOCK_SOFTIRQ, for example, is the 5th entry, so when the 5th bit
    // from the right is 1 the BLOCK_SOFTIRQ handler is invoked.
    do {
        if (pending & 1) {
            h->action(h);
        }
        h++;
        pending >>= 1;
    } while (pending);
}

blk_cpu_done 赋值

/*
 * Queue a completed request for softirq processing (abridged): either adds
 * it to the running CPU's blk_cpu_done list, or tries to pass it to another
 * CPU through raise_blk_irq().
 *
 * NOTE(review): q is not declared in this excerpt; presumably the request's
 * queue -- confirm against the kernel source.
 */
void __blk_complete_request(struct request *req) {
    int ccpu, cpu, group_cpu;
    // Id of the CPU we are running on
    cpu = smp_processor_id();
    group_cpu = blk_cpu_to_group(cpu);
    // ccpu is req->cpu when QUEUE_FLAG_SAME_COMP is set and req->cpu is valid,
    // otherwise it is the current CPU
    if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
		ccpu = req->cpu;
	else
		ccpu = cpu;
    if (ccpu == cpu || ccpu == group_cpu) {
        // Target matches the current CPU (or its group CPU): handle locally
        struct list_head *list;
do_local:        
        list = &__get_cpu_var(blk_cpu_done);
        // Append the request to the tail of this CPU's blk_cpu_done list
        list_add_tail(&req->csd.list, list);
    } else if (raise_blk_irq(ccpu, req))
        // ccpu is not the running CPU: try to hand the request over to it;
        // a non-zero return from raise_blk_irq() falls back to local handling
		goto do_local;
}

接着看上面的调用链

/*
 * Entry point for completing a request. Does nothing when
 * blk_should_fake_timeout() is true for the queue; otherwise calls
 * __blk_complete_request() only if blk_mark_rq_complete() returns zero.
 */
void blk_complete_request(struct request *req) {
	if (unlikely(blk_should_fake_timeout(req->q)))
		return;
	if (!blk_mark_rq_complete(req))
		__blk_complete_request(req);
}
EXPORT_SYMBOL(blk_complete_request);
/* Completion hook for a SCSI command: completes the command's block request. */
static void scsi_done(struct scsi_cmnd *cmd) {
	blk_complete_request(cmd->request);
}
/*
 * Dispatch a command (abridged): only the deleted-device path is shown.
 * When the device state is SDEV_DEL the command result is set to
 * DID_NO_CONNECT << 16 and the command is completed immediately.
 * Returns rtn, which stays 0 in this excerpt.
 */
int scsi_dispatch_cmd(struct scsi_cmnd *cmd) {
	int rtn = 0;
    if (unlikely(cmd->device->sdev_state == SDEV_DEL)) {
        cmd->result = DID_NO_CONNECT << 16;
		scsi_done(cmd);
		goto out;
    }
 out:
	return rtn;
}
/*
 * Request function of a SCSI queue (abridged): repeatedly peeks the next
 * request, starts it and dispatches the command stored in req->special.
 * The loop stops when no request is available or the device is not ready.
 */
static void scsi_request_fn(struct request_queue *q) {
    // Device structure attached to the queue
    struct scsi_device *sdev = q->queuedata;
	struct scsi_cmnd *cmd;
	struct request *req;
    for (;;) {
		int rtn;
        // Look at the next request on the queue
        req = blk_peek_request(q);
        if (!req || !scsi_dev_queue_ready(q, sdev))
            // No request, or the device queue is not ready yet
			break;
        // Start the request unless the queue is tagged and blk_queue_start_tag()
        // returned zero (presumably this dequeues it -- TODO confirm)
        if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
			blk_start_request(req);
        cmd = req->special;
        rtn = scsi_dispatch_cmd(cmd);
    }
}
/*
 * Allocate the request queue for a SCSI device (abridged), passing
 * scsi_request_fn as its request handler. Returns the queue obtained from
 * __scsi_alloc_queue().
 */
struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) {
    struct request_queue *q;
    // Register scsi_request_fn as the handler function of q
    q = __scsi_alloc_queue(sdev->host, scsi_request_fn);
    return q;
}
/*
 * Allocate a SCSI device (abridged): allocates the zeroed device structure
 * plus shost->transportt->device_size extra bytes, creates its request
 * queue, and points the queue's queuedata back at the device.
 *
 * NOTE(review): this excerpt shows neither an allocation-failure check nor
 * a return statement; both are presumably elided -- confirm against the
 * kernel source.
 */
static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
					   unsigned int lun, void *hostdata) {
    struct scsi_device *sdev;
    struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
    sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size, GFP_ATOMIC);
    sdev->request_queue = scsi_alloc_queue(sdev);
    sdev->request_queue->queuedata = sdev;
}

request_queue 赋值

/*
 * Attach rq to queue q and insert it at the position selected by where
 * (abridged).
 *
 * REQUEUE/FRONT: mark the request REQ_SOFTBARRIER and add it at the head of
 * q->queue_head.
 * BACK: mark it REQ_SOFTBARRIER, drain the elevator, add it at the tail of
 * q->queue_head and run the queue.
 * The sorting and flush variants are elided in this excerpt.
 */
void __elv_add_request(struct request_queue *q, struct request *rq, int where)
{
    rq->q = q;
    switch (where) {
        case ELEVATOR_INSERT_REQUEUE:
        case ELEVATOR_INSERT_FRONT:
            rq->cmd_flags |= REQ_SOFTBARRIER;
            list_add(&rq->queuelist, &q->queue_head);
            break;
        case ELEVATOR_INSERT_BACK:
            rq->cmd_flags |= REQ_SOFTBARRIER;
            elv_drain_elevator(q);
            list_add_tail(&rq->queuelist, &q->queue_head);
            __blk_run_queue(q);
            break;
        case ELEVATOR_INSERT_SORT_MERGE:
        case ELEVATOR_INSERT_SORT:
        case ELEVATOR_INSERT_FLUSH:
        default:
            // Omitted here; in every case the request ends up queued somewhere.
            // The break keeps the label followed by a statement, as C11 requires.
            break;
    }
}

同步方法

/*
 * Queue rq on q and start the queue (abridged). The request is inserted at
 * the front when at_head is non-zero, otherwise at the back.
 * bd_disk and done are not used in this excerpt.
 */
void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
			   struct request *rq, int at_head,
			   rq_end_io_fn *done) {
    int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
    __elv_add_request(q, rq, where);
    __blk_run_queue(q);
}
⚠️ **GitHub.com Fallback** ⚠️