proc diskstats - ceragon/LinuxDoc GitHub Wiki
$ cat /proc/diskstats
7 0 loop0 14 0 34 1 0 0 0 0 0 20 1 0 0 0 0 0 0
7 1 loop1 45 0 702 46 0 0 0 0 0 72 46 0 0 0 0 0 0
7 2 loop2 1123 0 48088 263 0 0 0 0 0 2676 263 0 0 0 0 0 0
7 3 loop3 61 0 2208 45 0 0 0 0 0 64 45 0 0 0 0 0 0
7 4 loop4 3465 0 106416 634 0 0 0 0 0 6288 634 0 0 0 0 0 0
7 5 loop5 2926 0 356756 907 0 0 0 0 0 4128 907 0 0 0 0 0 0
7 6 loop6 3153 0 16442 137 0 0 0 0 0 1120 137 0 0 0 0 0 0
7 7 loop7 43 0 700 14 0 0 0 0 0 28 14 0 0 0 0 0 0
8 0 sda 795 24 11305 143450 108 15 154792 4470 0 13432 148450 0 0 0 0 30 529
8 1 sda1 370 24 8714 133638 108 15 154792 4470 0 4092 138108 0 0 0 0 0 0
8 16 sdb 321538 85929 15917616 133118 96261 489238 21298746 418453 0 568976 579014 0 0 0 0 13306 27442
8 17 sdb1 26 0 208 364 0 0 0 0 0 384 364 0 0 0 0 0 0
8 18 sdb2 147 23 10992 486 2 0 2 0 0 544 486 0 0 0 0 0 0
8 19 sdb3 320822 85906 15900258 131753 96259 489238 21298744 418453 0 568312 550207 0 0 0 0 0 0
11 0 sr0 10 0 4 7 0 0 0 0 0 28 7 0 0 0 0 0 0
7 8 loop8 1102 0 28980 216 0 0 0 0 0 1984 216 0 0 0 0 0 0
7 11 loop11 43 0 694 11 0 0 0 0 0 44 11 0 0 0 0 0 0
7 9 loop9 145 0 8316 49 0 0 0 0 0 376 49 0 0 0 0 0 0
7 10 loop10 29 0 378 7 0 0 0 0 0 60 7 0 0 0 0 0 0
7 12 loop12 73 0 2216 3 0 0 0 0 0 48 3 0 0 0 0 0 0
/* Kernel excerpt: the per-partition object.  dkstats points at the
 * per-CPU disk_stats counters that /proc/diskstats reports.
 * (The real struct has many more members; trimmed for this article.) */
struct hd_struct {
struct disk_stats __percpu *dkstats;
};
/* Per-CPU I/O counters, one slot per stat_group (read/write/discard):
 * sectors[] counts 512-byte sectors transferred, ios[] counts completed
 * requests.  (Other fields of the real struct are trimmed.) */
struct disk_stats {
unsigned long sectors[NR_STAT_GROUPS];
unsigned long ios[NR_STAT_GROUPS];
};
/* Index into the per-group counter arrays in struct disk_stats. */
enum stat_group {
STAT_READ,
STAT_WRITE,
STAT_DISCARD,
NR_STAT_GROUPS
};
static int diskstats_show(struct seq_file *seqf, void *v) {
struct gendisk *gp = v;
struct disk_part_iter piter;
struct hd_struct *hd;
while ((hd = disk_part_iter_next(&piter))) {
seq_printf(seqf, "%4d %7d %s "
"%lu %lu %lu %u "
"%lu %lu %lu %u "
"%u %u %u "
"%lu %lu %lu %u\n",
disk_name(gp, hd->partno, buf),
part_stat_read(hd, ios[STAT_READ]),
part_stat_read(hd, sectors[STAT_READ]),
part_stat_read(hd, ios[STAT_WRITE]),
part_stat_read(hd, sectors[STAT_WRITE]),
part_stat_read(hd, ios[STAT_DISCARD]),
part_stat_read(hd, sectors[STAT_DISCARD]),
);
}
return 0;
}
/* Bump a per-CPU stat field by one. */
#define part_stat_inc(cpu, gendiskp, field) \
part_stat_add(cpu, gendiskp, field, 1)
/* Add `addnd` to the partition's counter and, when this is a real
 * partition (partno != 0), also to the whole-disk counter (part0) —
 * which is why sda's numbers include sda1's in the sample output. */
#define part_stat_add(cpu, part, field, addnd) do { \
__part_stat_add((cpu), (part), field, addnd); \
if ((part)->partno) \
__part_stat_add((cpu), &part_to_disk((part))->part0, \
field, addnd); \
} while (0)
/* Raw per-CPU add on the dkstats field for the given cpu. */
#define __part_stat_add(cpu, part, field, addnd) \
(per_cpu_ptr((part)->dkstats, (cpu))->field += (addnd))
不用太深究下面方法的调用,只需要知道每个 IO 请求执行成功都会将 ios 计数 +1
/* Kernel excerpt: when an accounted request completes, increment the
 * ios[read|write] counter of the request's partition.
 * NOTE(review): heavily trimmed — the upstream function also updates
 * ticks/in-flight counts and calls part_stat_unlock(). */
static void blk_account_io_done(struct request *req) {
if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) {
// const int rw = rq_data_dir(req);
const int rw = (req)->cmd_flags & 1;
struct hd_struct *part;
int cpu;
cpu = part_stat_lock();
part = req->part;
// increment ios[rw] by 1
part_stat_inc(cpu, part, ios[rw]);
}
}
/* Kernel excerpt: finish a request — only the accounting call (which
 * bumps ios[]) is kept here; the rest of the function is trimmed. */
static void blk_finish_request(struct request *req, int error) {
blk_account_io_done(req);
}
static bool blk_end_bidi_request(struct request *rq, int error,
unsigned int nr_bytes, unsigned int bidi_bytes) {
if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
return true;
blk_finish_request(rq, error);
}
/* Kernel excerpt: when part of a request completes, add the completed
 * size to sectors[rw], converting bytes to 512-byte sectors (>> 9). */
static void blk_account_io_completion(struct request *req, unsigned int bytes) {
if (blk_do_io_stat(req)) {
const int rw = rq_data_dir(req);
struct hd_struct *part;
int cpu;
cpu = part_stat_lock();
part = req->part;
// add this request's completed bytes, as 512-byte sectors
part_stat_add(cpu, part, sectors[rw], bytes >> 9);
}
}
bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) {
blk_account_io_completion(req, nr_bytes);
}
static bool blk_update_bidi_request(struct request *rq, int error,
unsigned int nr_bytes,
unsigned int bidi_bytes) {
if (blk_update_request(rq, error, nr_bytes))
return true;
if (unlikely(blk_bidi_rq(rq)) &&
blk_update_request(rq->next_rq, error, bidi_bytes))
return true;
}
static bool blk_end_bidi_request(struct request *rq, int error,
unsigned int nr_bytes, unsigned int bidi_bytes) {
if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
return true;
blk_finish_request(rq, error);
}
每次产生 IO 的请求,且请求执行结束的时候会将磁盘的 ios 值 +1,将本次请求的数据量加到 sectors 上。
/* BLOCK_SOFTIRQ handler: drain this CPU's list of completed requests
 * and invoke each request queue's softirq_done_fn on them. */
static void blk_done_softirq(struct softirq_action *h) {
struct list_head *cpu_list, local_list;
// grab this CPU's per-CPU blk_cpu_done list
cpu_list = &__get_cpu_var(blk_cpu_done);
list_replace_init(cpu_list, &local_list);
while (!list_empty(&local_list)) {
struct request *rq;
// take one completed I/O request
rq = list_entry(local_list.next, struct request, csd.list);
// unlink it from the local list
list_del_init(&rq->csd.list);
// run the queue's completion callback for this request
rq->q->softirq_done_fn(rq);
}
}
/*
 * Kernel excerpt: softirq-time completion handler for SCSI requests.
 * Decides how to dispose of the finished command; on SUCCESS it moves
 * on to scsi_finish_command().  (Other dispositions are trimmed.)
 * Fix: `disposition` was used without a declaration in the excerpt;
 * added a default case for the trimmed disposition values.
 */
static void scsi_softirq_done(struct request *rq) {
	struct scsi_cmnd *cmd = rq->special;
	int disposition;

	disposition = scsi_decide_disposition(cmd);
	switch (disposition) {
	case SUCCESS:
		scsi_finish_command(cmd);
		break;
	default:
		break;	/* retry/requeue/error paths trimmed in this excerpt */
	}
}
/* Kernel excerpt: treat the whole command buffer as successfully
 * transferred and hand off to scsi_io_completion().
 * NOTE(review): error/residual handling is trimmed here. */
void scsi_finish_command(struct scsi_cmnd *cmd) {
unsigned int good_bytes;
good_bytes = scsi_bufflen(cmd);
scsi_io_completion(cmd, good_bytes);
}
/* Kernel excerpt: complete good_bytes of the command through
 * scsi_end_request(); returning NULL means the request is fully done.
 * NOTE(review): retry/sense handling of the upstream function trimmed. */
void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) {
int result = cmd->result;
int error = 0;
if (scsi_end_request(cmd, error, good_bytes, result == 0) == NULL)
return;
}
static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int error,
int bytes, int requeue) {
if (blk_end_request(req, error, bytes)) {
if (error && scsi_noretry_cmd(cmd))
blk_end_request_all(req, error);
}
}
/* Complete nr_bytes of an ordinary (non-bidi) request: delegate to
 * blk_end_bidi_request() with zero bidi bytes. */
bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes) {
return blk_end_bidi_request(rq, error, nr_bytes, 0);
}
系统初始化的时候,注册了一个软中断的回调
/* Abridged: only the softirq index used by the block layer is shown
 * here; the full softirq enum appears further down. */
enum {
BLOCK_SOFTIRQ,
};
/* At boot (subsys initcall), register blk_done_softirq as the handler
 * for BLOCK_SOFTIRQ. */
static __init int blk_softirq_init(void) {
open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
return 0;
}
subsys_initcall(blk_softirq_init);
/* Install `action` as the handler for softirq number `nr` in the
 * global softirq_vec table that __do_softirq() walks. */
void open_softirq(int nr, void (*action)(struct softirq_action *)) {
softirq_vec[nr].action = action;
}
/* The full softirq vector: BLOCK_SOFTIRQ is index 4 (the 5th entry),
 * which matches the bit position checked in __do_softirq() below. */
enum {
HI_SOFTIRQ=0,
TIMER_SOFTIRQ,
NET_TX_SOFTIRQ,
NET_RX_SOFTIRQ,
BLOCK_SOFTIRQ,
BLOCK_IOPOLL_SOFTIRQ,
TASKLET_SOFTIRQ,
SCHED_SOFTIRQ,
HRTIMER_SOFTIRQ,
RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */
NR_SOFTIRQS
};
/* Kernel excerpt: run every pending softirq handler once. */
asmlinkage void __do_softirq(void) {
struct softirq_action *h;
__u32 pending;
pending = local_softirq_pending();
h = softirq_vec;
// Each bit of `pending` corresponds to one entry of the softirq enum
// above.  E.g. BLOCK_SOFTIRQ is the 5th entry (index 4), so if the
// 5th-lowest bit of `pending` is set, BLOCK_SOFTIRQ has been raised.
do {
if (pending & 1) {
h->action(h);
}
h++;
pending >>= 1;
} while (pending);
}
void __blk_complete_request(struct request *req) {
int ccpu, cpu, group_cpu;
// 当前的 CPU id
cpu = smp_processor_id();
group_cpu = blk_cpu_to_group(cpu);
// ccpu 可能是发起请求的 CPU 也可能是当前 CPU
if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
ccpu = req->cpu;
else
ccpu = cpu;
if (ccpu == cpu || ccpu == group_cpu) {
// 说明是当前 CPU 处理直接处理
struct list_head *list;
do_local:
list = &__get_cpu_var(blk_cpu_done);
// 将当前请求添加到 CPU 本地变量的 blk_cpu_done 列表的末尾
list_add_tail(&req->csd.list, list);
} else if (raise_blk_irq(ccpu, req))
// 因为 cpu 才是当前运行的 CPU,所以需要对 ccpu 进行抢断
goto do_local;
}
接着看上面的调用链
/* Kernel excerpt: driver-facing completion entry point.  Skips the
 * fake-timeout test path and requests already marked complete, then
 * hands off to __blk_complete_request(). */
void blk_complete_request(struct request *req) {
if (unlikely(blk_should_fake_timeout(req->q)))
return;
if (!blk_mark_rq_complete(req))
__blk_complete_request(req);
}
EXPORT_SYMBOL(blk_complete_request);
/* SCSI command-completion callback: route the underlying block-layer
 * request into blk_complete_request(). */
static void scsi_done(struct scsi_cmnd *cmd) {
blk_complete_request(cmd->request);
}
/* Kernel excerpt: dispatch a command to the low-level driver.  Only the
 * "device deleted" early-completion path is kept: the command is failed
 * with DID_NO_CONNECT and completed immediately via scsi_done(). */
int scsi_dispatch_cmd(struct scsi_cmnd *cmd) {
int rtn = 0;
if (unlikely(cmd->device->sdev_state == SDEV_DEL)) {
cmd->result = DID_NO_CONNECT << 16;
scsi_done(cmd);
goto out;
}
out:
return rtn;
}
/* Kernel excerpt: the SCSI request_fn — pull requests off the block
 * queue and dispatch them until the queue is empty or not ready. */
static void scsi_request_fn(struct request_queue *q) {
// the SCSI device owning this queue (set up in scsi_alloc_sdev)
struct scsi_device *sdev = q->queuedata;
struct scsi_cmnd *cmd;
struct request *req;
for (;;) {
int rtn;
// peek at the next request on the queue
req = blk_peek_request(q);
if (!req || !scsi_dev_queue_ready(q, sdev))
// no request, or the device is not ready yet
break;
// remove the request from the queue (start it)
if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
blk_start_request(req);
cmd = req->special;
rtn = scsi_dispatch_cmd(cmd);
}
}
/* Allocate the device's request queue, registering scsi_request_fn as
 * its request-processing function. */
struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) {
struct request_queue *q;
// register a request-processing function for q
q = __scsi_alloc_queue(sdev->host, scsi_request_fn);
return q;
}
/*
 * Kernel excerpt: allocate a scsi_device, give it a request queue whose
 * request_fn is scsi_request_fn, and point the queue's queuedata back
 * at the device (read later by scsi_request_fn).
 * Fix: the pointer-returning function never returned the allocated
 * device; upstream returns sdev.
 * NOTE(review): allocation-failure handling is trimmed — kzalloc()'s
 * result is used unchecked in this excerpt.
 */
static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
					   unsigned int lun, void *hostdata) {
	struct scsi_device *sdev;
	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);

	sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size, GFP_ATOMIC);
	sdev->request_queue = scsi_alloc_queue(sdev);
	sdev->request_queue->queuedata = sdev;
	return sdev;
}
/*
 * Kernel excerpt: insert a request into the queue according to `where`.
 * FRONT/REQUEUE insertions go to the head of queue_head; BACK drains
 * the elevator, appends to the tail, and kicks the queue.  The
 * sort/merge/flush paths are trimmed in this excerpt.
 * Fix: in C a label such as `default:` must be followed by a statement,
 * not just a comment and `}` — added `break;` so the excerpt compiles.
 */
void __elv_add_request(struct request_queue *q, struct request *rq, int where)
{
	rq->q = q;

	switch (where) {
	case ELEVATOR_INSERT_REQUEUE:
	case ELEVATOR_INSERT_FRONT:
		rq->cmd_flags |= REQ_SOFTBARRIER;
		list_add(&rq->queuelist, &q->queue_head);
		break;
	case ELEVATOR_INSERT_BACK:
		rq->cmd_flags |= REQ_SOFTBARRIER;
		elv_drain_elevator(q);
		list_add_tail(&rq->queuelist, &q->queue_head);
		__blk_run_queue(q);
		break;
	case ELEVATOR_INSERT_SORT_MERGE:
	case ELEVATOR_INSERT_SORT:
	case ELEVATOR_INSERT_FLUSH:
	default:
		/* trimmed: these are handed to the elevator/flush machinery,
		 * but still end up queued somewhere */
		break;
	}
}
异步方法:blk_execute_rq_nowait 将请求插入队列并启动队列后立即返回,不等待请求完成(同步版本是 blk_execute_rq)
/* Kernel excerpt: insert rq at the head or tail of the queue (per
 * at_head) and kick the queue.  As the "nowait" name suggests, this
 * returns without waiting for the request to complete. */
void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
struct request *rq, int at_head,
rq_end_io_fn *done) {
int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
__elv_add_request(q, rq, where);
__blk_run_queue(q);
}