task pid - ceragon/LinuxDoc GitHub Wiki
/*
 * One (namespace, number) pair belonging to a struct pid: the numeric id that
 * the pid has inside one particular pid namespace.
 */
struct upid {
/* Try to keep pid_chain in the same cacheline as nr for find_vpid */
/* Numeric pid value; this is what pid_nr_ns() returns for a matching ns. */
int nr;
/* The namespace in which nr is valid; compared against the lookup ns. */
struct pid_namespace *ns;
/* Links this upid into a pid_hash bucket; find_pid_ns() walks this chain. */
struct hlist_node pid_chain;
};
/*
 * Kernel-internal pid object. It carries one struct upid per namespace level
 * and, per pid type, the head of a list of tasks attached to it.
 */
struct pid
{
/* NOTE(review): presumably the reference count of this object - confirm. */
atomic_t count;
/* Highest valid index into numbers[]; pid_nr_ns() rejects ns->level > level. */
unsigned int level;
/* lists of tasks that use this pid */
struct hlist_head tasks[PIDTYPE_MAX];
/* NOTE(review): presumably used for RCU-deferred freeing - confirm. */
struct rcu_head rcu;
/*
 * Indexed by namespace level (numbers[ns->level]). Declared with one element
 * but indexed up to 'level', so the real allocation is presumably larger than
 * sizeof(struct pid) when level > 0 - confirm against the allocator.
 */
struct upid numbers[1];
};
/*
 * Kinds of id a struct pid can represent for a task. The values index both
 * pid->tasks[] and task_struct->pids[].
 */
enum pid_type
{
/* The task's own pid. */
PIDTYPE_PID,
/* Process group id. */
PIDTYPE_PGID,
/* Session id. */
PIDTYPE_SID,
/* Number of types; used as the size of the per-type arrays. */
PIDTYPE_MAX
};
/*
 * Excerpt: only the pid-related member is shown here; other members used in
 * this file (e.g. group_leader, nsproxy) are omitted from this excerpt.
 */
struct task_struct {
/* One link per pid type: pids[type].pid is the struct pid of that type. */
struct pid_link pids[PIDTYPE_MAX];
};
/*
 * Connects a task to one struct pid for one pid type.
 */
struct pid_link
{
/* List node hooked into pid->tasks[type]; pid_task() maps it back to the task. */
struct hlist_node node;
/* The struct pid this link refers to. */
struct pid *pid;
};
- numbers[0]: 表示 level 0 的命名空间对应的 upid
- numbers[1]: 表示 level 1 的命名空间对应的 upid
numbers 数组的实际长度取决于 pid->level 的值(下标范围是 0 到 pid->level,共 level + 1 个元素)
/*
 * Return the id of the given type that @task has when viewed from @ns.
 *
 * @task: task to query.
 * @type: which id is wanted (PIDTYPE_PID, PIDTYPE_PGID or PIDTYPE_SID).
 * @ns:   namespace to translate into; when NULL, the pid namespace of
 *        'current' is used.
 *
 * Returns 0 when the task is no longer alive or pid_nr_ns() finds no number
 * for it in @ns. The whole lookup runs inside an RCU read-side section.
 */
pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
		       struct pid_namespace *ns)
{
	pid_t result = 0;

	rcu_read_lock();

	/* No namespace given: fall back to the caller's own pid namespace. */
	if (!ns) {
		ns = current->nsproxy->pid_ns;
	}

	/* The common case is that the task still has its pids attached. */
	if (likely(pid_alive(task))) {
		/* Group and session ids are read from the group leader's links. */
		struct task_struct *owner =
			(type != PIDTYPE_PID) ? task->group_leader : task;

		result = pid_nr_ns(owner->pids[type].pid, ns);
	}

	rcu_read_unlock();
	return result;
}
/*
 * Translate @pid into the number it has inside namespace @ns.
 *
 * Returns 0 when @pid is NULL, when @ns is deeper than @pid's own level, or
 * when the upid stored at @ns's level belongs to a different namespace.
 */
pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
{
	struct upid *slot;

	/* numbers[] only has entries for levels 0..pid->level. */
	if (!pid || ns->level > pid->level)
		return 0;

	/* Levels start at 0, so the namespace level is used directly as index. */
	slot = &pid->numbers[ns->level];

	/* Same level is not enough: it must be this exact namespace. */
	return (slot->ns == ns) ? slot->nr : 0;
}
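pid->tasks 数组的大小是 PIDTYPE_MAX(即 3)。每个元素都是一个链表头,链表上挂的是 task_struct->pids[type].node;通过这个 node 又可以用 hlist_entry 反查出对应的 task_struct。
- tasks[0](PIDTYPE_PID)= 以该 pid 作为进程 id 的进程
- tasks[1](PIDTYPE_PGID)= 以该 pid 作为进程组 id 的进程
- tasks[2](PIDTYPE_SID)= 以该 pid 作为 session id 的进程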
/*
 * Look up the task attached to @pid for the given @type and, if one is found,
 * call get_task_struct() on it before returning it.
 *
 * Returns NULL when pid_task() finds no task. The lookup and the
 * get_task_struct() call both happen inside one RCU read-side section.
 */
struct task_struct *get_pid_task(struct pid *pid, enum pid_type type)
{
	struct task_struct *tsk;

	rcu_read_lock();
	tsk = pid_task(pid, type);
	if (tsk) {
		/* Take a reference on the task before leaving the RCU section. */
		get_task_struct(tsk);
	}
	rcu_read_unlock();

	return tsk;
}
/*
 * Return the first task on @pid's list for @type, or NULL when @pid is NULL
 * or that list is empty.
 *
 * The dereference is checked against holding either the RCU read lock or the
 * tasklist lock.
 */
struct task_struct *pid_task(struct pid *pid, enum pid_type type)
{
	struct hlist_node *head_node;

	if (!pid)
		return NULL;

	/* Only the head element of tasks[type] is ever looked at. */
	head_node = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
					  rcu_read_lock_held() ||
					  lockdep_tasklist_lock_is_held());
	if (!head_node)
		return NULL;

	/* The node is embedded in task_struct as pids[type].node. */
	return hlist_entry(head_node, struct task_struct, pids[(type)].node);
}
// Global hash table of struct upid entries; per the original note it holds the
// upids of all processes. find_pid_ns() picks a bucket with pid_hashfn(nr, ns).
// Every upid is embedded in exactly one struct pid (in its numbers[] array,
// which may hold several upids), so a upid found here can be mapped back to
// its owning struct pid with container_of().
static struct hlist_head *pid_hash;
/*
 * Find the struct pid whose number is @nr inside namespace @ns.
 *
 * @nr: numeric pid value to look for.
 * @ns: namespace in which @nr is to be interpreted.
 *
 * Returns the matching struct pid, or NULL when no upid with that
 * (nr, ns) pair is present in the hash table.
 */
struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
{
	struct hlist_head *head = &pid_hash[pid_hashfn(nr, ns)];
	struct hlist_node *elem;
	struct upid *pnr;

	/*
	 * Hand-expanded equivalent of
	 *     hlist_for_each_entry_rcu(pnr, elem, head, pid_chain)
	 *
	 * The loop must run for every node, including the last one in the
	 * bucket: the macro only prefetches elem->next, it never uses it as a
	 * termination condition. Testing elem->next would skip the tail node
	 * and make single-entry buckets unsearchable.
	 *
	 * NOTE: the kernel macro additionally wraps each pointer load in
	 * rcu_dereference_raw(); that detail is left out of this expansion.
	 */
	for (elem = hlist_first_rcu(head); elem; elem = hlist_next_rcu(elem)) {
		/* elem is the pid_chain member embedded in a struct upid. */
		pnr = hlist_entry(elem, struct upid, pid_chain);

		/* Both the number and the namespace have to match. */
		if (pnr->nr == nr && pnr->ns == ns) {
			/*
			 * pnr is the element numbers[ns->level] of its owning
			 * struct pid, so container_of() with that member
			 * designator yields the address of the struct pid.
			 */
			return container_of(pnr, struct pid, numbers[ns->level]);
		}
	}

	return NULL;
}
/*
 * Look up @nr in the pid namespace of the current task.
 *
 * Returns whatever find_pid_ns() returns for that namespace.
 */
struct pid *find_vpid(int nr)
{
	struct pid_namespace *active_ns = current->nsproxy->pid_ns;

	return find_pid_ns(nr, active_ns);
}
find_task_by_pid_ns 其实是对上面两个方法的包装:先用 find_pid_ns 根据 pid 数值和 ns 找到对应的 struct pid 实例,再用 pid_task 根据该实例和 pid 类型(PIDTYPE_PID)取得对应的 task。
/*
 * Find the task whose pid number is @nr inside namespace @ns.
 *
 * Asserts (via lockdep) that the RCU read lock is held. Returns NULL when
 * either the struct pid or a task attached to it as PIDTYPE_PID is not found.
 */
struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
{
	struct pid *found;

	rcu_lockdep_assert(rcu_read_lock_held());

	/* Step 1: number + namespace -> struct pid. */
	found = find_pid_ns(nr, ns);

	/* Step 2: struct pid -> task that uses it as its own pid. */
	return pid_task(found, PIDTYPE_PID);
}
/*
 * One chunk of the pid allocation bitmap, covering BITS_PER_PAGE pid values.
 */
struct pidmap {
/* Free-slot counter; alloc_pidmap() reads it and decrements it per allocation. */
atomic_t nr_free;
/* Bitmap storage, allocated lazily (PAGE_SIZE bytes) by alloc_pidmap(). */
void *page;
};
/*
 * Excerpt: only the allocation-related members are shown. Other code in this
 * file also reads ns->level, which is not part of this excerpt.
 */
struct pid_namespace {
/* Allocation bitmap, split into chunks of BITS_PER_PAGE pids each. */
struct pidmap pidmap[PIDMAP_ENTRIES];
/* Starting point for the next search in alloc_pidmap(). */
int last_pid;
};
/*
 * Allocate a free pid number in @pid_ns by scanning its bitmap.
 *
 * The search starts just after pid_ns->last_pid, walks the pidmap chunks in
 * order and wraps around to RESERVED_PIDS once the end is reached.
 *
 * Returns the allocated pid on success, or -1 when no free pid was found or
 * a bitmap page could not be allocated.
 */
static int alloc_pidmap(struct pid_namespace *pid_ns) {
int i, offset, max_scan, pid, last = pid_ns->last_pid;
struct pidmap *map;
// 'last' is the starting point recorded in this namespace (last_pid).
// Begin probing at last + 1.
pid = last + 1;
if (pid >= pid_max)
// Past the upper limit: wrap around to RESERVED_PIDS (300 per the original note).
pid = RESERVED_PIDS;
// BITS_PER_PAGE_MASK = 0b0111_1111_1111_1111 (32767 decimal), assuming 4 KiB pages.
// The mask gives the bit position of pid inside its bitmap page.
offset = pid & BITS_PER_PAGE_MASK;
// BITS_PER_PAGE is 32768 decimal, i.e. 0x8000 hex (again assuming 4 KiB pages).
// pidmap is an array, so every 32768 ids form one group with its own entry.
map = &pid_ns->pidmap[pid / BITS_PER_PAGE];
/*
 * If last_pid points into the middle of the map->page we
 * want to scan this bitmap block twice, the second time
 * we start with offset == 0 (or RESERVED_PIDS).
 */
max_scan = DIV_ROUND_UP(pid_max, BITS_PER_PAGE) - !offset;
for (i = 0; i <= max_scan; ++i) {
// The bitmap page of a chunk is allocated on first use.
if (unlikely(!map->page)) {
void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
/*
 * Free the page if someone raced with us
 * installing it:
 */
spin_lock_irq(&pidmap_lock);
if (!map->page) {
map->page = page;
page = NULL;
}
spin_unlock_irq(&pidmap_lock);
// page is NULL here if it was installed above, so this frees only a
// page that lost the race.
kfree(page);
// Still no page: the allocation failed and nobody else installed one.
if (unlikely(!map->page))
break;
}
// Only scan the bits if the counter says this chunk has free slots.
if (likely(atomic_read(&map->nr_free))) {
do {
// Atomically claim the bit; a previous value of 0 means the pid was free.
if (!test_and_set_bit(offset, map->page)) {
atomic_dec(&map->nr_free);
// NOTE(review): presumably records pid as the namespace's new last_pid - confirm.
set_last_pid(pid_ns, last, pid);
return pid;
}
// Bit was taken: advance to the next candidate offset in this page and
// recompute the pid from (map, offset).
offset = find_next_offset(map, offset);
pid = mk_pid(pid_ns, map, offset);
} while (offset < BITS_PER_PAGE && pid < pid_max);
}
// Move on to the next chunk, or wrap to the first one after the last
// chunk that can contain a pid below pid_max.
if (map < &pid_ns->pidmap[(pid_max - 1) / BITS_PER_PAGE]) {
++map;
offset = 0;
} else {
map = &pid_ns->pidmap[0];
offset = RESERVED_PIDS;
// Wrapped back to where the search started: stop scanning.
if (unlikely(last == offset))
break;
}
pid = mk_pid(pid_ns, map, offset);
}
return -1;
}