ubifs: How ubifs_link affects a tmpfile's orphan node - 549642238/linux-stable GitHub Wiki
orphan node是一个已经打开的文件进行unlink或删除操作导致其nlink count减为0的inode。在ubifs中,创建tmpfile也会生成orphan inode。下面以mount后创建tmpfile并生成orphan inode到umount再mount后对orphan inode的处理流程为例,在中间过程发生link操作对orphan inode的影响:
A. 在同一个成功mount的ubifs文件系统上创建tmpfile,i_op_tmpfile -> ubifs_tmpfile -> do_tmpfile -> ubifs_jnl_update -> ubifs_add_orphan -> orphan_add
/*
 * do_tmpfile() excerpt: only the link-count handling and the journal update
 * are shown. On both branches below the new inode ends up with i_nlink == 0
 * before ubifs_jnl_update() is called with deletion = 1.
 */
static int do_tmpfile(struct inode *dir, struct dentry *dentry,
umode_t mode, struct inode **whiteout)
{
...
if (whiteout) {
mark_inode_dirty(inode);
drop_nlink(inode); // decrement i_nlink; the tmpfile inode now has i_nlink == 0
*whiteout = inode;
} else {
d_tmpfile(dentry, inode); // per the article, this calls drop_nlink, so i_nlink == 0 here as well
}
// at this point the tmpfile inode has i_nlink == 0
...
err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 1, 0);
...
}
/*
 * ubifs_jnl_update() excerpt: only the part that registers an orphan is
 * shown; the rest of the function is elided.
 */
int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
const struct qstr *nm, const struct inode *inode,
int deletion, int xent)
{
int last_reference = !!(deletion && inode->i_nlink == 0); // 1 for a tmpfile: deletion = 1 and i_nlink == 0
if (last_reference) {
err = ubifs_add_orphan(c, inode->i_ino); // always reached for the inode passed in by tmpfile creation
...
}
// ubifs_add_orphan -> orphan_add
/*
 * orphan_add() excerpt: allocates the in-memory orphan record for inode
 * number inum and, under c->orphan_lock, inserts it into c->orph_tree,
 * c->orph_list and c->orph_new.
 */
static struct ubifs_orphan *orphan_add(struct ubifs_info *c, ino_t inum,
struct ubifs_orphan *parent_orphan)
{
struct ubifs_orphan *orphan, *o;
orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_NOFS); // in-memory representation of the orphan node
orphan->inum = inum;
orphan->new = 1; // set the orphan's "new" flag
...
spin_lock(&c->orphan_lock); // take c->orphan_lock
p = &c->orph_tree.rb_node;
while (*p) { // walk the rb-tree c->orph_tree to find the insertion point
parent = *p;
o = rb_entry(parent, struct ubifs_orphan, rb);
...
}
c->tot_orphans += 1;
c->new_orphans += 1; // counts new orphans, i.e. ones that have not been committed
rb_link_node(&orphan->rb, parent, p);
rb_insert_color(&orphan->rb, &c->orph_tree);
list_add_tail(&orphan->list, &c->orph_list); // append the orphan to c->orph_list
list_add_tail(&orphan->new_list, &c->orph_new); // append the orphan to c->orph_new
spin_unlock(&c->orphan_lock); // release c->orphan_lock
...
}
B1. 很多操作都可以触发do_commit,比如mkdir、link、mknod等。do_commit对orphan node的处理有两个阶段,分别是ubifs_orphan_start_commit和ubifs_orphan_end_commit。先看第一阶段ubifs_orphan_start_commit:
/*
 * First commit phase for orphans: under c->orphan_lock, move every orphan
 * from c->orph_new onto the c->orph_cnext chain, switching its state from
 * "new" to "cmt", and reset the new-orphan bookkeeping. Always returns 0.
 */
int ubifs_orphan_start_commit(struct ubifs_info *c)
{
struct ubifs_orphan *orphan, **last;
spin_lock(&c->orphan_lock); // take c->orphan_lock
last = &c->orph_cnext;
list_for_each_entry(orphan, &c->orph_new, new_list) { // visit every orphan on the c->orph_new list
ubifs_assert(orphan->new);
ubifs_assert(!orphan->cmt);
orphan->new = 0; // clear the "new" flag
orphan->cmt = 1; // set the "cmt" flag: this orphan is now being committed
*last = orphan; // chain the orphan onto the c->orph_cnext list
last = &orphan->cnext;
}
*last = NULL;
c->cmt_orphans = c->new_orphans; // number of orphans this commit has to write
c->new_orphans = 0; // there are no new orphans any more
dbg_cmt("%d orphans to commit", c->cmt_orphans);
INIT_LIST_HEAD(&c->orph_new); // empty the c->orph_new list
if (c->tot_orphans == 0)
c->no_orphs = 1;
else
c->no_orphs = 0;
spin_unlock(&c->orphan_lock); // release c->orphan_lock
return 0;
}
B2. ubifs_orphan_end_commit -> commit_orphans -> write_orph_nodes -> write_orph_node -> do_write_orph_node
/*
 * Second commit phase for orphans: write out the orphans selected by the
 * start phase (if any), then reap the orphans that were deleted while the
 * commit was in progress. Returns the first error encountered, otherwise
 * the result of dbg_check_orphans().
 */
int ubifs_orphan_end_commit(struct ubifs_info *c)
{
int err;
if (c->cmt_orphans != 0) {
err = commit_orphans(c); // only when there are orphans to commit
if (err)
return err;
}
/*
 * erase_deleted() must always run once here. It walks c->orph_dnext,
 * removes each orphan from the rb-tree and the lists, and frees it.
 * commit_orphans() writes in batches and drops c->orphan_lock between
 * them; a link operation in that window moves an orphan whose cmt flag
 * is set onto c->orph_dnext. Such an orphan is still written to flash:
 * once cmt is set it is already on c->orph_cnext, and everything on
 * c->orph_cnext gets written.
 */
erase_deleted(c);
err = dbg_check_orphans(c);
return err;
}
/*
 * Free every orphan queued on c->orph_dnext (orphans deleted while they
 * were being committed), under c->orphan_lock.
 */
static void erase_deleted(struct ubifs_info *c)
{
struct ubifs_orphan *orphan, *dnext;
spin_lock(&c->orphan_lock); // take c->orphan_lock
dnext = c->orph_dnext;
while (dnext) { // for each orphan on c->orph_dnext: remove it from c->orph_tree and c->orph_list, decrement c->tot_orphans, free it
orphan = dnext;
dnext = orphan->dnext;
ubifs_assert(c, !orphan->new);
ubifs_assert(c, orphan->del);
rb_erase(&orphan->rb, &c->orph_tree);
list_del(&orphan->list);
c->tot_orphans -= 1;
dbg_gen("deleting orphan ino %lu", (unsigned long)orphan->inum);
kfree(orphan);
}
c->orph_dnext = NULL; // the c->orph_dnext list is now empty
spin_unlock(&c->orphan_lock); // release c->orphan_lock
}
/*
 * write_orph_nodes() excerpt: keeps calling write_orph_node() until no
 * orphans remain to be committed.
 */
static int write_orph_nodes(struct ubifs_info *c, int atomic)
{
...
while (c->cmt_orphans > 0) {
err = write_orph_node(c, atomic); // write committing orphans batch by batch until c->cmt_orphans reaches 0
...
}
...
}
/*
 * write_orph_node() excerpt: takes one batch of cnt orphans off the
 * c->orph_cnext chain, records their inode numbers in c->orph_buf and
 * writes that buffer out. The computation of cnt and len is elided.
 */
static int write_orph_node(struct ubifs_info *c, int atomic)
{
struct ubifs_orph_node *orph; // on-flash representation of an orphan node
...
orph = c->orph_buf; // c->orph_buf is what gets written to flash; filling orph fills c->orph_buf
spin_lock(&c->orphan_lock); // take c->orphan_lock
cnext = c->orph_cnext;
for (i = 0; i < cnt; i++) { // take the next cnt orphans from the c->orph_cnext chain
orphan = cnext;
ubifs_assert(orphan->cmt);
orph->inos[i] = cpu_to_le64(orphan->inum); // record the orphan's inode number in orph
orphan->cmt = 0; // clear the orphan's "cmt" flag
cnext = orphan->cnext;
orphan->cnext = NULL; // detach this orphan from the c->orph_cnext chain
}
c->orph_cnext = cnext;
c->cmt_orphans -= cnt; // this call handled one batch of cnt committing orphans
spin_unlock(&c->orphan_lock); // release c->orphan_lock
...
err = do_write_orph_node(c, len, atomic); // write c->orph_buf to flash
...
}
C. 在ubifs umount时,会调用generic_shutdown_super -> sop_put_super -> ubifs_put_super -> ubifs_umount -> free_orphans
/*
 * Unmount-time cleanup: free every orphan still on c->orph_dnext and
 * c->orph_list, then release c->orph_buf. An error is logged for each
 * orphan found on c->orph_list.
 */
static void free_orphans(struct ubifs_info *c)
{
struct ubifs_orphan *orph;
while (c->orph_dnext) { // drain c->orph_dnext, freeing each orphan on it
orph = c->orph_dnext;
c->orph_dnext = orph->dnext;
list_del(&orph->list);
kfree(orph);
}
while (!list_empty(&c->orph_list)) { // drain c->orph_list, freeing each orphan on it
orph = list_entry(c->orph_list.next, struct ubifs_orphan, list);
list_del(&orph->list);
kfree(orph);
ubifs_err(c, "orphan list not empty at unmount");
}
vfree(c->orph_buf);
c->orph_buf = NULL;
}
D. mount时处理Flash上的orphan node记录。ubifs_mount -> ubifs_fill_super -> mount_ubifs -> ubifs_mount_orphans -> kill_orphans(上次umount不是clean时)或 ubifs_clear_orphans(上次umount是clean且文件系统非只读时)
/*
 * ubifs_mount_orphans() excerpt: chooses how the orphan records on flash
 * are handled at mount time, depending on whether the previous unmount
 * was clean and whether the mount is read-only.
 */
int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only)
{
...
if (unclean) // the previous unmount was not clean: recovery is needed
err = kill_orphans(c);
else if (!read_only) // the previous unmount was clean and the file system is not read-only
err = ubifs_clear_orphans(c); // erase the orphan records on flash
return err;
}
/*
 * do_kill_orphans() excerpt: walks the orphan nodes scanned from one LEB
 * and, for every inode number they record, removes the inode from the TNC
 * if its link count is zero. Parts of the function are elided with "...".
 */
static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
			   unsigned long long *last_cmt_no, int *outofdate,
			   int *last_flagged)
{
	struct ubifs_scan_node *snod;
	struct ubifs_orph_node *orph;
	...
	/* sleb is the LEB data, containing orphan nodes, read back from flash */
	list_for_each_entry(snod, &sleb->nodes, list) {
		orph = snod->node;
		...
		n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3;
		for (i = 0; i < n; i++) { /* for each recorded orphan inode */
			...
			/* look up the inode node for this orphan; result lands in ino */
			err = ubifs_tnc_lookup(c, &key1, ino);
			if (err)
				goto out_free;
			/*
			 * Check whether an inode can really get deleted.
			 * linkat() with O_TMPFILE allows rebirth of an inode.
			 *
			 * A tmpfile is added to the orphan list when it is
			 * created. If its orphan record reaches flash at the end
			 * of a commit, the file is then linked (so it is no
			 * longer an orphan) and an unclean unmount follows, the
			 * next mount would remove it from the TNC tree unless
			 * ino->nlink is checked here.
			 */
			if (ino->nlink == 0) {
				dbg_rcvry("deleting orphaned inode %lu",
					  (unsigned long)inum);
				lowest_ino_key(c, &key1, inum);
				highest_ino_key(c, &key2, inum);
				/*
				 * Remove the orphan inode from the TNC tree;
				 * after an unclean unmount it may not have
				 * been removed yet.
				 */
				err = ubifs_tnc_remove_range(c, &key1, &key2);
				if (err)
					goto out_ro;
			}
			err = insert_dead_orphan(c, inum);
			if (err)
				goto out_free;
		}
		...
	}
	...
}
TNC Tree是ubifs在内存管理索引节点(每个inode对应一个index node)的数据结构,对TNC Tree更新直接影响到inode在Flash上的存储。
ubifs_tmpfile -> do_tmpfile -> ubifs_jnl_update -> ubifs_tnc_add
SYSCALL_DEFINE1(close, unsigned int, fd) -> __close_fd -> filp_close -> fput -> ____fput -> __fput -> dput -> dentry_kill -> __dentry_kill -> dentry_iput -> iput(如果nlink为0) -> iput_final -> evict -> sb_evict_inode(inode) -> ubifs_evict_inode -> ubifs_tnc_remove_ino
/*
 * ubifs_link() excerpt: shows only the O_TMPFILE special case, where the
 * inode being linked still has a zero link count.
 */
static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
struct dentry *dentry)
{
...
/* Handle O_TMPFILE corner case, it is allowed to link a O_TMPFILE. */
if (inode->i_nlink == 0) // the inode being linked had a zero link count, so it may be a tmpfile
ubifs_delete_orphan(c, inode->i_ino); // drop the inode from the orphan records so it is not written to the on-flash orphan area; otherwise, after an unclean unmount, the mount-time scan of recorded orphans would remove it from the TNC tree (see step D)
inc_nlink(inode); // increment the hard-link count; for a tmpfile it becomes 1
...
}
/*
 * Delete the orphan record for inode number inum, together with any child
 * orphans hanging off it, under c->orphan_lock. If no record is found, an
 * error is logged, the stack is dumped and nothing else happens.
 */
void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
{
struct ubifs_orphan *orph, *child_orph, *tmp_o;
spin_lock(&c->orphan_lock); // take c->orphan_lock
orph = lookup_orphan(c, inum); // the orphan is expected to be present in the rb-tree c->orph_tree
if (!orph) {
spin_unlock(&c->orphan_lock);
ubifs_err(c, "missing orphan ino %lu", (unsigned long)inum);
dump_stack();
return;
}
list_for_each_entry_safe(child_orph, tmp_o, &orph->child_list, child_list) { // delete any child orphans first
list_del(&child_orph->child_list);
orphan_delete(c, child_orph);
}
orphan_delete(c, orph);
spin_unlock(&c->orphan_lock); // release c->orphan_lock
}
/*
 * Delete one orphan record, depending on its state:
 *  - already marked del: nothing to do;
 *  - being committed (cmt set): mark it del and queue it on c->orph_dnext
 *    so that it is freed later;
 *  - otherwise: drop it immediately via __orphan_drop().
 */
static void orphan_delete(struct ubifs_info *c, struct ubifs_orphan *orph)
{
	if (orph->del) { /* already marked as deleted: nothing to do */
		dbg_gen("deleted twice ino %lu", orph->inum);
		return;
	}
	if (orph->cmt) { /* the orphan is currently being committed */
		orph->del = 1; /* mark the orphan as deleted */
		orph->dnext = c->orph_dnext;
		c->orph_dnext = orph; /* push the orphan onto c->orph_dnext */
		dbg_gen("delete later ino %lu", orph->inum);
		return;
	}
	/*
	 * Reached when the orphan has either finished committing
	 * (do_commit -> ubifs_orphan_end_commit) or is still on the
	 * c->orph_new list, i.e. has not been committed yet.
	 */
	__orphan_drop(c, orph);
}
/*
 * Unlink orphan o from the rb-tree and lists it is on, adjust the orphan
 * counters and free it.
 */
static void __orphan_drop(struct ubifs_info *c, struct ubifs_orphan *o)
{
rb_erase(&o->rb, &c->orph_tree); // remove the orphan from the rb-tree
list_del(&o->list); // remove the orphan from the c->orph_list list
c->tot_orphans -= 1; // one fewer orphan in total
if (o->new) {
list_del(&o->new_list); // not committed yet: also remove it from c->orph_new
c->new_orphans -= 1;
}
kfree(o); // free the orphan
}
1. 如果ubifs_link发生在do_commit -> ubifs_orphan_start_commit对orphan node置位cmt之前(orphan node还在c->orph_new链表中,尚未开始提交):
ubifs_link -> ubifs_delete_orphan直接将orphan node从各种链表和红黑树中删除,orphan node被释放,orphan node没有被放入c->orph_cnext链表,也不会被写入Flash。在关闭tmpfile时,检测到tmp_inode的nlink计数大于0,不需要从TNC Tree中删除。在umount后再mount时,不管umount操作是否clean,tmp_inode都不会从TNC Tree中移除,因为Flash中没有对应tmp_inode的orphan node的记录。
2. 如果ubifs_link发生在do_commit -> ubifs_orphan_start_commit对orphan node置位cmt之后,并且在do_commit -> ubifs_orphan_end_commit -> commit_orphans中对orphan node清位cmt之前(orphan node正在执行提交操作):
ubifs_link -> ubifs_delete_orphan将orphan node的del位置1,将orphan node加入c->orph_dnext链表,此时orphan node已经被放入c->orph_cnext链表,放入c->orph_cnext链表的orphan node一定会被写入Flash。然后ubifs_orphan_end_commit -> erase_deleted将所有在c->orph_dnext链表中的orphan node从其他所有链表和红黑树中移除,并释放orphan node。在关闭tmpfile时,检测到tmp_inode的nlink计数大于0,不需要从TNC Tree中删除。在umount后再mount时,如果上次umount是clean,直接将Flash中orphan node记录擦除,正常结束;如果上次umount是unclean,do_kill_orphans若只根据Flash中记录的orphan node(inum)处理,就会将tmp_inode从TNC Tree中删除,而这个文件其实在umount前被link过了,nlink计数为1,不应该被删除。所以在do_kill_orphans中要对每个读取到的orphan inode的nlink进行判断,只有nlink为0时才从TNC Tree上删除。
3. 如果ubifs_link发生在do_commit -> ubifs_orphan_end_commit -> commit_orphans中对orphan node清位cmt之后(完成orphan node提交操作):
ubifs_link -> ubifs_delete_orphan直接将orphan node从各种链表和红黑树中删除,orphan node被释放,但orphan node已经被写入Flash(因为write_orph_node中cmt清位的orphan node会被写入Flash)。在关闭tmpfile时,检测到tmp_inode的nlink计数大于0,不需要从TNC Tree中删除。在umount后再mount时,如果上次umount是clean,直接将Flash中orphan node记录擦除,正常结束;如果上次umount是unclean,do_kill_orphans若只根据Flash中记录的orphan node(inum)处理,就会将tmp_inode从TNC Tree中删除,而这个文件其实在umount前被link过了,nlink计数为1,不应该被删除。所以在do_kill_orphans中要对每个读取到的orphan inode的nlink进行判断,只有nlink为0时才从TNC Tree上删除。
#define _GNU_SOURCE /* must precede all includes; exposes O_TMPFILE in <fcntl.h> */
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
/*
 * Reproducer for the ubifs_link-on-orphan path: create an unnamed O_TMPFILE
 * inode under /root, write a few bytes to it, then give it a name with
 * linkat() through its /proc/self/fd/<fd> symlink.
 *
 * Returns 0 on success and 1 on the first failure, after printing a
 * diagnostic (with the errno text where one is available) to stderr.
 */
int main(void)
{
    char tmp_fname[30];
    int path_len;
    int rc = 1;
    int fd;

    fd = open("/root", O_TMPFILE | O_RDWR, S_IRUSR | S_IWUSR);
    if (fd < 0) {
        /* Nothing below can work without a valid descriptor. */
        perror("E 1: open(O_TMPFILE)");
        return 1;
    }

    if (write(fd, "123", 3) != 3) {
        perror("E 2: write");
        goto out;
    }

    /* Bounded formatting; a truncated path would link the wrong file. */
    path_len = snprintf(tmp_fname, sizeof(tmp_fname), "/proc/self/fd/%d", fd);
    if (path_len < 0 || (size_t)path_len >= sizeof(tmp_fname)) {
        fprintf(stderr, "E 2: fd path does not fit in buffer\n");
        goto out;
    }

    /*
     * AT_SYMLINK_FOLLOW makes linkat() resolve the /proc symlink to the
     * tmpfile inode itself. Both paths are absolute, so the directory
     * descriptors are not consulted; AT_FDCWD is the documented value.
     */
    if (linkat(AT_FDCWD, tmp_fname, AT_FDCWD, "/root/tmp/newfile",
               AT_SYMLINK_FOLLOW) != 0) {
        perror("E 3: linkat");
        goto out;
    }

    rc = 0;
out:
    close(fd);
    return rc;
}