overlayfs如何检测层间发生重叠 - 549642238/linux-stable GitHub Wiki
mount -t overlay overlay -o lowerdir=lowerdir,upperdir=upperdir,workdir=workdir merged
overlay不允许upperdir、lowerdir和workdir之间互相重叠(有parent关系)或相同,包括多个lower dir之间,这样会引发不可预知的问题,例如:
$ mkdir workdir merged upperdir lowerdir
$ mkdir lowerdir/upper
$ mount -t overlay overlay -o lowerdir=lowerdir,upperdir=lowerdir/upper,workdir=workdir merged
$ mount | grep overlay
overlay on /root/tmp/merged type overlay (rw,relatime,lowerdir=lowerdir,upperdir=lowerdir/upper,workdir=workdir)
$ cd merged/
$ touch aa
$ cd ../lowerdir/
$ ls -R
.:
upper
./upper:
aa
很明显,lower层应该是只读,可是在merge层写入内容后lower层(lowerdir/upper下有aa)也出现了。如果umount后再次装载并访问merged目录下内容会出现问题:
$ umount merged
$ mount -t overlay overlay -o lowerdir=lowerdir,upperdir=lowerdir/upper,workdir=workdir merged
$ ls -R merged/
merged/:
aa upper
merged/upper:
ls: cannot access 'merged/upper/aa': Stale file handle
aa
因为mount后upper层的aa是lowerdir/upper/aa,然后lower层的upper/aa其实也是lowerdir/upper/aa,fs_lookup返回ESTALE。两个不同的overlay装载实例使用同一个upperdir或workdir也可能引发问题,因为它们都是rw的,不同的装载实例做并发修改删除操作可能会相互影响。为了检测重叠层,overlay提供了一种将inode置上陷阱(“trap”)标记放入哈希表检测重复trap inode的方法
/*
 * Abridged excerpt of the overlayfs mount entry point. Only the calls that
 * take part in overlapping-layer detection are shown; local declarations,
 * error handling and the rest of the body are elided.
 */
static int ovl_fill_super(struct super_block *sb, void *data, int silent)
{
/* Private superblock info of the overlay filesystem. */
struct ovl_fs *ofs;
if (ofs->config.upperdir) {
/*
 * Get the upper layer: fail if its root inode already has a trap,
 * otherwise set a trap for the upper layer's root inode.
 */
err = ovl_get_upper(sb, ofs, &upperpath);
/*
 * Get the workdir: fail if its inode already has a trap, otherwise set
 * a trap for the workdir's root inode.
 */
err = ovl_get_workdir(sb, ofs, &upperpath);
}
/*
 * Get the lower layers one by one: fail if a root inode already has a
 * trap, otherwise set a trap for each lower layer's root inode.
 */
oe = ovl_get_lowerstack(sb, ofs);
/*
 * Walk up the parents of every layer's root inode looking for traps,
 * because overlapping layers may be ancestors of one another.
 */
err = ovl_check_overlapping_layers(sb, ofs);
}
A. 在mount执行ovl_fill_super的时候对每个layer的root inode加上“trap”标记生成新的inode记录到哈希表,在对每个layer的root inode加上“trap”标记放入哈希表的同时检查是否哈希表已经存在打上“trap”标记的相同的inode,如果有则说明相同的目录项被挂载在overlay文件系统的不同层,发现重叠层并返回错误。
B. 在ovl_fill_super结束所有层初始化之后(将upper、lower、workdir层填充到overlay private超级块),要对每个层对应的root inode的parent递归检查,直到祖先inode,检查每个inode是否在哈希表中,如果在而且有“trap”标记,说明发生了重叠(例如upper层对应的root inode的parent是lower层的root inode),系统返回错误。
C. 在lookup的时候,也要检查每个目录项的inode,如果inode在哈希表中可以找到,而且有“trap”标记,说明在overlay正常mount之后,用户手动执行move dir操作使得某两个层发生重叠,例如mount之后,将upperdir移动到lowerdir之下,这时候upperdir的root inode的parent就是lowerdir的inode,系统检测到重叠并返回错误。
D. 不允许两个overlay文件系统共用upperdir/workdir或者一个overlay文件系统把另一个overlay文件系统的upper层当做lower层使用,在每次mount -> ovl_fill_super获取upper层和workdir的时候会对这两个层的root inode打上“I_OVL_INUSE”标记,在umount时清除标记,如果upper层目录被一个overlay mount实例使用后再次被另一个overlay文件系统装载,会检测到upper层目录的inode被打上“I_OVL_INUSE”标记,系统检测到正在被使用并报警。
/*
 * Abridged excerpt: the parts of upper-layer setup that deal with overlap
 * detection (trap inode) and exclusive use (in-use mark). Declarations and
 * the rest of the body are elided.
 */
static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
struct path *upperpath)
{
/*
 * Case A: look for an existing trap inode keyed on the upperdir inode.
 * If none exists, a new trap is created, hashed and stored in
 * ofs->upperdir_trap.
 */
err = ovl_setup_trap(sb, upperpath->dentry, &ofs->upperdir_trap,
"upperdir");
/*
 * Case D: try to mark the upper root inode I_OVL_INUSE. If the mark is
 * already present, the directory is in use by another overlay mount, so
 * the reuse is reported instead.
 */
if (ovl_inuse_trylock(ofs->upper_mnt->mnt_root)) {
ofs->upperdir_locked = true;
} else {
err = ovl_report_in_use(ofs, "upperdir");
}
}
ovl_get_lowerstack -> ovl_get_lower_layers:
/*
 * Abridged excerpt: for each lower layer, set a trap inode and check whether
 * the directory is already in use by another overlay mount. Declarations and
 * the rest of the per-layer setup are elided.
 */
static int ovl_get_lower_layers(struct super_block *sb, struct ovl_fs *ofs,
struct path *stack, unsigned int numlower)
{
for (i = 0; i < numlower; i++) {
/*
 * Case A: look for an existing trap inode keyed on this lower root's
 * inode. If none exists, a new trap is created and hashed.
 */
err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir");
if (err)
goto out;
/*
 * Case D: only test (never set) I_OVL_INUSE on the lower root. If it is
 * set, this directory is the upperdir or workdir of another overlay
 * mount and the reuse is reported.
 */
if (ovl_is_inuse(stack[i].dentry)) {
err = ovl_report_in_use(ofs, "lowerdir");
if (err)
goto out;
}
/* Remember the trap inode of this lower layer. */
ofs->lower_layers[ofs->numlower].trap = trap;
}
out:
return err;
}
/*
 * Abridged excerpt: the parts of workdir setup that deal with exclusive use
 * (in-use mark) and overlap detection (trap inode). Declarations, the "out"
 * label and the return statement are elided.
 */
static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
struct path *upperpath)
{
/*
 * Case D: try to mark the workdir inode I_OVL_INUSE. If the mark is
 * already present, the directory is the upperdir or workdir of another
 * overlay mount and the reuse is reported.
 */
if (ovl_inuse_trylock(ofs->workbasedir)) {
ofs->workdir_locked = true;
} else {
err = ovl_report_in_use(ofs, "workdir");
if (err)
goto out;
}
/*
 * Case A: look for an existing trap inode keyed on the workbasedir
 * inode. If none exists, a new trap is created, hashed and stored in
 * ofs->workbasedir_trap.
 */
err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap,
"workdir");
err = ovl_make_workdir(sb, ofs, &workpath);
}
/*
 * Abridged excerpt: only the trap setup for the actual work directory is
 * shown; everything else in this function is elided.
 */
static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
struct path *workpath)
{
/*
 * Case A: look for an existing trap inode keyed on the ofs->workdir
 * inode. If none exists, a new trap is created, hashed and stored in
 * ofs->workdir_trap. ofs->workdir is the real working directory, i.e.
 * the "work" directory below the workdir given by the user.
 */
err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
}
检查/设置陷阱inode,<key, sb, S_DEAD, without-__upperdentry, without-lower>确定一个trap inode
/*
 * Set a trap inode for the layer directory @dir.
 *
 * @sb:    overlay superblock whose inode hash holds the traps
 * @dir:   root dentry of the layer being set up
 * @ptrap: receives the trap inode on success; left untouched on failure
 * @name:  layer name used in the error message ("upperdir", "lowerdir", ...)
 *
 * Returns 0 on success, or the error encoded in the pointer returned by
 * ovl_get_trap_inode(). -ELOOP means a trap for the same inode is already
 * hashed, i.e. two layers resolve to the same directory; that case is also
 * logged.
 */
static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
			  struct inode **ptrap, const char *name)
{
	struct inode *inode = ovl_get_trap_inode(sb, dir);
	int ret = PTR_ERR_OR_ZERO(inode);

	if (!ret) {
		/* Hand the new trap inode over to the caller. */
		*ptrap = inode;
		return 0;
	}

	if (ret == -ELOOP)
		pr_err("overlayfs: conflicting %s path\n", name);

	return ret;
}
struct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir) // 检查/生成trap inode
{
struct inode *key = d_inode(dir);
struct inode *trap;
trap = iget5_locked(sb, (unsigned long) key, ovl_inode_test,
ovl_inode_set, key); // 基于key和sb从哈希表得到inode,没有则新建inode并插入哈希表,如果是新创建的inode会有I_NEW标志位
if (!(trap->i_state & I_NEW)) { // inode原本就存在于哈希表,发生overlay层间发生重叠,肯定是之前层的root inode已经加入哈希表。这时候的“trap”标记仅仅是inode的i_private是key而且inode的i_sb是sb,因为在ovl_fill_super结束之前不会生成该文件系统的inode(该文件系统还没挂载完成,不存在任何除root_dentry之外的ovl_inode)只有在mount完成之后才可以在该装载实例对应的文件系统下新建inode并使inode->i_sb = sb
/* Conflicting layer roots? */
iput(trap);
return ERR_PTR(-ELOOP);
}
trap->i_flags = S_DEAD; // 打上新的“trap”标志位
unlock_new_inode(trap); // 清除trap inode的I_NEW标志位
return trap;
}
/*
 * Check every layer of the overlay for overlap with another layer.
 *
 * For the upper layer root, the workbasedir and each lower layer root, the
 * ancestors are walked by ovl_check_layer() looking for trap inodes (case B)
 * and for directories marked in-use by another overlay mount (case D).
 *
 * Returns 0 if no check fails, otherwise the first error returned by
 * ovl_check_layer().
 */
static int ovl_check_overlapping_layers(struct super_block *sb,
					struct ovl_fs *ofs)
{
	unsigned int i;
	int err;

	if (ofs->upper_mnt) {
		err = ovl_check_layer(sb, ofs, ofs->upper_mnt->mnt_root,
				      "upperdir");
		if (err)
			return err;

		/*
		 * Checking workbasedir avoids hitting ovl_is_inuse(parent) of
		 * this instance and covers overlapping work and index dirs,
		 * unless work or index dir have been moved since created inside
		 * workbasedir.  In that case, we already have their traps in
		 * inode cache and we will catch that case on lookup.
		 */
		err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir");
		if (err)
			return err;
	}

	for (i = 0; i < ofs->numlower; i++) {
		err = ovl_check_layer(sb, ofs,
				      ofs->lower_layers[i].mnt->mnt_root,
				      "lowerdir");
		if (err)
			return err;
	}

	return 0;
}
沿parent链逐级向上(直到根目录)检查各祖先inode是否已经被置上“trap”标记
/*
 * Check the ancestors of one layer root for overlap with other layers.
 *
 * @sb:     overlay superblock whose inode hash holds the traps
 * @ofs:    overlay filesystem info, passed on to ovl_report_in_use()
 * @dentry: root dentry of the layer to check
 * @name:   layer name used in messages ("upperdir", "workdir", "lowerdir")
 *
 * Returns 0 if no ancestor is a problem, -ELOOP if an ancestor has a trap
 * inode, or the result of ovl_report_in_use() if an ancestor is marked
 * in-use.
 */
static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs,
			   struct dentry *dentry, const char *name)
{
	struct dentry *next = dentry, *parent;
	int err = 0;

	parent = dget_parent(next);

	/* Walk back ancestors to root (inclusive) looking for traps */
	while (!err && parent != next) {
		if (ovl_lookup_trap_inode(sb, parent)) {
			/* An ancestor is itself a layer root: layers overlap. */
			err = -ELOOP;
			pr_err("overlayfs: overlapping %s path\n", name);
		} else if (ovl_is_inuse(parent)) {
			/* An ancestor is marked in-use by another overlay mount. */
			err = ovl_report_in_use(ofs, name);
		}
		next = parent;
		parent = dget_parent(next);
		dput(next);
	}

	/* Drop the reference taken by the last dget_parent() call. */
	dput(parent);

	return err;
}
/*
 * Tell whether the real directory @dir has a trap inode hashed for @sb.
 *
 * An inode found in the hash counts as a trap only if it is flagged S_DEAD
 * and has neither an upper nor a lower inode. S_DEAD alone is not enough:
 * per the accompanying write-up, rmdir also sets S_DEAD on ordinary overlay
 * inodes, but those still reference an upper or lower inode, whereas the
 * traps set at mount time reference neither.
 *
 * Returns true if a trap inode exists for @dir, false otherwise.
 */
bool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir)
{
	struct inode *real = d_inode(dir);
	struct inode *found;
	bool is_trap = false;

	/* Look up the inode hashed on (sb, real inode address). */
	found = ilookup5(sb, (unsigned long) real, ovl_inode_test, real);
	if (found) {
		if (IS_DEADDIR(found) && !ovl_inode_upper(found) &&
		    !ovl_inode_lower(found))
			is_trap = true;
		iput(found);
	}

	return is_trap;
}
Step 3. lookup检测trap inode,因为用户在mount之后可能手动将upper层move到lower层或类似的层间移动操作,ovl_lookup_layer -> ovl_lookup_single -> ovl_lookup_trap_inode
/*
 * Abridged excerpt: only the trap check performed during lookup is shown.
 * The declaration of err, the body of the first branch, the out_err label
 * and the return statement are elided.
 */
static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
const char *name, unsigned int namelen,
size_t prelen, const char *post,
struct dentry **ret)
{
struct dentry *this;
this = lookup_one_len_unlocked(name, base, namelen);
if (!d_can_lookup(this)) {
} else {
/*
 * Case C: run the overlap check on every directory found during lookup,
 * because after a successful mount the user may have moved a layer root
 * (e.g. the upper root dentry) below another layer.
 */
if (ovl_lookup_trap_inode(d->sb, this)) {
/* Caught in a trap of overlapping layers */
err = -ELOOP;
goto out_err;
}
}
}
upper层和workdir禁止被多个overlay文件系统实例同时共享,所以在mount时要对这两个目录做in-use检查并置位in-use。但是即使检查到in-use也不应该返回EBUSY致使overlay mount失败,因为docker可能会导致装载实例泄露每一层的root inode,比如:
$ mount -t overlay none -o lowerdir=lowerdir,upperdir=upperdir,workdir=workdir merged
$ unshare -m # 模拟创建docker,创建命名空间
(原命名空间下执行)$ umount merged
(原命名空间下执行)$ mount -t overlay none -o lowerdir=lowerdir,upperdir=upperdir,workdir=workdir merged
如果检测到层重用返回EBUSY会导致mount失败,因为在docker命名空间中upperdir对应的inode还处于in-use状态(umount释放的是装载实例,超级块还在,而且docker中还在引用,ofs->workbasedir和ofs->upper_mnt->mnt_root仍处于in-use状态),所以应该给出警告。层重用返回错误仅针对index=on的情况,如果两个overlay的index dir互为祖先关系或相同或者一个index dir是另一个的upper dir则会触发警告并返回EBUSY
/*
 * Report that the layer @name is already in use as the upperdir or workdir
 * of another overlay mount.
 *
 * With index=off this only logs a warning and returns 0, so the mount can
 * proceed. With index=on it logs an error and returns -EBUSY.
 */
static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
{
	if (!ofs->config.index) {
		/* index=off: warn only. */
		pr_warn("overlayfs: %s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
			name);
		return 0;
	}

	/* index=on: exclusive use is enforced. */
	pr_err("overlayfs: %s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
	       name);
	return -EBUSY;
}
$ mkdir -p lowerdir/upper merged merge upper upperdir work workdir test upperdir/new
$ mount -t overlay none -o lowerdir=lowerdir,upperdir=lowerdir/upper,workdir=workdir merged
[148709.131206] overlayfs: overlapping upperdir path
$ mount -t overlay none -o lowerdir=lowerdir,upperdir=lowerdir,workdir=workdir merged
[148751.384637] overlayfs: conflicting lowerdir path
A. 在ovl_fill_super -> ovl_check_overlapping_layers -> ovl_check_layer(upper层) -> ovl_lookup_trap_inode检测到upper层root inode的parent[lowerdir]已经被置上“trap”标记,返回upper层重叠错误。“trap”标记在ovl_fill_super -> ovl_get_lowerstack -> ovl_get_lower_layers -> ovl_setup_trap被置上
B. 在ovl_fill_super -> ovl_get_lowerstack -> ovl_get_lower_layers -> ovl_setup_trap尝试置上“trap”标记时失败,因为相同的inode已经在ovl_fill_super -> ovl_get_upper -> ovl_setup_trap被置上“trap”标记
$ mount -t overlay none -o lowerdir=lowerdir,upperdir=upperdir,workdir=workdir merged
$ mv upperdir/ lowerdir/
$ ls merged
ls: cannot access 'merged/upperdir': Too many levels of symbolic links
A. 移动目录后,ovl_lookup -> ovl_lookup_layer -> ovl_lookup_single到merged目录下的upperdir时会找到对应upper层的dentry,然后发现对应的inode已经被ovl_fill_super -> ovl_get_upper -> ovl_setup_trap置上“trap”标记放入哈希表中,直接在ovl_lookup_single -> ovl_lookup_trap_inode返回错误
$ mount -t overlay none -o lowerdir=lowerdir,upperdir=upperdir,workdir=workdir merged
$ mount -t overlay none -o lowerdir=upperdir,upperdir=upper,workdir=work merge
[151721.722732] overlayfs: lowerdir is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.
A. 第二次装载ovl_fill_super -> ovl_get_lowerstack -> ovl_get_lower_layers -> ovl_is_inuse检测到lower层使用的root inode已经被ovl_fill_super -> ovl_get_upper -> ovl_inuse_trylock置上“I_OVL_INUSE”标志位,给出upper层重用警告
$ mount -t overlay none -o lowerdir=lowerdir,upperdir=upperdir,workdir=workdir merged
$ mount -t overlay none -o lowerdir=upperdir/new,upperdir=upper,workdir=work merge
[156008.980439] overlayfs: lowerdir is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.
A. overlay第二次装载在ovl_fill_super -> ovl_check_overlapping_layers -> ovl_check_layer -> ovl_is_inuse递归检测到lower层parent[upperdir]的inode被置上“I_OVL_INUSE”标志位,因为第一次overlay装载的upper层已经将其在ovl_fill_super -> ovl_get_upper -> ovl_inuse_trylock置上“I_OVL_INUSE”标志位
$ mount -t overlay none -o lowerdir=lowerdir,upperdir=upperdir,workdir=workdir merged
$ mount -t overlay none -o lowerdir=test,upperdir=lowerdir,workdir=work merge
# OK
A. 明显第二次挂载的upper层是第一次挂载的lower层,但第二次挂载仍能成功。因为在ovl_fill_super -> ovl_get_lowerstack -> ovl_get_lower_layers时仅对lower层检测是否有“I_OVL_INUSE”标志位,不作修改,不对下次装载的标志位检测产生影响。这样做是有原因的,因为如果lower层也对inode加上“I_OVL_INUSE”标志位,那么同一lower层就不能被多个overlay装载实例同时使用,失去了共享的意义。但这样做却并不合理,因为在第二次装载的overlay下修改upper层内容会影响到第一次overlay装载实例文件系统的lower层内容显示。