mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2026-03-22 07:27:12 +08:00
The EROFS on-disk format uses a tiny, plain metadata design that prioritizes performance and minimizes complex inconsistencies compared with common writable disk filesystems (almost no serious metadata inconsistencies can happen in well-designed immutable filesystems like EROFS). EROFS deliberately avoids artificial design flaws to eliminate serious security risks from untrusted remote sources by design, although human-made implementation bugs can still happen sometimes. Currently, there is no strict check to prevent compressed inodes, especially LZ4-compressed inodes, from being read in plain filesystems. Starting with erofs-utils 1.0 and Linux 5.3, the LZ4_0PADDING sb feature is automatically enabled for LZ4-compressed EROFS images to support in-place decompression. Furthermore, since Linux 5.4 LTS is no longer supported, we no longer need to handle ancient LZ4-compressed EROFS images generated by erofs-utils prior to 1.0. To formally distinguish different filesystem types for improved security: - Use the presence of LZ4_0PADDING or a non-zero `dsb->u1.lz4_max_distance` as a marker for compressed filesystems containing LZ4-compressed inodes only; - For other algorithms, use the `dsb->u1.available_compr_algs` bitmap. Note: LZ4_0PADDING has been supported since Linux 5.4 (the first formal kernel version), so exposing it via sysfs is no longer necessary and is now deprecated (but it will remain for five more years, until 2031): `dsb->u1` has been strictly non-zero for all EROFS images containing compressed inodes starting with erofs-utils v1.3 and it is actually a much better marker for compressed filesystems. Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
394 lines
11 KiB
C
394 lines
11 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Copyright (C) 2017-2018 HUAWEI, Inc.
|
|
* https://www.huawei.com/
|
|
* Copyright (C) 2021, Alibaba Cloud
|
|
*/
|
|
#include "xattr.h"
|
|
#include <linux/compat.h>
|
|
#include <trace/events/erofs.h>
|
|
|
|
static int erofs_fill_symlink(struct inode *inode, void *bptr, unsigned int ofs)
|
|
{
|
|
struct erofs_inode *vi = EROFS_I(inode);
|
|
char *link;
|
|
loff_t end;
|
|
|
|
ofs += vi->xattr_isize;
|
|
/* check whether the symlink data is small enough to be inlined */
|
|
if (vi->datalayout == EROFS_INODE_FLAT_INLINE &&
|
|
!check_add_overflow(ofs, inode->i_size, &end) &&
|
|
end <= i_blocksize(inode)) {
|
|
link = kmemdup_nul(bptr + ofs, inode->i_size, GFP_KERNEL);
|
|
if (!link)
|
|
return -ENOMEM;
|
|
if (unlikely(!inode->i_size || strlen(link) != inode->i_size)) {
|
|
erofs_err(inode->i_sb, "invalid fast symlink size %llu @ nid %llu",
|
|
inode->i_size | 0ULL, vi->nid);
|
|
kfree(link);
|
|
return -EFSCORRUPTED;
|
|
}
|
|
inode_set_cached_link(inode, link, inode->i_size);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int erofs_read_inode(struct inode *inode)
|
|
{
|
|
struct super_block *sb = inode->i_sb;
|
|
erofs_blk_t blkaddr = erofs_blknr(sb, erofs_iloc(inode));
|
|
unsigned int ofs = erofs_blkoff(sb, erofs_iloc(inode));
|
|
bool in_mbox = erofs_inode_in_metabox(inode);
|
|
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
|
|
struct erofs_sb_info *sbi = EROFS_SB(sb);
|
|
erofs_blk_t addrmask = BIT_ULL(48) - 1;
|
|
struct erofs_inode *vi = EROFS_I(inode);
|
|
struct erofs_inode_extended *die, copied;
|
|
struct erofs_inode_compact *dic;
|
|
unsigned int ifmt;
|
|
void *ptr;
|
|
int err = 0;
|
|
|
|
ptr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr), in_mbox);
|
|
if (IS_ERR(ptr)) {
|
|
err = PTR_ERR(ptr);
|
|
erofs_err(sb, "failed to read inode meta block (nid: %llu): %d",
|
|
vi->nid, err);
|
|
goto err_out;
|
|
}
|
|
|
|
dic = ptr + ofs;
|
|
ifmt = le16_to_cpu(dic->i_format);
|
|
if (ifmt & ~EROFS_I_ALL) {
|
|
erofs_err(sb, "unsupported i_format %u of nid %llu",
|
|
ifmt, vi->nid);
|
|
err = -EOPNOTSUPP;
|
|
goto err_out;
|
|
}
|
|
|
|
vi->datalayout = erofs_inode_datalayout(ifmt);
|
|
if (vi->datalayout >= EROFS_INODE_DATALAYOUT_MAX) {
|
|
erofs_err(sb, "unsupported datalayout %u of nid %llu",
|
|
vi->datalayout, vi->nid);
|
|
err = -EOPNOTSUPP;
|
|
goto err_out;
|
|
}
|
|
|
|
switch (erofs_inode_version(ifmt)) {
|
|
case EROFS_INODE_LAYOUT_EXTENDED:
|
|
vi->inode_isize = sizeof(struct erofs_inode_extended);
|
|
/* check if the extended inode acrosses block boundary */
|
|
if (ofs + vi->inode_isize <= sb->s_blocksize) {
|
|
ofs += vi->inode_isize;
|
|
die = (struct erofs_inode_extended *)dic;
|
|
copied.i_u = die->i_u;
|
|
copied.i_nb = die->i_nb;
|
|
} else {
|
|
const unsigned int gotten = sb->s_blocksize - ofs;
|
|
|
|
memcpy(&copied, dic, gotten);
|
|
ptr = erofs_read_metabuf(&buf, sb,
|
|
erofs_pos(sb, blkaddr + 1), in_mbox);
|
|
if (IS_ERR(ptr)) {
|
|
err = PTR_ERR(ptr);
|
|
erofs_err(sb, "failed to read inode payload block (nid: %llu): %d",
|
|
vi->nid, err);
|
|
goto err_out;
|
|
}
|
|
ofs = vi->inode_isize - gotten;
|
|
memcpy((u8 *)&copied + gotten, ptr, ofs);
|
|
die = &copied;
|
|
}
|
|
vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount);
|
|
|
|
inode->i_mode = le16_to_cpu(die->i_mode);
|
|
i_uid_write(inode, le32_to_cpu(die->i_uid));
|
|
i_gid_write(inode, le32_to_cpu(die->i_gid));
|
|
set_nlink(inode, le32_to_cpu(die->i_nlink));
|
|
inode_set_mtime(inode, le64_to_cpu(die->i_mtime),
|
|
le32_to_cpu(die->i_mtime_nsec));
|
|
|
|
inode->i_size = le64_to_cpu(die->i_size);
|
|
break;
|
|
case EROFS_INODE_LAYOUT_COMPACT:
|
|
vi->inode_isize = sizeof(struct erofs_inode_compact);
|
|
ofs += vi->inode_isize;
|
|
vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount);
|
|
|
|
inode->i_mode = le16_to_cpu(dic->i_mode);
|
|
copied.i_u = dic->i_u;
|
|
i_uid_write(inode, le16_to_cpu(dic->i_uid));
|
|
i_gid_write(inode, le16_to_cpu(dic->i_gid));
|
|
if (!S_ISDIR(inode->i_mode) &&
|
|
((ifmt >> EROFS_I_NLINK_1_BIT) & 1)) {
|
|
set_nlink(inode, 1);
|
|
copied.i_nb = dic->i_nb;
|
|
} else {
|
|
set_nlink(inode, le16_to_cpu(dic->i_nb.nlink));
|
|
copied.i_nb.startblk_hi = 0;
|
|
addrmask = BIT_ULL(32) - 1;
|
|
}
|
|
inode_set_mtime(inode, sbi->epoch + le32_to_cpu(dic->i_mtime),
|
|
sbi->fixed_nsec);
|
|
|
|
inode->i_size = le32_to_cpu(dic->i_size);
|
|
break;
|
|
default:
|
|
erofs_err(sb, "unsupported on-disk inode version %u of nid %llu",
|
|
erofs_inode_version(ifmt), vi->nid);
|
|
err = -EOPNOTSUPP;
|
|
goto err_out;
|
|
}
|
|
|
|
if (unlikely(inode->i_size < 0)) {
|
|
erofs_err(sb, "negative i_size @ nid %llu", vi->nid);
|
|
err = -EFSCORRUPTED;
|
|
goto err_out;
|
|
}
|
|
|
|
if (IS_ENABLED(CONFIG_EROFS_FS_POSIX_ACL) &&
|
|
erofs_inode_has_noacl(inode, ptr, ofs))
|
|
cache_no_acl(inode);
|
|
|
|
switch (inode->i_mode & S_IFMT) {
|
|
case S_IFDIR:
|
|
vi->dot_omitted = (ifmt >> EROFS_I_DOT_OMITTED_BIT) & 1;
|
|
fallthrough;
|
|
case S_IFREG:
|
|
case S_IFLNK:
|
|
vi->startblk = le32_to_cpu(copied.i_u.startblk_lo) |
|
|
((u64)le16_to_cpu(copied.i_nb.startblk_hi) << 32);
|
|
if (vi->datalayout == EROFS_INODE_FLAT_PLAIN &&
|
|
!((vi->startblk ^ EROFS_NULL_ADDR) & addrmask))
|
|
vi->startblk = EROFS_NULL_ADDR;
|
|
|
|
if(S_ISLNK(inode->i_mode)) {
|
|
err = erofs_fill_symlink(inode, ptr, ofs);
|
|
if (err)
|
|
goto err_out;
|
|
}
|
|
break;
|
|
case S_IFCHR:
|
|
case S_IFBLK:
|
|
inode->i_rdev = new_decode_dev(le32_to_cpu(copied.i_u.rdev));
|
|
break;
|
|
case S_IFIFO:
|
|
case S_IFSOCK:
|
|
inode->i_rdev = 0;
|
|
break;
|
|
default:
|
|
erofs_err(sb, "bogus i_mode (%o) @ nid %llu", inode->i_mode,
|
|
vi->nid);
|
|
err = -EFSCORRUPTED;
|
|
goto err_out;
|
|
}
|
|
|
|
if (!erofs_inode_is_data_compressed(vi->datalayout)) {
|
|
inode->i_blocks = round_up(inode->i_size, sb->s_blocksize) >> 9;
|
|
} else if (!IS_ENABLED(CONFIG_EROFS_FS_ZIP) || !sbi->available_compr_algs) {
|
|
erofs_err(sb, "compressed inode (nid %llu) is invalid in a plain filesystem",
|
|
vi->nid);
|
|
err = -EFSCORRUPTED;
|
|
goto err_out;
|
|
} else {
|
|
inode->i_blocks = le32_to_cpu(copied.i_u.blocks_lo) <<
|
|
(sb->s_blocksize_bits - 9);
|
|
}
|
|
|
|
if (vi->datalayout == EROFS_INODE_CHUNK_BASED) {
|
|
/* fill chunked inode summary info */
|
|
vi->chunkformat = le16_to_cpu(copied.i_u.c.format);
|
|
if (vi->chunkformat & ~EROFS_CHUNK_FORMAT_ALL) {
|
|
erofs_err(sb, "unsupported chunk format %x of nid %llu",
|
|
vi->chunkformat, vi->nid);
|
|
err = -EOPNOTSUPP;
|
|
goto err_out;
|
|
}
|
|
vi->chunkbits = sb->s_blocksize_bits +
|
|
(vi->chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK);
|
|
}
|
|
inode_set_atime_to_ts(inode,
|
|
inode_set_ctime_to_ts(inode, inode_get_mtime(inode)));
|
|
|
|
inode->i_flags &= ~S_DAX;
|
|
if (test_opt(&sbi->opt, DAX_ALWAYS) && S_ISREG(inode->i_mode) &&
|
|
(vi->datalayout == EROFS_INODE_FLAT_PLAIN ||
|
|
vi->datalayout == EROFS_INODE_CHUNK_BASED))
|
|
inode->i_flags |= S_DAX;
|
|
err_out:
|
|
erofs_put_metabuf(&buf);
|
|
return err;
|
|
}
|
|
|
|
static int erofs_fill_inode(struct inode *inode)
|
|
{
|
|
int err;
|
|
|
|
trace_erofs_fill_inode(inode);
|
|
err = erofs_read_inode(inode);
|
|
if (err)
|
|
return err;
|
|
|
|
switch (inode->i_mode & S_IFMT) {
|
|
case S_IFREG:
|
|
inode->i_op = &erofs_generic_iops;
|
|
inode->i_fop = erofs_ishare_fill_inode(inode) ?
|
|
&erofs_ishare_fops : &erofs_file_fops;
|
|
break;
|
|
case S_IFDIR:
|
|
inode->i_op = &erofs_dir_iops;
|
|
inode->i_fop = &erofs_dir_fops;
|
|
inode_nohighmem(inode);
|
|
break;
|
|
case S_IFLNK:
|
|
if (inode->i_link)
|
|
inode->i_op = &erofs_fast_symlink_iops;
|
|
else
|
|
inode->i_op = &erofs_symlink_iops;
|
|
inode_nohighmem(inode);
|
|
break;
|
|
default:
|
|
inode->i_op = &erofs_generic_iops;
|
|
init_special_inode(inode, inode->i_mode, inode->i_rdev);
|
|
return 0;
|
|
}
|
|
|
|
mapping_set_large_folios(inode->i_mapping);
|
|
return erofs_inode_set_aops(inode, inode, false);
|
|
}
|
|
|
|
/*
|
|
* ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
|
|
* so that it will fit.
|
|
*/
|
|
static ino_t erofs_squash_ino(struct super_block *sb, erofs_nid_t nid)
|
|
{
|
|
u64 ino64 = erofs_nid_to_ino64(EROFS_SB(sb), nid);
|
|
|
|
if (sizeof(ino_t) < sizeof(erofs_nid_t))
|
|
ino64 ^= ino64 >> (sizeof(erofs_nid_t) - sizeof(ino_t)) * 8;
|
|
return (ino_t)ino64;
|
|
}
|
|
|
|
static int erofs_iget5_eq(struct inode *inode, void *opaque)
|
|
{
|
|
return EROFS_I(inode)->nid == *(erofs_nid_t *)opaque;
|
|
}
|
|
|
|
static int erofs_iget5_set(struct inode *inode, void *opaque)
|
|
{
|
|
const erofs_nid_t nid = *(erofs_nid_t *)opaque;
|
|
|
|
inode->i_ino = erofs_squash_ino(inode->i_sb, nid);
|
|
EROFS_I(inode)->nid = nid;
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Look up the inode for @nid on @sb, filling it from disk when
 * iget5_locked() hands back a new (I_NEW) inode.
 *
 * Returns the inode on success, ERR_PTR(-ENOMEM) if no inode could be
 * obtained, or ERR_PTR() of the erofs_fill_inode() error.
 */
struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid)
{
	struct inode *inode;
	int ret;

	inode = iget5_locked(sb, erofs_squash_ino(sb, nid), erofs_iget5_eq,
			     erofs_iget5_set, &nid);
	if (!inode)
		return ERR_PTR(-ENOMEM);

	/* not a new inode: nothing left to fill in */
	if (!(inode_state_read_once(inode) & I_NEW))
		return inode;

	ret = erofs_fill_inode(inode);
	if (ret) {
		iget_failed(inode);
		return ERR_PTR(ret);
	}
	unlock_new_inode(inode);
	return inode;
}
|
|
|
|
/*
 * ->getattr() implementation: reports the IMMUTABLE attribute for every
 * inode, COMPRESSED for inodes with a compressed data layout, optional
 * direct-I/O alignment information, and then the generic attributes.
 * Always returns 0.
 */
int erofs_getattr(struct mnt_idmap *idmap, const struct path *path,
		  struct kstat *stat, u32 request_mask,
		  unsigned int query_flags)
{
	struct inode *const inode = d_inode(path->dentry);
	const bool is_compressed =
		erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout);
	struct block_device *bdev = inode->i_sb->s_bdev;

	stat->attributes |= STATX_ATTR_IMMUTABLE;
	if (is_compressed)
		stat->attributes |= STATX_ATTR_COMPRESSED;
	stat->attributes_mask |= STATX_ATTR_COMPRESSED | STATX_ATTR_IMMUTABLE;

	/*
	 * Return the DIO alignment restrictions if requested.
	 *
	 * In EROFS, STATX_DIOALIGN is only supported in bdev-based mode
	 * and uncompressed inodes, otherwise we report no DIO support.
	 */
	if (S_ISREG(inode->i_mode) && (request_mask & STATX_DIOALIGN)) {
		stat->result_mask |= STATX_DIOALIGN;
		if (!is_compressed && bdev) {
			stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
			stat->dio_offset_align = bdev_logical_block_size(bdev);
		}
	}

	generic_fillattr(idmap, request_mask, inode, stat);
	return 0;
}
|
|
|
|
/*
 * FS_IOC_GETFSLABEL helper: copy the volume label to the user buffer @arg.
 *
 * With no label set, a single NUL byte is written so that userspace sees an
 * empty string.  Otherwise the label is copied together with its NUL
 * terminator, so the result is a valid C string even if the caller did not
 * zero its buffer beforehand.
 *
 * NOTE(review): assumes the label plus terminator fits in the caller's
 * FSLABEL_MAX-sized buffer; confirm against where sbi->volume_name is set.
 *
 * Returns 0 on success or -EFAULT if the user buffer cannot be written.
 */
static int erofs_ioctl_get_volume_label(struct inode *inode, void __user *arg)
{
	struct erofs_sb_info *sbi = EROFS_I_SB(inode);
	int ret;

	if (!sbi->volume_name)
		ret = clear_user(arg, 1);
	else
		ret = copy_to_user(arg, sbi->volume_name,
				   strlen(sbi->volume_name) + 1);
	return ret ? -EFAULT : 0;
}
|
|
|
|
long erofs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
|
{
|
|
struct inode *inode = file_inode(filp);
|
|
void __user *argp = (void __user *)arg;
|
|
|
|
switch (cmd) {
|
|
case FS_IOC_GETFSLABEL:
|
|
return erofs_ioctl_get_volume_label(inode, argp);
|
|
default:
|
|
return -ENOTTY;
|
|
}
|
|
}
|
|
|
|
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point: converts the argument with compat_ptr() and
 * forwards the call to erofs_ioctl().
 */
long erofs_compat_ioctl(struct file *filp, unsigned int cmd,
			unsigned long arg)
{
	void __user *uptr = compat_ptr(arg);

	return erofs_ioctl(filp, cmd, (unsigned long)uptr);
}
#endif
|
|
|
|
const struct inode_operations erofs_generic_iops = {
|
|
.getattr = erofs_getattr,
|
|
.listxattr = erofs_listxattr,
|
|
.get_inode_acl = erofs_get_acl,
|
|
.fiemap = erofs_fiemap,
|
|
};
|
|
|
|
const struct inode_operations erofs_symlink_iops = {
|
|
.get_link = page_get_link,
|
|
.getattr = erofs_getattr,
|
|
.listxattr = erofs_listxattr,
|
|
.get_inode_acl = erofs_get_acl,
|
|
};
|
|
|
|
const struct inode_operations erofs_fast_symlink_iops = {
|
|
.get_link = simple_get_link,
|
|
.getattr = erofs_getattr,
|
|
.listxattr = erofs_listxattr,
|
|
.get_inode_acl = erofs_get_acl,
|
|
};
|