TL;DR
seq_file is quite handy when the kernel needs to export list-shaped data, e.g. show_fiq_list, show_cache_info. The implementation essentially gives every fd its own variable-length buffer: a read is served from the cached contents first, and the cache is refilled one item at a time, so output never gets torn mid-record and formatting stays consistent. The few downsides are the locking and the lack of write support --- by design, the data is assumed to be produced only once, up front.
Linux Kernel 6.17.1

File names and initial line numbers match the kernel source; later line numbers may drift because of the extra comments added here.
Data structures
struct seq_operations;
struct seq_file {
	// buffer
	char *buf;
	// buffer size
	size_t size;
	// remaining start
	size_t from;
	// remaining size
	size_t count;
	size_t pad_until;
	// last show index in list
	loff_t index;
	// last fd read_pos
	loff_t read_pos;
	// fd thread-safe guard
	struct mutex lock;
	const struct seq_operations *op;
	int poll_event;
	const struct file *file;
	// preserve data pointer to any data we want to store
	void *private;
};
struct seq_operations {
	// set up the seq_file at index, return the item pointer
	// e.g. take a lock against writers and find the item by index (named pos)
	void * (*start) (struct seq_file *m, loff_t *pos);
	// finish/clean up seq_file feeding with the given item
	// e.g. release the lock taken in start
	void (*stop) (struct seq_file *m, void *v);
	// advance to the next item given pos and the last item, return the new item
	// e.g. follow the next pointer and update *pos
	void * (*next) (struct seq_file *m, void *v, loff_t *pos);
	// dump the item into the buffer in seq_file
	// e.g. snprintf or memcpy and update count/size
	int (*show) (struct seq_file *m, void *v);
};
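To make the four callbacks concrete, here is a minimal sketch that walks a static array (my_items, MY_N and the my_* names are made up for illustration, not from the kernel):

#include <linux/kernel.h>
#include <linux/seq_file.h>

static int my_items[] = { 1, 2, 3 };
#define MY_N ARRAY_SIZE(my_items)

static void *my_start(struct seq_file *m, loff_t *pos)
{
	// returning NULL signals EOF
	return *pos < MY_N ? &my_items[*pos] : NULL;
}

static void *my_next(struct seq_file *m, void *v, loff_t *pos)
{
	++*pos;	// always advance the index, even at the end
	return *pos < MY_N ? &my_items[*pos] : NULL;
}

static void my_stop(struct seq_file *m, void *v)
{
	// nothing to unlock in this example
}

static int my_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%d\n", *(int *)v);
	return 0;
}

static const struct seq_operations my_ops = {
	.start = my_start,
	.next  = my_next,
	.stop  = my_stop,
	.show  = my_show,
};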
Helpers
Check whether the current buffer is already full/overflowed (meaning a larger buffer needs to be realloc'd):
/**
 * seq_has_overflowed - check if the buffer has overflowed
 * @m: the seq_file handle
 *
 * seq_files have a buffer which may overflow. When this happens a larger
 * buffer is reallocated and all the data will be printed again.
 * The overflow state is true when m->count == m->size.
 *
 * Returns true if the buffer received more than it can hold.
 */
static inline bool seq_has_overflowed(struct seq_file *m)
{
	return m->count == m->size;
}
Copy the number of bytes requested by the iov from addr:
static __always_inline __must_check
size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	if (check_copy_size(addr, bytes, true))
		return _copy_to_iter(addr, bytes, i);
	return 0;
}
Re-locate a given offset within the seq_file:
static int traverse(struct seq_file *m, loff_t offset)
{
	loff_t pos = 0;
	int error = 0;
	void *p;

	m->index = 0;
	m->count = m->from = 0;
	if (!offset)
		return 0;

	if (!m->buf) {
		// init a default-size buffer
		m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
		if (!m->buf)
			return -ENOMEM;
	}
	// start with the given index
	p = m->op->start(m, &m->index);
	while (p) {
		error = PTR_ERR(p);
		if (IS_ERR(p))
			break;
		// dump the current item
		error = m->op->show(m, p);
		if (error < 0)
			break;
		if (unlikely(error)) {
			error = 0;
			m->count = 0;
		}
		if (seq_has_overflowed(m))
			goto Eoverflow;
		// go to the next item
		p = m->op->next(m, p, &m->index);
		// if the current buffer passes the required offset,
		// update from and the remaining buffer count
		if (pos + m->count > offset) {
			m->from = offset - pos;
			m->count -= m->from;
			break;
		}
		// otherwise keep iterating until it is reached
		pos += m->count;
		// reset the remaining buffer size to zero since it is all before the required offset
		m->count = 0;
		// reached exactly / a perfect match
		if (pos == offset)
			break;
	}
	// cleanup
	m->op->stop(m, p);
	return error;

Eoverflow:
	m->op->stop(m, p);
	kvfree(m->buf);
	m->count = 0;
	// on overflow, get a bigger buffer and redo the process
	m->buf = seq_buf_alloc(m->size <<= 1);
	return !m->buf ? -ENOMEM : -EAGAIN;
}
Open
/**
 * seq_open - initialize sequential file
 * @file: file we initialize
 * @op: method table describing the sequence
 *
 * seq_open() sets @file, associating it with a sequence described
 * by @op. @op->start() sets the iterator up and returns the first
 * element of sequence. @op->stop() shuts it down. @op->next()
 * returns the next element of sequence. @op->show() prints element
 * into the buffer. In case of error ->start() and ->next() return
 * ERR_PTR(error). In the end of sequence they return %NULL. ->show()
 * returns 0 in case of success and negative number in case of error.
 * Returning SEQ_SKIP means "discard this element and move on".
 * Note: seq_open() will allocate a struct seq_file and store its
 * pointer in @file->private_data. This pointer should not be modified.
 */
int seq_open(struct file *file, const struct seq_operations *op)
{
	struct seq_file *p;

	WARN_ON(file->private_data);

	p = kmem_cache_zalloc(seq_file_cache, GFP_KERNEL);
	if (!p)
		return -ENOMEM;

	// set private data to the seq_file struct
	file->private_data = p;

	mutex_init(&p->lock);
	p->op = op;

	// No refcounting: the lifetime of 'p' is constrained
	// to the lifetime of the file.
	p->file = file;

	/*
	 * seq_files support lseek() and pread(). They do not implement
	 * write() at all, but we clear FMODE_PWRITE here for historical
	 * reasons.
	 *
	 * If a client of seq_files a) implements file.write() and b) wishes to
	 * support pwrite() then that client will need to implement its own
	 * file.open() which calls seq_open() and then sets FMODE_PWRITE.
	 */
	file->f_mode &= ~FMODE_PWRITE;
	return 0;
}
EXPORT_SYMBOL(seq_open);
Worth noting here: at the very end, the file's pwrite flag (FMODE_PWRITE) is forcibly cleared.
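Hooking this into procfs is just a matter of calling seq_open() from the open handler and reusing the ready-made helpers; a minimal sketch, assuming the hypothetical my_ops table above (seq_read is dissected in the next section):

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int my_proc_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &my_ops);
}

static const struct proc_ops my_proc_ops = {
	.proc_open    = my_proc_open,
	.proc_read    = seq_read,	// ready-made ->read()
	.proc_lseek   = seq_lseek,	// works because traverse() can re-locate any offset
	.proc_release = seq_release,	// frees the buffer and the struct seq_file
};

// registered e.g. in module init:
//	proc_create("my_list", 0444, NULL, &my_proc_ops);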
Read
/**
 * seq_read - ->read() method for sequential files.
 * @file: the file to read from
 * @buf: the buffer to read to
 * @size: the maximum number of bytes to read
 * @ppos: the current position in the file
 *
 * Ready-made ->f_op->read()
 */
ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
{
	// construct an iov request for this one-shot read
	struct iovec iov = { .iov_base = buf, .iov_len = size };
	struct kiocb kiocb;
	struct iov_iter iter;
	ssize_t ret;

	init_sync_kiocb(&kiocb, file);
	iov_iter_init(&iter, ITER_DEST, &iov, 1, size);

	kiocb.ki_pos = *ppos;
	// enter the real read function
	ret = seq_read_iter(&kiocb, &iter);
	*ppos = kiocb.ki_pos;
	return ret;
}
EXPORT_SYMBOL(seq_read);
/*
 * Ready-made ->f_op->read_iter()
 */
ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct seq_file *m = iocb->ki_filp->private_data;
	size_t copied = 0;
	size_t n;
	void *p;
	int err = 0;

	// a request to read 0 bytes
	if (!iov_iter_count(iter))
		return 0;

	// make sure only one thread enters at a time
	mutex_lock(&m->lock);

	/*
	 * if request is to read from zero offset, reset iterator to first
	 * record as it might have been already advanced by previous requests
	 */
	if (iocb->ki_pos == 0) {
		m->index = 0;
		m->count = 0;
	}

	/* Don't assume ki_pos is where we left it */
	if (unlikely(iocb->ki_pos != m->read_pos)) {
		// our recorded position doesn't match the requested position,
		// most likely because the user wants to read from an arbitrary
		// offset, so re-locate that position from the beginning via traverse()
		while ((err = traverse(m, iocb->ki_pos)) == -EAGAIN)
			;
		if (err) {
			/* With prejudice... */
			m->read_pos = 0;
			m->index = 0;
			m->count = 0;
			goto Done;
		} else {
			m->read_pos = iocb->ki_pos;
		}
	}

	/* grab buffer if we didn't have one */
	if (!m->buf) {
		m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
		if (!m->buf)
			goto Enomem;
	}
	// something left in the buffer - copy it out first
	if (m->count) {
		n = copy_to_iter(m->buf + m->from, m->count, iter);
		m->count -= n;
		// update the start-from position for the next read (if something remains)
		m->from += n;
		copied += n;
		if (m->count)	// hadn't managed to copy everything
			goto Done;
	}
	// get a non-empty record into the buffer
	m->from = 0;
	p = m->op->start(m, &m->index);
	// then fetch one non-empty item into the buffer
	while (1) {
		err = PTR_ERR(p);
		if (!p || IS_ERR(p))	// EOF or an error
			break;
		err = m->op->show(m, p);
		if (err < 0)		// hard error
			break;
		if (unlikely(err))	// ->show() says "skip it"
			m->count = 0;
		if (unlikely(!m->count)) { // empty record
			p = m->op->next(m, p, &m->index);
			continue;
		}
		if (!seq_has_overflowed(m)) // got it
			goto Fill;
		// need a bigger buffer
		m->op->stop(m, p);
		kvfree(m->buf);
		m->count = 0;
		m->buf = seq_buf_alloc(m->size <<= 1);
		if (!m->buf)
			goto Enomem;
		p = m->op->start(m, &m->index);
	}
	// EOF or an error
	m->op->stop(m, p);
	m->count = 0;
	goto Done;
Fill:
	// one non-empty record is in the buffer; if they want more,
	// try to fit more in, but in any case we need to advance
	// the iterator once for every record shown.
	while (1) {
		size_t offs = m->count;
		loff_t pos = m->index;

		p = m->op->next(m, p, &m->index);
		if (pos == m->index) {
			pr_info_ratelimited("buggy .next function %ps did not update position index\n",
					    m->op->next);
			m->index++;
		}
		if (!p || IS_ERR(p))	// no next record for us
			break;
		if (m->count >= iov_iter_count(iter))
			break;
		err = m->op->show(m, p);
		if (err > 0) {		// ->show() says "skip it"
			m->count = offs;
		} else if (err || seq_has_overflowed(m)) {
			m->count = offs;
			break;
		}
	}
	m->op->stop(m, p);
	// feed the request from the buffer
	n = copy_to_iter(m->buf, m->count, iter);
	copied += n;
	m->count -= n;
	m->from = n;
Done:
	if (unlikely(!copied)) {
		copied = m->count ? -EFAULT : err;
	} else {
		iocb->ki_pos += copied;
		m->read_pos += copied;
	}
	mutex_unlock(&m->lock);
	return copied;
Enomem:
	err = -ENOMEM;
	goto Done;
}
EXPORT_SYMBOL(seq_read_iter);
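From userspace none of the buffering or traverse() machinery is visible; reads at arbitrary offsets just work. A hypothetical test program, assuming the /proc/my_list entry from the earlier sketch:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[64];
	ssize_t n;
	int fd = open("/proc/my_list", O_RDONLY);

	if (fd < 0)
		return 1;

	// sequential read: served straight from the seq_file buffer
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("sequential: %s", buf);
	}

	// read from an arbitrary offset: in the kernel ki_pos != m->read_pos,
	// so seq_read_iter() re-locates the offset via traverse()
	n = pread(fd, buf, sizeof(buf) - 1, 2);
	if (n > 0) {
		buf[n] = '\0';
		printf("offset 2: %s", buf);
	}

	close(fd);
	return 0;
}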
Custom implementation
Following this design, I implemented proc read/write over an RCU list myself, under a few assumptions:

- Every item renders to a fixed maximum length; here

#define LINE_BUFFER_MAX_SIZE (10 + 2 + 20 + 1 + 1)

matches the "%d: %lu\n" format: up to 10 digits for the int, 2 bytes for ": ", up to 20 digits for the unsigned long, 1 for the newline, and 1 for the NUL terminator.
- A read returns the whole list, one line per item as above.
- A write appends an entry to the list.
RCU
Some documentation:
What’s RCU? by kernel documentation
listRCU by kernel documentation
What’s RCU, Fundamentally? by lwn.net
In short, RCU is a lock-free access scheme built on three principles: publish updates atomically, wait for all pre-existing readers to finish, and keep old versions of the data alive for those readers in the meantime.
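A minimal sketch of those three principles over a kernel linked list (struct item, items and the function names are illustrative, not from the implementation below):

#include <linux/mutex.h>
#include <linux/rculist.h>
#include <linux/slab.h>

struct item {
	int val;
	struct list_head node;
};

static LIST_HEAD(items);
static DEFINE_MUTEX(items_lock);	// writers still serialize among themselves

// reader: lock-free; only marks a read-side critical section
static int reader_sum(void)
{
	struct item *it;
	int sum = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(it, &items, node)
		sum += it->val;
	rcu_read_unlock();
	return sum;
}

// writer: unpublish (1), wait for pre-existing readers (2), reclaim (3)
static void remove_first(void)
{
	struct item *it;

	mutex_lock(&items_lock);
	it = list_first_entry_or_null(&items, struct item, node);
	if (it)
		list_del_rcu(&it->node);	// new readers can no longer find it
	mutex_unlock(&items_lock);

	if (it) {
		synchronize_rcu();	// wait for readers that may still hold it
		kfree(it);		// now safe to reclaim
	}
}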
Memory barrier
Memory barriers are not as prominent here as RCU, but they are still used implicitly, e.g. READ_ONCE / WRITE_ONCE, or smp_load_acquire / smp_store_release (not used in this implementation). In short, they guarantee the ordering of data updates. A CSDN article can serve as a further reference.
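A sketch of the classic message-passing pattern these primitives enable (payload and ready are illustrative names):

#include <asm/barrier.h>

static int payload;
static int ready;

// producer: smp_store_release() orders the payload write
// before the flag write becomes visible to other CPUs
static void producer(void)
{
	payload = 42;
	smp_store_release(&ready, 1);
}

// consumer: smp_load_acquire() guarantees that once ready == 1
// is observed, payload == 42 is observed as well
// (READ_ONCE/WRITE_ONCE alone would only prevent tearing and
//  compiler reordering, not CPU reordering)
static int consumer(void)
{
	if (smp_load_acquire(&ready))
		return payload;
	return -1;	// not published yet
}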
Helpers
// #define debug_output
#ifdef debug_output
#define INFO(...) pr_warn("[INF] " __VA_ARGS__)
#define ERROR(...) pr_err("[ERR] " __VA_ARGS__)
#else
#define INFO(...)
#define ERROR(...)
#endif
Data structure
// example payload
typedef struct info {
	int pid;
	unsigned long cpu_time;
	struct list_head node;
	struct rcu_head rcu_head;
} info;
typedef struct file_metadata {
	// last read end position
	loff_t read_pos;
	// last stop item index
	unsigned int last_index;
	// buffer content size
	unsigned short line_buf_size;
	// buffer read index
	unsigned short line_buf_idx;
	// enforce thread safety
	struct mutex lock;
	// revision to stay in sync with writes
	unsigned int revision;
	// fixed per-item buffer
	char line_buf[LINE_BUFFER_MAX_SIZE];
} file_metadata;
static struct kmem_cache *cache __ro_after_init;
static struct kmem_cache *info_cache __ro_after_init;
// global variable to sync writes
static unsigned int revision = 0;
// info RCU list
static LIST_HEAD(REG_PID_LIST);
static DEFINE_MUTEX(PIDS_MUX);
Custom Read
static inline int dump_buf(char buf[], info *pos)
{
	return snprintf(buf, LINE_BUFFER_MAX_SIZE, "%d: %lu\n",
			pos->pid, pos->cpu_time);
}
static ssize_t proc_read(struct file *file, char __user *buf, size_t cnt,
			 loff_t *ppos)
{
	struct file_metadata *p = file->private_data;
	info *pos = NULL;
	unsigned int idx = 0;
	unsigned long ret_s = 0, copied = 0, copy_size = 0;

	if (cnt == 0)
		return 0;

	INFO("read start, ppos=%lld, cnt=%zu\n", *ppos, cnt);

	// thread safety for changing the metadata in p
	mutex_lock(&p->lock);

	// unlikely, but if read_pos doesn't match, invalidate the line buffer
	if (p->read_pos != *ppos) {
		p->line_buf_idx = 0;
		p->line_buf_size = 0;
		// walk to that ppos position, or restart from 0?
		// if the required pos is before the previous pos, or the list has
		// been updated, find the desired item from the start;
		// otherwise just walk forward from the last item index
		idx = READ_ONCE(revision);
		if (*ppos < p->read_pos || p->revision != idx) {
			p->last_index = 0;
			p->read_pos = 0;
			p->revision = idx;
		}
	}
	// if the buffer has remaining bytes
	else if (p->line_buf_idx < p->line_buf_size) {
		INFO("copy remaining line buffer\n");
		copy_size = umin(p->line_buf_size - p->line_buf_idx, cnt);
		ret_s = copy_size - copy_to_user(buf, p->line_buf + p->line_buf_idx,
						 copy_size);
		p->line_buf_idx += ret_s;
		copied += ret_s;
		if (p->line_buf_idx < p->line_buf_size || copied == cnt) {
			// the user buffer is too small to finish this pending line buffer
			goto DONE;
		}
		p->line_buf_idx = 0;
		p->read_pos += copied;
		*ppos += copied;
		// advance the user buffer pointer as well
		buf += copied;
		copied = 0;
		p->last_index++;
	}

	INFO("re-find the desired index\n");
	idx = 0;
	ret_s = 0;
	copy_size = 0;
	rcu_read_lock();
	list_for_each_entry_rcu (pos, &REG_PID_LIST, node) {
		// first we need to get to the last_index position
		if (idx >= p->last_index) {
			ret_s = dump_buf(p->line_buf, pos);
			if (p->read_pos + copy_size + ret_s > *ppos) {
				// got the position
				p->last_index = idx;
				// ignore the unaligned part of pos
				p->read_pos += copy_size;
				break;
			} else {
				// keep searching
				copy_size += ret_s;
			}
			ret_s = 0;
		}
		idx++;
	}
	rcu_read_unlock();
	if (ret_s == 0) {
		// unlikely, but the required index is out of bounds
		INFO("required ppos out of bound\n");
		goto DONE;
	}
	INFO("idx=%d\n", p->last_index);
	// prepare the line buffer for the current index
	p->line_buf_size = ret_s;
	copy_size = umin(ret_s, cnt - copied);

	while (true) {
		INFO("feed the buffer, last idx=%d\n", p->last_index);
		if (copy_size == 0)
			break;
		// NOTE: copy_to_user is not atomic and cannot run under the RCU lock
		ret_s = copy_size - copy_to_user(buf + copied, p->line_buf, copy_size);
		p->line_buf_idx += ret_s;
		copied += ret_s;
		if (p->line_buf_idx < p->line_buf_size || copied == cnt) {
			// the user buffer is too small to finish this pending line buffer
			goto DONE;
		}
		p->line_buf_idx = 0;
		p->last_index++;
		idx = 0;
		rcu_read_lock();
		list_for_each_entry_rcu (pos, &REG_PID_LIST, node) {
			if (idx++ == p->last_index) {
				ret_s = dump_buf(p->line_buf, pos);
				p->line_buf_size = ret_s;
				copy_size = umin(ret_s, cnt - copied);
				break;
			}
		}
		rcu_read_unlock();
		if (idx <= p->last_index) {
			// reached the end of the list
			goto DONE;
		}
	}

	INFO("read finish\n");

DONE:
	p->read_pos += copied;
	mutex_unlock(&p->lock);
	*ppos += copied;
	return copied;
}
Custom Write
static ssize_t proc_write(struct file *file, const char __user *buffer,
			  size_t count, loff_t *ppos)
{
	int pid = 0, result;
	unsigned long cpu_time = 0;
	info *node = NULL;

	if (*ppos > 0) {
		INFO("ignore arbitrary write position info\n");
	}

	result = kstrtoint_from_user(buffer, count, 10, &pid);
	if (result != 0) {
		ERROR("incoming PID is not valid\n");
		return result;
	}
	if (pid <= 0 || pid > PID_MAX_LIMIT) {
		ERROR("PID %d out of valid range\n", pid);
		return -EINVAL;
	}

	node = kmem_cache_zalloc(info_cache, GFP_KERNEL);
	if (!node) {
		ERROR("failed to allocate node\n");
		return -ENOMEM;
	}
	node->pid = pid;
	if (get_cpu_use(pid, &cpu_time) == -1) {
		INFO("registered PID process does not exist\n");
		kmem_cache_free(info_cache, node);
		goto W_DONE;
	}
	node->cpu_time = cpu_time;

	mutex_lock(&PIDS_MUX);
	list_add_rcu(&node->node, &REG_PID_LIST);
	WRITE_ONCE(revision, READ_ONCE(revision) + 1);
	mutex_unlock(&PIDS_MUX);

	INFO("register PID=%d\n", pid);
W_DONE:
	INFO("write finish\n");
	*ppos = count;
	return count;
}
Custom Open, Release
int proc_open(struct inode *inode, struct file *f)
{
	struct file_metadata *p;

	// only read-mode fds need additional information
	if (f->f_mode & FMODE_READ) {
		p = kmem_cache_zalloc(cache, GFP_KERNEL);
		if (!p)
			return -ENOMEM;
		f->private_data = p;
		mutex_init(&p->lock);
		INFO("reader opened\n");
	}
	return 0;
}
int proc_release(struct inode *inode, struct file *f)
{
	if (f->f_mode & FMODE_READ) {
		// Note: mutex_destroy() is a no-op unless mutex debugging is enabled
		mutex_destroy(&((file_metadata *)(f->private_data))->lock);
		kmem_cache_free(cache, f->private_data);
	}
	return 0;
}
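For completeness, a sketch of how the caches and the proc entry might be wired up at module load (the entry name pid_list, the cache names and the error handling are my own assumptions):

#include <linux/module.h>
#include <linux/proc_fs.h>

static const struct proc_ops pid_list_proc_ops = {
	.proc_open    = proc_open,
	.proc_read    = proc_read,
	.proc_write   = proc_write,
	.proc_release = proc_release,
};

static struct proc_dir_entry *proc_entry;

static int __init pid_list_init(void)
{
	cache = kmem_cache_create("file_metadata_cache",
				  sizeof(file_metadata), 0, 0, NULL);
	info_cache = kmem_cache_create("info_cache", sizeof(info), 0, 0, NULL);
	if (!cache || !info_cache)
		goto fail;

	proc_entry = proc_create("pid_list", 0666, NULL, &pid_list_proc_ops);
	if (!proc_entry)
		goto fail;
	return 0;

fail:
	// kmem_cache_destroy() tolerates NULL
	kmem_cache_destroy(info_cache);
	kmem_cache_destroy(cache);
	return -ENOMEM;
}
module_init(pid_list_init);

MODULE_LICENSE("GPL");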
Update
If entries need to be updated or removed:
static void info_free_rcu(struct rcu_head *head)
{
	// don't shadow the info type with a variable named info here,
	// or container_of() could no longer resolve the type
	info *item = container_of(head, info, rcu_head);

	kmem_cache_free(info_cache, item);
}

static void foo(void)
{
	info *pos = NULL, *tmp = NULL;
	unsigned long cpu_time;
	int result;
	bool deleted = false;

	// the lock protects writers from other writers
	mutex_lock(&PIDS_MUX);
	list_for_each_entry_safe (pos, tmp, &REG_PID_LIST, node) {
		result = bar(&cpu_time); // some processing
		if (result == -1) {
			INFO("remove PID %d\n", pos->pid);
			list_del_rcu(&pos->node);
			call_rcu(&pos->rcu_head, info_free_rcu);
			deleted = true;
		} else {
			pos->cpu_time = cpu_time;
		}
	}
	if (deleted)
		WRITE_ONCE(revision, READ_ONCE(revision) + 1);
	mutex_unlock(&PIDS_MUX);
}
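One caveat at teardown: call_rcu() callbacks may still be in flight when the module unloads, so the exit path should flush them with rcu_barrier() before destroying the cache. A sketch, continuing the hypothetical module-init code above:

static void __exit pid_list_exit(void)
{
	info *pos, *tmp;

	proc_remove(proc_entry);

	// no new readers/writers past this point; drain the list
	mutex_lock(&PIDS_MUX);
	list_for_each_entry_safe (pos, tmp, &REG_PID_LIST, node) {
		list_del_rcu(&pos->node);
		call_rcu(&pos->rcu_head, info_free_rcu);
	}
	mutex_unlock(&PIDS_MUX);

	// wait for every queued info_free_rcu() callback to run,
	// otherwise kmem_cache_destroy() could race with them
	rcu_barrier();
	kmem_cache_destroy(info_cache);
	kmem_cache_destroy(cache);
}
module_exit(pid_list_exit);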