/*
 *  linux/fs/namespace.c
 *
 * (C) Copyright Al Viro 2000, 2001
 *	Released under GPL v2.
 *
 * Based on code from fs/super.c, copyright Linus Torvalds and others.
 * Heavily rewritten.
 */

#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/acct.h>
#include <linux/capability.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/sysfs.h>
#include <linux/seq_file.h>
#include <linux/mnt_namespace.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/mount.h>
#include <linux/ramfs.h>
#include <linux/log2.h>
#include <linux/idr.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include "pnode.h"
#include "internal.h"

#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
#define HASH_SIZE (1UL << HASH_SHIFT)

/* spinlock for vfsmount related operations, in place of dcache_lock */
__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);

static int event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);

static struct list_head *mount_hashtable __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
static struct rw_semaphore namespace_sem;

/* /sys/fs */
struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);

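/*
 * Hash a (mount, dentry) pair to a bucket in mount_hashtable.  Both
 * pointers are cache-line aligned, so drop the low bits before folding
 * the sum down to HASH_SHIFT bits.
 */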
static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
{
	unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
	tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
	tmp = tmp + (tmp >> HASH_SHIFT);
	return tmp & (HASH_SIZE - 1);
}

#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)

/* allocation of mount ids is serialized by vfsmount_lock */
static int mnt_alloc_id(struct vfsmount *mnt)
{
	int res;

retry:
	ida_pre_get(&mnt_id_ida, GFP_KERNEL);
	spin_lock(&vfsmount_lock);
	res = ida_get_new(&mnt_id_ida, &mnt->mnt_id);
	spin_unlock(&vfsmount_lock);
	if (res == -EAGAIN)
		goto retry;

	return res;
}

static void mnt_free_id(struct vfsmount *mnt)
{
	spin_lock(&vfsmount_lock);
	ida_remove(&mnt_id_ida, mnt->mnt_id);
	spin_unlock(&vfsmount_lock);
}

/*
 * Allocate a new peer group ID
 *
 * mnt_group_ida is protected by namespace_sem
 */
static int mnt_alloc_group_id(struct vfsmount *mnt)
{
	if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL))
		return -ENOMEM;

	return ida_get_new_above(&mnt_group_ida, 1, &mnt->mnt_group_id);
}

/*
 * Release a peer group ID
 */
void mnt_release_group_id(struct vfsmount *mnt)
{
	ida_remove(&mnt_group_ida, mnt->mnt_group_id);
	mnt->mnt_group_id = 0;
}

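/*
 * Allocate and minimally initialize a vfsmount: assign a mount id,
 * duplicate the device name and set up the list heads and counters.
 */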
struct vfsmount *alloc_vfsmnt(const char *name)
{
	struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
	if (mnt) {
		int err;

		err = mnt_alloc_id(mnt);
		if (err)
			goto out_free_cache;

		if (name) {
			mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
			if (!mnt->mnt_devname)
				goto out_free_id;
		}

		atomic_set(&mnt->mnt_count, 1);
		INIT_LIST_HEAD(&mnt->mnt_hash);
		INIT_LIST_HEAD(&mnt->mnt_child);
		INIT_LIST_HEAD(&mnt->mnt_mounts);
		INIT_LIST_HEAD(&mnt->mnt_list);
		INIT_LIST_HEAD(&mnt->mnt_expire);
		INIT_LIST_HEAD(&mnt->mnt_share);
		INIT_LIST_HEAD(&mnt->mnt_slave_list);
		INIT_LIST_HEAD(&mnt->mnt_slave);
		atomic_set(&mnt->__mnt_writers, 0);
	}
	return mnt;

out_free_id:
	mnt_free_id(mnt);
out_free_cache:
	kmem_cache_free(mnt_cache, mnt);
	return NULL;
}

/*
 * Most r/o checks on a fs are for operations that take
 * discrete amounts of time, like a write() or unlink().
 * We must keep track of when those operations start
 * (for permission checks) and when they end, so that
 * we can determine when writes are able to occur to
 * a filesystem.
 */
/*
 * __mnt_is_readonly() checks whether either the mount itself
 * (MNT_READONLY) or its superblock (MS_RDONLY) forbids writes.
 */
int __mnt_is_readonly(struct vfsmount *mnt)
{
	if (mnt->mnt_flags & MNT_READONLY)
		return 1;
	if (mnt->mnt_sb->s_flags & MS_RDONLY)
		return 1;
	return 0;
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);

struct mnt_writer {
	/*
	 * If holding multiple instances of this lock, they
	 * must be ordered by cpu number.
	 */
	spinlock_t lock;
	struct lock_class_key lock_class; /* compiles out with !lockdep */
	unsigned long count;
	struct vfsmount *mnt;
} ____cacheline_aligned_in_smp;
static DEFINE_PER_CPU(struct mnt_writer, mnt_writers);

static int __init init_mnt_writers(void)
{
	int cpu;
	for_each_possible_cpu(cpu) {
		struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
		spin_lock_init(&writer->lock);
		lockdep_set_class(&writer->lock, &writer->lock_class);
		writer->count = 0;
	}
	return 0;
}
fs_initcall(init_mnt_writers);

static void unlock_mnt_writers(void)
{
	int cpu;
	struct mnt_writer *cpu_writer;

	for_each_possible_cpu(cpu) {
		cpu_writer = &per_cpu(mnt_writers, cpu);
		spin_unlock(&cpu_writer->lock);
	}
}

static inline void __clear_mnt_count(struct mnt_writer *cpu_writer)
{
	if (!cpu_writer->mnt)
		return;
	/*
	 * This is in case anyone ever leaves an invalid,
	 * old ->mnt and a count of 0.
	 */
	if (!cpu_writer->count)
		return;
	atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers);
	cpu_writer->count = 0;
}

/*
 * must hold cpu_writer->lock
 */
static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
					    struct vfsmount *mnt)
{
	if (cpu_writer->mnt == mnt)
		return;
	__clear_mnt_count(cpu_writer);
	cpu_writer->mnt = mnt;
}

/**
 * mnt_want_write - get write access to a mount
 * @mnt: the mount on which to take a write
 *
 * This tells the low-level filesystem that a write is
 * about to be performed to it, and makes sure that
 * writes are allowed before returning success.  When
 * the write operation is finished, mnt_drop_write()
 * must be called.  This is effectively a refcount.
 */
int mnt_want_write(struct vfsmount *mnt)
{
	int ret = 0;
	struct mnt_writer *cpu_writer;

	cpu_writer = &get_cpu_var(mnt_writers);
	spin_lock(&cpu_writer->lock);
	if (__mnt_is_readonly(mnt)) {
		ret = -EROFS;
		goto out;
	}
	use_cpu_writer_for_mount(cpu_writer, mnt);
	cpu_writer->count++;
out:
	spin_unlock(&cpu_writer->lock);
	put_cpu_var(mnt_writers);
	return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);

static void lock_mnt_writers(void)
{
	int cpu;
	struct mnt_writer *cpu_writer;

	for_each_possible_cpu(cpu) {
		cpu_writer = &per_cpu(mnt_writers, cpu);
		spin_lock(&cpu_writer->lock);
		__clear_mnt_count(cpu_writer);
		cpu_writer->mnt = NULL;
	}
}

/*
 * These per-cpu write counts are not guaranteed to have
 * matched increments and decrements on any given cpu.
 * A file open()ed for write on one cpu and close()d on
 * another cpu could imbalance this count.  Make sure we
 * do not get too far out of whack.
 */
static void handle_write_count_underflow(struct vfsmount *mnt)
{
	if (atomic_read(&mnt->__mnt_writers) >=
	    MNT_WRITER_UNDERFLOW_LIMIT)
		return;
	/*
	 * It isn't necessary to hold all of the locks
	 * at the same time, but doing so makes things
	 * a bit easier.
	 */
	lock_mnt_writers();
	/*
	 * vfsmount_lock is for mnt_flags.
	 */
	spin_lock(&vfsmount_lock);
	/*
	 * If coalescing the per-cpu writer counts did not
	 * get us back to a positive writer count, we have
	 * a bug.
	 */
	if ((atomic_read(&mnt->__mnt_writers) < 0) &&
	    !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
		WARN(1, KERN_DEBUG "leak detected on mount(%p) writers "
				"count: %d\n",
			mnt, atomic_read(&mnt->__mnt_writers));
		/* mark the mount so we only warn once */
		mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
	}
	spin_unlock(&vfsmount_lock);
	unlock_mnt_writers();
}

/**
 * mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done
 * performing writes to it.  Must be matched with
 * mnt_want_write() call above.
 */
void mnt_drop_write(struct vfsmount *mnt)
{
	int must_check_underflow = 0;
	struct mnt_writer *cpu_writer;

	cpu_writer = &get_cpu_var(mnt_writers);
	spin_lock(&cpu_writer->lock);

	use_cpu_writer_for_mount(cpu_writer, mnt);
	if (cpu_writer->count > 0) {
		cpu_writer->count--;
	} else {
		must_check_underflow = 1;
		atomic_dec(&mnt->__mnt_writers);
	}

	spin_unlock(&cpu_writer->lock);
	/*
	 * Logically, we could do this check while holding the
	 * spinlock, but doing it after the unlock keeps the
	 * (rare) underflow slow path out of the locked region.
	 */
	if (must_check_underflow)
		handle_write_count_underflow(mnt);
	/*
	 * This could be done right after the spinlock
	 * is taken because the spinlock keeps us on
	 * the cpu, and disables preemption.  However,
	 * putting it here bounds the amount that
	 * __mnt_writers can underflow.  Without it,
	 * we could theoretically wrap __mnt_writers.
	 */
	put_cpu_var(mnt_writers);
}
EXPORT_SYMBOL_GPL(mnt_drop_write);

static int mnt_make_readonly(struct vfsmount *mnt)
{
	int ret = 0;

	lock_mnt_writers();
	/*
	 * With all of the per-cpu locks held, this value is stable.
	 */
	if (atomic_read(&mnt->__mnt_writers) > 0) {
		ret = -EBUSY;
		goto out;
	}
	/*
	 * Nobody can do a successful mnt_want_write() while all
	 * of the per-cpu locks are held.
	 */
	spin_lock(&vfsmount_lock);
	if (!ret)
		mnt->mnt_flags |= MNT_READONLY;
	spin_unlock(&vfsmount_lock);
out:
	unlock_mnt_writers();
	return ret;
}

static void __mnt_unmake_readonly(struct vfsmount *mnt)
{
	spin_lock(&vfsmount_lock);
	mnt->mnt_flags &= ~MNT_READONLY;
	spin_unlock(&vfsmount_lock);
}

int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
{
	mnt->mnt_sb = sb;
	mnt->mnt_root = dget(sb->s_root);
	return 0;
}

EXPORT_SYMBOL(simple_set_mnt);

void free_vfsmnt(struct vfsmount *mnt)
{
	kfree(mnt->mnt_devname);
	mnt_free_id(mnt);
	kmem_cache_free(mnt_cache, mnt);
}

/*
 * find the first or last mount at @dentry on vfsmount @mnt depending on
 * @dir. If @dir is set return the first mount else return the last mount.
 */
struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
			      int dir)
{
	struct list_head *head = mount_hashtable + hash(mnt, dentry);
	struct list_head *tmp = head;
	struct vfsmount *p, *found = NULL;

	for (;;) {
		tmp = dir ? tmp->next : tmp->prev;
		p = NULL;
		if (tmp == head)
			break;
		p = list_entry(tmp, struct vfsmount, mnt_hash);
		if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) {
			found = p;
			break;
		}
	}
	return found;
}

/*
 * lookup_mnt increments the ref count before returning
 * the vfsmount struct.
 */
struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
	struct vfsmount *child_mnt;
	spin_lock(&vfsmount_lock);
	if ((child_mnt = __lookup_mnt(mnt, dentry, 1)))
		mntget(child_mnt);
	spin_unlock(&vfsmount_lock);
	return child_mnt;
}

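/*
 * Check that a mount is visible in the caller's mount namespace.
 */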
static inline int check_mnt(struct vfsmount *mnt)
{
	return mnt->mnt_ns == current->nsproxy->mnt_ns;
}

static void touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns) {
		ns->event = ++event;
		wake_up_interruptible(&ns->poll);
	}
}

static void __touch_mnt_namespace(struct mnt_namespace *ns)
{
	if (ns && ns->event != event) {
		ns->event = event;
		wake_up_interruptible(&ns->poll);
	}
}

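/*
 * Detach @mnt from its parent, handing the old parent mount and
 * mountpoint back in @old_path.  Caller holds vfsmount_lock.
 */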
static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
{
	old_path->dentry = mnt->mnt_mountpoint;
	old_path->mnt = mnt->mnt_parent;
	mnt->mnt_parent = mnt;
	mnt->mnt_mountpoint = mnt->mnt_root;
	list_del_init(&mnt->mnt_child);
	list_del_init(&mnt->mnt_hash);
	old_path->dentry->d_mounted--;
}

void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
			struct vfsmount *child_mnt)
{
	child_mnt->mnt_parent = mntget(mnt);
	child_mnt->mnt_mountpoint = dget(dentry);
	dentry->d_mounted++;
}

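/*
 * Attach @mnt at @path: set parent and mountpoint and hook the mount
 * into the hash table and its parent's list of children.
 */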
static void attach_mnt(struct vfsmount *mnt, struct path *path)
{
	mnt_set_mountpoint(path->mnt, path->dentry, mnt);
	list_add_tail(&mnt->mnt_hash, mount_hashtable +
			hash(path->mnt, path->dentry));
	list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts);
}

/*
 * the caller must hold vfsmount_lock
 */
static void commit_tree(struct vfsmount *mnt)
{
	struct vfsmount *parent = mnt->mnt_parent;
	struct vfsmount *m;
	LIST_HEAD(head);
	struct mnt_namespace *n = parent->mnt_ns;

	BUG_ON(parent == mnt);

	list_add_tail(&head, &mnt->mnt_list);
	list_for_each_entry(m, &head, mnt_list)
		m->mnt_ns = n;
	list_splice(&head, n->list.prev);

	list_add_tail(&mnt->mnt_hash, mount_hashtable +
			hash(parent, mnt->mnt_mountpoint));
	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
	touch_mnt_namespace(n);
}

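/*
 * Depth-first traversal of the mount tree rooted at @root: returns the
 * mount that follows @p, or NULL once the whole tree has been walked.
 */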
static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
{
	struct list_head *next = p->mnt_mounts.next;
	if (next == &p->mnt_mounts) {
		while (1) {
			if (p == root)
				return NULL;
			next = p->mnt_child.next;
			if (next != &p->mnt_parent->mnt_mounts)
				break;
			p = p->mnt_parent;
		}
	}
	return list_entry(next, struct vfsmount, mnt_child);
}

static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
{
	struct list_head *prev = p->mnt_mounts.prev;
	while (prev != &p->mnt_mounts) {
		p = list_entry(prev, struct vfsmount, mnt_child);
		prev = p->mnt_mounts.prev;
	}
	return p;
}

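/*
 * Clone @old, using @root as the root of the new mount.  @flag (CL_*)
 * controls how peer group ids and the shared/slave/expire state of the
 * original carry over to the copy.
 */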
static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
					int flag)
{
	struct super_block *sb = old->mnt_sb;
	struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname);

	if (mnt) {
		if (flag & (CL_SLAVE | CL_PRIVATE))
			mnt->mnt_group_id = 0; /* not a peer of original */
		else
			mnt->mnt_group_id = old->mnt_group_id;

		if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
			int err = mnt_alloc_group_id(mnt);
			if (err)
				goto out_free;
		}

		mnt->mnt_flags = old->mnt_flags;
		atomic_inc(&sb->s_active);
		mnt->mnt_sb = sb;
		mnt->mnt_root = dget(root);
		mnt->mnt_mountpoint = mnt->mnt_root;
		mnt->mnt_parent = mnt;

		if (flag & CL_SLAVE) {
			list_add(&mnt->mnt_slave, &old->mnt_slave_list);
			mnt->mnt_master = old;
			CLEAR_MNT_SHARED(mnt);
		} else if (!(flag & CL_PRIVATE)) {
			if ((flag & CL_PROPAGATION) || IS_MNT_SHARED(old))
				list_add(&mnt->mnt_share, &old->mnt_share);
			if (IS_MNT_SLAVE(old))
				list_add(&mnt->mnt_slave, &old->mnt_slave);
			mnt->mnt_master = old->mnt_master;
		}
		if (flag & CL_MAKE_SHARED)
			set_mnt_shared(mnt);

		/* stick the duplicate mount on the same expiry list
		 * as the original if that was on one */
		if (flag & CL_EXPIRE) {
			if (!list_empty(&old->mnt_expire))
				list_add(&mnt->mnt_expire, &old->mnt_expire);
		}
	}
	return mnt;

 out_free:
	free_vfsmnt(mnt);
	return NULL;
}

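/*
 * Final cleanup of a mount whose refcount has dropped to zero: fold the
 * per-cpu writer counts back into __mnt_writers, then release the root
 * dentry and deactivate the superblock.
 */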
static inline void __mntput(struct vfsmount *mnt)
{
	int cpu;
	struct super_block *sb = mnt->mnt_sb;
	/*
	 * We don't have to hold all of the locks at the
	 * same time here because we know that we're the
	 * last reference to mnt and that no new writers
	 * can come in.
	 */
	for_each_possible_cpu(cpu) {
		struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
		if (cpu_writer->mnt != mnt)
			continue;
		spin_lock(&cpu_writer->lock);
		atomic_add(cpu_writer->count, &mnt->__mnt_writers);
		cpu_writer->count = 0;
		/*
		 * Might as well do this so that no one
		 * ever sees the pointer and expects
		 * it to be valid.
		 */
		cpu_writer->mnt = NULL;
		spin_unlock(&cpu_writer->lock);
	}
	/*
	 * This probably indicates that somebody messed
	 * up a mnt_want/drop_write() pair.  If this
	 * happens, the filesystem was probably unable
	 * to make r/w->r/o transitions.
	 */
	WARN_ON(atomic_read(&mnt->__mnt_writers));
	dput(mnt->mnt_root);
	free_vfsmnt(mnt);
	deactivate_super(sb);
}

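/*
 * Drop a reference without putting the mount on the expiry path.  If the
 * count would hit zero while the mount is pinned, the pins are converted
 * back into references and the accounting/security hooks run first.
 */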
void mntput_no_expire(struct vfsmount *mnt)
{
repeat:
	if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) {
		if (likely(!mnt->mnt_pinned)) {
			spin_unlock(&vfsmount_lock);
			__mntput(mnt);
			return;
		}
		atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
		mnt->mnt_pinned = 0;
		spin_unlock(&vfsmount_lock);
		acct_auto_close_mnt(mnt);
		security_sb_umount_close(mnt);
		goto repeat;
	}
}

EXPORT_SYMBOL(mntput_no_expire);

void mnt_pin(struct vfsmount *mnt)
{
	spin_lock(&vfsmount_lock);
	mnt->mnt_pinned++;
	spin_unlock(&vfsmount_lock);
}

EXPORT_SYMBOL(mnt_pin);

void mnt_unpin(struct vfsmount *mnt)
{
	spin_lock(&vfsmount_lock);
	if (mnt->mnt_pinned) {
		atomic_inc(&mnt->mnt_count);
		mnt->mnt_pinned--;
	}
	spin_unlock(&vfsmount_lock);
}

EXPORT_SYMBOL(mnt_unpin);

static inline void mangle(struct seq_file *m, const char *s)
{
	seq_escape(m, s, " \t\n\\");
}

/*
 * Simple .show_options callback for filesystems which don't want to
 * implement more complex mount option showing.
 *
 * See also save_mount_options().
 */
int generic_show_options(struct seq_file *m, struct vfsmount *mnt)
{
	const char *options = mnt->mnt_sb->s_options;

	if (options != NULL && options[0]) {
		seq_putc(m, ',');
		mangle(m, options);
	}

	return 0;
}
EXPORT_SYMBOL(generic_show_options);

/*
 * If filesystem uses generic_show_options(), this function should be
 * called from the fill_super() callback.
 *
 * The .remount_fs callback usually needs to be handled in a special
 * way, to make sure, that previous options are not overwritten if the
 * remount fails.
 *
 * Also note, that if the filesystem's .remount_fs function doesn't
 * reset all options to their default value, but changes only newly
 * given options, then the displayed options will not reflect changes
 * made by the .remount_fs callback.
 */
void save_mount_options(struct super_block *sb, char *options)
{
	kfree(sb->s_options);
	sb->s_options = kstrdup(options, GFP_KERNEL);
}
EXPORT_SYMBOL(save_mount_options);

#ifdef CONFIG_PROC_FS
/* iterator */
static void *m_start(struct seq_file *m, loff_t *pos)
{
	struct proc_mounts *p = m->private;

	down_read(&namespace_sem);
	return seq_list_start(&p->ns->list, *pos);
}

static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_mounts *p = m->private;

	return seq_list_next(v, &p->ns->list, pos);
}

static void m_stop(struct seq_file *m, void *v)
{
	up_read(&namespace_sem);
}

struct proc_fs_info {
	int flag;
	const char *str;
};

static int show_sb_opts(struct seq_file *m, struct super_block *sb)
{
	static const struct proc_fs_info fs_info[] = {
		{ MS_SYNCHRONOUS, ",sync" },
		{ MS_DIRSYNC, ",dirsync" },
		{ MS_MANDLOCK, ",mand" },
		{ 0, NULL }
	};
	const struct proc_fs_info *fs_infop;

	for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
		if (sb->s_flags & fs_infop->flag)
			seq_puts(m, fs_infop->str);
	}

	return security_sb_show_options(m, sb);
}

static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
{
	static const struct proc_fs_info mnt_info[] = {
		{ MNT_NOSUID, ",nosuid" },
		{ MNT_NODEV, ",nodev" },
		{ MNT_NOEXEC, ",noexec" },
		{ MNT_NOATIME, ",noatime" },
		{ MNT_NODIRATIME, ",nodiratime" },
		{ MNT_RELATIME, ",relatime" },
		{ 0, NULL }
	};
	const struct proc_fs_info *fs_infop;

	for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
		if (mnt->mnt_flags & fs_infop->flag)
			seq_puts(m, fs_infop->str);
	}
}

static void show_type(struct seq_file *m, struct super_block *sb)
{
	mangle(m, sb->s_type->name);
	if (sb->s_subtype && sb->s_subtype[0]) {
		seq_putc(m, '.');
		mangle(m, sb->s_subtype);
	}
}

static int show_vfsmnt(struct seq_file *m, void *v)
{
	struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
	int err = 0;
	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };

	mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
	seq_putc(m, ' ');
	seq_path(m, &mnt_path, " \t\n\\");
	seq_putc(m, ' ');
	show_type(m, mnt->mnt_sb);
	seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
	err = show_sb_opts(m, mnt->mnt_sb);
	if (err)
		goto out;
	show_mnt_opts(m, mnt);
	if (mnt->mnt_sb->s_op->show_options)
		err = mnt->mnt_sb->s_op->show_options(m, mnt);
	seq_puts(m, " 0 0\n");
out:
	return err;
}

const struct seq_operations mounts_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_vfsmnt
};

static int show_mountinfo(struct seq_file *m, void *v)
{
	struct proc_mounts *p = m->private;
	struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
	struct super_block *sb = mnt->mnt_sb;
	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
	struct path root = p->root;
	int err = 0;

	seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id,
		   MAJOR(sb->s_dev), MINOR(sb->s_dev));
	seq_dentry(m, mnt->mnt_root, " \t\n\\");
	seq_putc(m, ' ');
	seq_path_root(m, &mnt_path, &root, " \t\n\\");
	if (root.mnt != p->root.mnt || root.dentry != p->root.dentry) {
		/*
		 * Mountpoint is outside root, discard that one.  Ugly,
		 * but less so than trying to do that in iterator in a
		 * race-free way (due to renames).
		 */
		return SEQ_SKIP;
	}
	seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
	show_mnt_opts(m, mnt);

	/* Tagged fields ("tag:value") */
	if (IS_MNT_SHARED(mnt))
		seq_printf(m, " shared:%i", mnt->mnt_group_id);
	if (IS_MNT_SLAVE(mnt)) {
		int master = mnt->mnt_master->mnt_group_id;
		int dom = get_dominating_id(mnt, &p->root);
		seq_printf(m, " master:%i", master);
		if (dom && dom != master)
			seq_printf(m, " propagate_from:%i", dom);
	}
	if (IS_MNT_UNBINDABLE(mnt))
		seq_puts(m, " unbindable");

	/* Filesystem specific data */
	seq_puts(m, " - ");
	show_type(m, sb);
	seq_putc(m, ' ');
	mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
	seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
	err = show_sb_opts(m, sb);
	if (err)
		goto out;
	if (sb->s_op->show_options)
		err = sb->s_op->show_options(m, mnt);
	seq_putc(m, '\n');
out:
	return err;
}

const struct seq_operations mountinfo_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_mountinfo,
};

static int show_vfsstat(struct seq_file *m, void *v)
{
	struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
	int err = 0;

	/* device */
	if (mnt->mnt_devname) {
		seq_puts(m, "device ");
		mangle(m, mnt->mnt_devname);
	} else
		seq_puts(m, "no device");

	/* mount point */
	seq_puts(m, " mounted on ");
	seq_path(m, &mnt_path, " \t\n\\");
	seq_putc(m, ' ');

	/* file system type */
	seq_puts(m, "with fstype ");
	show_type(m, mnt->mnt_sb);

	/* optional statistics */
	if (mnt->mnt_sb->s_op->show_stats) {
		seq_putc(m, ' ');
		err = mnt->mnt_sb->s_op->show_stats(m, mnt);
	}

	seq_putc(m, '\n');
	return err;
}

const struct seq_operations mountstats_op = {
	.start	= m_start,
	.next	= m_next,
	.stop	= m_stop,
	.show	= show_vfsstat,
};
#endif  /* CONFIG_PROC_FS */

/**
 * may_umount_tree - check if a mount tree is busy
 * @mnt: root of mount tree
 *
 * This is called to check if a tree of mounts has any
 * open files, pwds, chroots or sub mounts that are
 * busy.
 */
int may_umount_tree(struct vfsmount *mnt)
{
	int actual_refs = 0;
	int minimum_refs = 0;
	struct vfsmount *p;

	spin_lock(&vfsmount_lock);
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		actual_refs += atomic_read(&p->mnt_count);
		minimum_refs += 2;
	}
	spin_unlock(&vfsmount_lock);

	if (actual_refs > minimum_refs)
		return 0;

	return 1;
}

EXPORT_SYMBOL(may_umount_tree);

/**
 * may_umount - check if a mount point is busy
 * @mnt: root of mount
 *
 * This is called to check if a mount point has any
 * open files, pwds, chroots or sub mounts. If the
 * mount has sub mounts this will return busy
 * regardless of whether the sub mounts are busy.
 *
 * Doesn't take quota and stuff into account. IOW, in some cases it will
 * give false negatives. The main reason why it's here is that we need
 * a non-destructive way to look for easily umountable filesystems.
 */
int may_umount(struct vfsmount *mnt)
{
	int ret = 1;
	spin_lock(&vfsmount_lock);
	if (propagate_mount_busy(mnt, 2))
		ret = 0;
	spin_unlock(&vfsmount_lock);
	return ret;
}

EXPORT_SYMBOL(may_umount);

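/*
 * Dispose of a list of mounts collected by umount_tree(): finish
 * detaching each one from its former parent and drop the references.
 */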
void release_mounts(struct list_head *head)
{
	struct vfsmount *mnt;
	while (!list_empty(head)) {
		mnt = list_first_entry(head, struct vfsmount, mnt_hash);
		list_del_init(&mnt->mnt_hash);
		if (mnt->mnt_parent != mnt) {
			struct dentry *dentry;
			struct vfsmount *m;
			spin_lock(&vfsmount_lock);
			dentry = mnt->mnt_mountpoint;
			m = mnt->mnt_parent;
			mnt->mnt_mountpoint = mnt->mnt_root;
			mnt->mnt_parent = mnt;
			m->mnt_ghosts--;
			spin_unlock(&vfsmount_lock);
			dput(dentry);
			mntput(m);
		}
		mntput(mnt);
	}
}

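/*
 * Move every mount in the tree rooted at @mnt onto @kill for a later
 * release_mounts(); if @propagate is set, the umount event is first
 * propagated to peer groups.  Caller holds namespace_sem and
 * vfsmount_lock.
 */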
void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
{
	struct vfsmount *p;

	for (p = mnt; p; p = next_mnt(p, mnt))
		list_move(&p->mnt_hash, kill);

	if (propagate)
		propagate_umount(kill);

	list_for_each_entry(p, kill, mnt_hash) {
		list_del_init(&p->mnt_expire);
		list_del_init(&p->mnt_list);
		__touch_mnt_namespace(p->mnt_ns);
		p->mnt_ns = NULL;
		list_del_init(&p->mnt_child);
		if (p->mnt_parent != p) {
			p->mnt_parent->mnt_ghosts++;
			p->mnt_mountpoint->d_mounted--;
		}
		change_mnt_propagation(p, MS_PRIVATE);
	}
}

static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts);

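/*
 * Back end of umount(2): handles the MNT_EXPIRE, MNT_FORCE and
 * MNT_DETACH variants as well as the plain busy-checked unmount.
 */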
static int do_umount(struct vfsmount *mnt, int flags)
{
	struct super_block *sb = mnt->mnt_sb;
	int retval;
	LIST_HEAD(umount_list);

	retval = security_sb_umount(mnt, flags);
	if (retval)
		return retval;

	/*
	 * Allow userspace to request a mountpoint be expired rather than
	 * unmounting unconditionally. Unmount only happens if:
	 *  (1) the mark is already set (the mark is cleared by mntput())
	 *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
	 */
	if (flags & MNT_EXPIRE) {
		if (mnt == current->fs->root.mnt ||
		    flags & (MNT_FORCE | MNT_DETACH))
			return -EINVAL;

		if (atomic_read(&mnt->mnt_count) != 2)
			return -EBUSY;

		if (!xchg(&mnt->mnt_expiry_mark, 1))
			return -EAGAIN;
	}

	/*
	 * If we may have to abort operations to get out of this
	 * mount, and they will themselves hold resources we must
	 * allow the fs to do things.  The ->umount_begin hook lets
	 * the filesystem abort any pending operations before the
	 * actual unmount is attempted.
	 */
	if (flags & MNT_FORCE && sb->s_op->umount_begin) {
		lock_kernel();
		sb->s_op->umount_begin(sb);
		unlock_kernel();
	}

	/*
	 * No sense to grab the lock for this test, but test itself looks
	 * somewhat bogus. Suggestions for better replacement?
	 * Ho-hum... In principle, we might treat that as umount + switch
	 * to rootfs. GC would eventually take care of the old vfsmount.
	 * Actually it makes sense, especially if rootfs would contain a
	 * /reboot - static binary that would close all descriptors and
	 * call reboot(9). Then init(8) could umount root and exec /reboot.
	 */
	if (mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
		/*
		 * Special case for "unmounting" root ...
		 * we just try to remount it readonly.
		 */
		down_write(&sb->s_umount);
		if (!(sb->s_flags & MS_RDONLY)) {
			lock_kernel();
			retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
			unlock_kernel();
		}
		up_write(&sb->s_umount);
		return retval;
	}

	down_write(&namespace_sem);
	spin_lock(&vfsmount_lock);
	event++;

	if (!(flags & MNT_DETACH))
		shrink_submounts(mnt, &umount_list);

	retval = -EBUSY;
	if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
		if (!list_empty(&mnt->mnt_list))
			umount_tree(mnt, 1, &umount_list);
		retval = 0;
	}
	spin_unlock(&vfsmount_lock);
	if (retval)
		security_sb_umount_busy(mnt);
	up_write(&namespace_sem);
	release_mounts(&umount_list);
	return retval;
}

/*
 * Now umount can handle mount points as well as block devices.
 * This is important for filesystems which use unnamed block devices.
 *
 * We now support a flag for forced unmount like the other 'big iron'
 * unixes. Our API is identical to OSF/1 to avoid making a mess of AIX
 */
asmlinkage long sys_umount(char __user * name, int flags)
{
	struct path path;
	int retval;

	retval = user_path(name, &path);
	if (retval)
		goto out;
	retval = -EINVAL;
	if (path.dentry != path.mnt->mnt_root)
		goto dput_and_out;
	if (!check_mnt(path.mnt))
		goto dput_and_out;

	retval = -EPERM;
	if (!capable(CAP_SYS_ADMIN))
		goto dput_and_out;

	retval = do_umount(path.mnt, flags);
dput_and_out:
	/* we mustn't call path_put() as that would clear mnt_expiry_mark */
	dput(path.dentry);
	mntput_no_expire(path.mnt);
out:
	return retval;
}

#ifdef __ARCH_WANT_SYS_OLDUMOUNT

/*
 *	The 2.0 compatible umount. No flags.
 */
asmlinkage long sys_oldumount(char __user * name)
{
	return sys_umount(name, 0);
}

#endif

static int mount_is_safe(struct path *path)
{
	if (capable(CAP_SYS_ADMIN))
		return 0;
	return -EPERM;
#ifdef notyet
	if (S_ISLNK(path->dentry->d_inode->i_mode))
		return -EPERM;
	if (path->dentry->d_inode->i_mode & S_ISVTX) {
		if (current->uid != path->dentry->d_inode->i_uid)
			return -EPERM;
	}
	if (inode_permission(path->dentry->d_inode, MAY_WRITE))
		return -EPERM;
	return 0;
#endif
}

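/*
 * Replicate the subtree of @mnt that hangs below @dentry, cloning each
 * mount and reattaching the clones at the corresponding mountpoints.
 * Returns NULL on allocation failure.
 */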
struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
					int flag)
{
	struct vfsmount *res, *p, *q, *r, *s;
	struct path path;

	if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
		return NULL;

	res = q = clone_mnt(mnt, dentry, flag);
	if (!q)
		goto Enomem;
	q->mnt_mountpoint = mnt->mnt_mountpoint;

	p = mnt;
	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
		if (!is_subdir(r->mnt_mountpoint, dentry))
			continue;

		for (s = r; s; s = next_mnt(s, r)) {
			if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(s)) {
				s = skip_mnt_tree(s);
				continue;
			}
			while (p != s->mnt_parent) {
				p = p->mnt_parent;
				q = q->mnt_parent;
			}
			p = s;
			path.mnt = q;
			path.dentry = p->mnt_mountpoint;
			q = clone_mnt(p, p->mnt_root, flag);
			if (!q)
				goto Enomem;
			spin_lock(&vfsmount_lock);
			list_add_tail(&q->mnt_list, &res->mnt_list);
			attach_mnt(q, &path);
			spin_unlock(&vfsmount_lock);
		}
	}
	return res;
Enomem:
	if (res) {
		LIST_HEAD(umount_list);
		spin_lock(&vfsmount_lock);
		umount_tree(res, 0, &umount_list);
		spin_unlock(&vfsmount_lock);
		release_mounts(&umount_list);
	}
	return NULL;
}

struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry)
{
	struct vfsmount *tree;
	down_write(&namespace_sem);
	tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE);
	up_write(&namespace_sem);
	return tree;
}

void drop_collected_mounts(struct vfsmount *mnt)
{
	LIST_HEAD(umount_list);
	down_write(&namespace_sem);
	spin_lock(&vfsmount_lock);
	umount_tree(mnt, 0, &umount_list);
	spin_unlock(&vfsmount_lock);
	up_write(&namespace_sem);
	release_mounts(&umount_list);
}

static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end)
{
	struct vfsmount *p;

	for (p = mnt; p != end; p = next_mnt(p, mnt)) {
		if (p->mnt_group_id && !IS_MNT_SHARED(p))
			mnt_release_group_id(p);
	}
}

static int invent_group_ids(struct vfsmount *mnt, bool recurse)
{
	struct vfsmount *p;

	for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
		if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
			int err = mnt_alloc_group_id(p);
			if (err) {
				cleanup_group_ids(mnt, p);
				return err;
			}
		}
	}

	return 0;
}

/*
 * attach_recursive_mnt - attach a source mount tree to a destination
 * @source_mnt: mount tree to be attached
 * @path:       place in the mount tree where the tree should go
 * @parent_path: if non-NULL, detach @source_mnt from its current parent
 *               first and return the old parent and mountpoint here
 *               (this is the move-mount case)
 *
 * If the destination mount is part of a shared peer group, the source
 * tree is first given peer group ids (invent_group_ids()) and then
 * replicated to every member of the group via propagate_mnt(); the
 * source tree and all of the replicas are marked shared and committed
 * atomically under vfsmount_lock.  On failure, any group ids invented
 * for the source tree are released again.
 */
static int attach_recursive_mnt(struct vfsmount *source_mnt,
			struct path *path, struct path *parent_path)
{
	LIST_HEAD(tree_list);
	struct vfsmount *dest_mnt = path->mnt;
	struct dentry *dest_dentry = path->dentry;
	struct vfsmount *child, *p;
	int err;

	if (IS_MNT_SHARED(dest_mnt)) {
		err = invent_group_ids(source_mnt, true);
		if (err)
			goto out;
	}
	err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list);
	if (err)
		goto out_cleanup_ids;

	if (IS_MNT_SHARED(dest_mnt)) {
		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
			set_mnt_shared(p);
	}

	spin_lock(&vfsmount_lock);
	if (parent_path) {
		detach_mnt(source_mnt, parent_path);
		attach_mnt(source_mnt, path);
		touch_mnt_namespace(current->nsproxy->mnt_ns);
	} else {
		mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
		commit_tree(source_mnt);
	}

	list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
		list_del_init(&child->mnt_hash);
		commit_tree(child);
	}
	spin_unlock(&vfsmount_lock);
	return 0;

 out_cleanup_ids:
	if (IS_MNT_SHARED(dest_mnt))
		cleanup_group_ids(source_mnt, NULL);
 out:
	return err;
}

static int graft_tree(struct vfsmount *mnt, struct path *path)
{
	int err;
	if (mnt->mnt_sb->s_flags & MS_NOUSER)
		return -EINVAL;

	if (S_ISDIR(path->dentry->d_inode->i_mode) !=
	      S_ISDIR(mnt->mnt_root->d_inode->i_mode))
		return -ENOTDIR;

	err = -ENOENT;
	mutex_lock(&path->dentry->d_inode->i_mutex);
	if (IS_DEADDIR(path->dentry->d_inode))
		goto out_unlock;

	err = security_sb_check_sb(mnt, path);
	if (err)
		goto out_unlock;

	err = -ENOENT;
	if (IS_ROOT(path->dentry) || !d_unhashed(path->dentry))
		err = attach_recursive_mnt(mnt, path, NULL);
out_unlock:
	mutex_unlock(&path->dentry->d_inode->i_mutex);
	if (!err)
		security_sb_post_addmount(mnt, path);
	return err;
}

/*
 * recursively change the type of the mountpoint.
 */
static int do_change_type(struct path *path, int flag)
{
	struct vfsmount *m, *mnt = path->mnt;
	int recurse = flag & MS_REC;
	int type = flag & ~MS_REC;
	int err = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	down_write(&namespace_sem);
	if (type == MS_SHARED) {
		err = invent_group_ids(mnt, recurse);
		if (err)
			goto out_unlock;
	}

	spin_lock(&vfsmount_lock);
	for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
		change_mnt_propagation(m, type);
	spin_unlock(&vfsmount_lock);

 out_unlock:
	up_write(&namespace_sem);
	return err;
}

/*
 * do loopback mount.
 */
static int do_loopback(struct path *path, char *old_name,
				int recurse)
{
	struct path old_path;
	struct vfsmount *mnt = NULL;
	int err = mount_is_safe(path);
	if (err)
		return err;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
	if (err)
		return err;

	down_write(&namespace_sem);
	err = -EINVAL;
	if (IS_MNT_UNBINDABLE(old_path.mnt))
		goto out;

	if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
		goto out;

	err = -ENOMEM;
	if (recurse)
		mnt = copy_tree(old_path.mnt, old_path.dentry, 0);
	else
		mnt = clone_mnt(old_path.mnt, old_path.dentry, 0);

	if (!mnt)
		goto out;

	err = graft_tree(mnt, path);
	if (err) {
		LIST_HEAD(umount_list);
		spin_lock(&vfsmount_lock);
		umount_tree(mnt, 0, &umount_list);
		spin_unlock(&vfsmount_lock);
		release_mounts(&umount_list);
	}

out:
	up_write(&namespace_sem);
	path_put(&old_path);
	return err;
}

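/*
 * Handle the MS_RDONLY bit of a remount of a bind mount: only the
 * per-mount read-only flag changes, the superblock is left alone.
 */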
static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
{
	int error = 0;
	int readonly_request = 0;

	if (ms_flags & MS_RDONLY)
		readonly_request = 1;
	if (readonly_request == __mnt_is_readonly(mnt))
		return 0;

	if (readonly_request)
		error = mnt_make_readonly(mnt);
	else
		__mnt_unmake_readonly(mnt);
	return error;
}

/*
 * change filesystem flags. dir should be a physical root of filesystem.
 * If you've mounted a non-root directory somewhere and want to do remount
 * on it - tell me from what your name resolution should start.
 */
static int do_remount(struct path *path, int flags, int mnt_flags,
		      void *data)
{
	int err;
	struct super_block *sb = path->mnt->mnt_sb;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!check_mnt(path->mnt))
		return -EINVAL;

	if (path->dentry != path->mnt->mnt_root)
		return -EINVAL;

	down_write(&sb->s_umount);
	if (flags & MS_BIND)
		err = change_mount_flags(path->mnt, flags);
	else
		err = do_remount_sb(sb, flags, data, 0);
	if (!err)
		path->mnt->mnt_flags = mnt_flags;
	up_write(&sb->s_umount);
	if (!err) {
		security_sb_post_remount(path->mnt, flags, data);

		spin_lock(&vfsmount_lock);
		touch_mnt_namespace(path->mnt->mnt_ns);
		spin_unlock(&vfsmount_lock);
	}
	return err;
}

static inline int tree_contains_unbindable(struct vfsmount *mnt)
{
	struct vfsmount *p;
	for (p = mnt; p; p = next_mnt(p, mnt)) {
		if (IS_MNT_UNBINDABLE(p))
			return 1;
	}
	return 0;
}

static int do_move_mount(struct path *path, char *old_name)
{
	struct path old_path, parent_path;
	struct vfsmount *p;
	int err = 0;
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (!old_name || !*old_name)
		return -EINVAL;
	err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
	if (err)
		return err;

	down_write(&namespace_sem);
	while (d_mountpoint(path->dentry) &&
	       follow_down(&path->mnt, &path->dentry))
		;
	err = -EINVAL;
	if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
		goto out;

	err = -ENOENT;
	mutex_lock(&path->dentry->d_inode->i_mutex);
	if (IS_DEADDIR(path->dentry->d_inode))
		goto out1;

	if (!IS_ROOT(path->dentry) && d_unhashed(path->dentry))
		goto out1;

	err = -EINVAL;
	if (old_path.dentry != old_path.mnt->mnt_root)
		goto out1;

	if (old_path.mnt == old_path.mnt->mnt_parent)
		goto out1;

	if (S_ISDIR(path->dentry->d_inode->i_mode) !=
	      S_ISDIR(old_path.dentry->d_inode->i_mode))
		goto out1;
	/*
	 * Don't move a mount residing in a shared parent.
	 */
	if (old_path.mnt->mnt_parent &&
	    IS_MNT_SHARED(old_path.mnt->mnt_parent))
		goto out1;
	/*
	 * Don't move a mount tree containing unbindable mounts to a
	 * destination mount which is shared.
	 */
	if (IS_MNT_SHARED(path->mnt) &&
	    tree_contains_unbindable(old_path.mnt))
		goto out1;
	err = -ELOOP;
	for (p = path->mnt; p->mnt_parent != p; p = p->mnt_parent)
		if (p == old_path.mnt)
			goto out1;

	err = attach_recursive_mnt(old_path.mnt, path, &parent_path);
	if (err)
		goto out1;

	/* if the mount is moved, it should no longer be expired
	 * automatically */
	list_del_init(&old_path.mnt->mnt_expire);
out1:
	mutex_unlock(&path->dentry->d_inode->i_mutex);
out:
	up_write(&namespace_sem);
	if (!err)
		path_put(&parent_path);
	path_put(&old_path);
	return err;
}

/*
 * create a new mount for userspace and request it to be added into the
 * namespace's tree
 */
static int do_new_mount(struct path *path, char *type, int flags,
			int mnt_flags, char *name, void *data)
{
	struct vfsmount *mnt;

	if (!type || !memchr(type, 0, PAGE_SIZE))
		return -EINVAL;

	/* we need capabilities... */
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	mnt = do_kern_mount(type, flags, name, data);
	if (IS_ERR(mnt))
		return PTR_ERR(mnt);

	return do_add_mount(mnt, path, mnt_flags, NULL);
}

/*
 * add a mount into a namespace's mount tree
 * - provide the option of adding the new mount to an expiration list
 */
int do_add_mount(struct vfsmount *newmnt, struct path *path,
		 int mnt_flags, struct list_head *fslist)
{
	int err;

	down_write(&namespace_sem);
	/* Something was mounted here while we slept */
	while (d_mountpoint(path->dentry) &&
	       follow_down(&path->mnt, &path->dentry))
		;
	err = -EINVAL;
	if (!check_mnt(path->mnt))
		goto unlock;

	/* Refuse the same filesystem on the same mount point */
	err = -EBUSY;
	if (path->mnt->mnt_sb == newmnt->mnt_sb &&
	    path->mnt->mnt_root == path->dentry)
		goto unlock;

	err = -EINVAL;
	if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
		goto unlock;

	newmnt->mnt_flags = mnt_flags;
	if ((err = graft_tree(newmnt, path)))
		goto unlock;

	if (fslist)
		list_add_tail(&newmnt->mnt_expire, fslist);

	up_write(&namespace_sem);
	return 0;

unlock:
	up_write(&namespace_sem);
	mntput(newmnt);
	return err;
}

EXPORT_SYMBOL_GPL(do_add_mount);

/*
 * process a list of expirable mountpoints with the intent of discarding any
 * mountpoints that aren't in use and haven't been touched since last we came
 * here
 */
void mark_mounts_for_expiry(struct list_head *mounts)
{
	struct vfsmount *mnt, *next;
	LIST_HEAD(graveyard);
	LIST_HEAD(umounts);

	if (list_empty(mounts))
		return;

	down_write(&namespace_sem);
	spin_lock(&vfsmount_lock);

	/* extract from the expiration list every vfsmount that matches the
	 * following criteria:
	 * - only referenced by its parent vfsmount
	 * - still marked for expiry (marked on the last call here; marks are
	 *   cleared by mntput())
	 */
	list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
		if (!xchg(&mnt->mnt_expiry_mark, 1) ||
		    propagate_mount_busy(mnt, 1))
			continue;
		list_move(&mnt->mnt_expire, &graveyard);
	}
	while (!list_empty(&graveyard)) {
		mnt = list_first_entry(&graveyard, struct vfsmount, mnt_expire);
		touch_mnt_namespace(mnt->mnt_ns);
		umount_tree(mnt, 1, &umounts);
	}
	spin_unlock(&vfsmount_lock);
	up_write(&namespace_sem);

	release_mounts(&umounts);
}

EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);

/*
 * Ripoff of 'select_parent()'
 *
 * search the list of submounts for a given mountpoint, and move any
 * shrinkable submounts to the 'graveyard' list.
 */
static int select_submounts(struct vfsmount *parent, struct list_head *graveyard)
{
	struct vfsmount *this_parent = parent;
	struct list_head *next;
	int found = 0;

repeat:
	next = this_parent->mnt_mounts.next;
resume:
	while (next != &this_parent->mnt_mounts) {
		struct list_head *tmp = next;
		struct vfsmount *mnt = list_entry(tmp, struct vfsmount, mnt_child);

		next = tmp->next;
		if (!(mnt->mnt_flags & MNT_SHRINKABLE))
			continue;
		/*
		 * Descend a level if the mnt_mounts list is non-empty.
		 */
		if (!list_empty(&mnt->mnt_mounts)) {
			this_parent = mnt;
			goto repeat;
		}

		if (!propagate_mount_busy(mnt, 1)) {
			list_move_tail(&mnt->mnt_expire, graveyard);
			found++;
		}
	}
	/*
	 * All done at this level ... ascend and resume the search
	 */
	if (this_parent != parent) {
		next = this_parent->mnt_child.next;
		this_parent = this_parent->mnt_parent;
		goto resume;
	}
	return found;
}

/*
 * process a list of expirable mountpoints with the intent of discarding any
 * submounts of a specific parent mountpoint
 */
static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts)
{
	LIST_HEAD(graveyard);
	struct vfsmount *m;

	/* extract submounts of 'mountpoint' from the expiration list */
	while (select_submounts(mnt, &graveyard)) {
		while (!list_empty(&graveyard)) {
			m = list_first_entry(&graveyard, struct vfsmount,
						mnt_expire);
			touch_mnt_namespace(m->mnt_ns);
			umount_tree(m, 1, umounts);
		}
	}
}

/*
 * Some copy_from_user() implementations do not return the exact number of
 * bytes remaining to copy on a fault.  But copy_mount_options() requires that.
 * Note that this function differs from copy_from_user() in that it will oops
 * on bad values of `to', rather than returning a short copy.
 */
static long exact_copy_from_user(void *to, const void __user * from,
				 unsigned long n)
{
	char *t = to;
	const char __user *f = from;
	char c;

	if (!access_ok(VERIFY_READ, from, n))
		return n;

	while (n) {
		if (__get_user(c, f)) {
			memset(t, 0, n);
			break;
		}
		*t++ = c;
		f++;
		n--;
	}
	return n;
}

int copy_mount_options(const void __user * data, unsigned long *where)
{
	int i;
	unsigned long page;
	unsigned long size;

	*where = 0;
	if (!data)
		return 0;

	if (!(page = __get_free_page(GFP_KERNEL)))
		return -ENOMEM;

	/* We only care that *some* data at the address the user
	 * gave us is valid.  Just in case, we'll zero
	 * the remainder of the page.
	 */
	/* copy_from_user cannot cross TASK_SIZE ! */
	size = TASK_SIZE - (unsigned long)data;
	if (size > PAGE_SIZE)
		size = PAGE_SIZE;

	i = size - exact_copy_from_user((void *)page, data, size);
	if (!i) {
		free_page(page);
		return -EFAULT;
	}
	if (i != PAGE_SIZE)
		memset((char *)page + i, 0, PAGE_SIZE - i);
	*where = page;
	return 0;
}

/*
 * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
 *
 * data is a (void *) that can point to any structure up to
 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
 * data (or be NULL).
 *
 * Pre-0.97 versions of mount() didn't have a flags word.
 * When the flags word was introduced its top half was required
 * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
 * Therefore, if this magic number is present, it carries no information
 * and must be discarded.
 */
long do_mount(char *dev_name, char *dir_name, char *type_page,
		  unsigned long flags, void *data_page)
{
	struct path path;
	int retval = 0;
	int mnt_flags = 0;

	/* Discard magic */
	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
		flags &= ~MS_MGC_MSK;

	/* Basic sanity checks */

	if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
		return -EINVAL;
	if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
		return -EINVAL;

	if (data_page)
		((char *)data_page)[PAGE_SIZE - 1] = 0;

	/* Separate the per-mountpoint flags */
	if (flags & MS_NOSUID)
		mnt_flags |= MNT_NOSUID;
	if (flags & MS_NODEV)
		mnt_flags |= MNT_NODEV;
	if (flags & MS_NOEXEC)
		mnt_flags |= MNT_NOEXEC;
	if (flags & MS_NOATIME)
		mnt_flags |= MNT_NOATIME;
	if (flags & MS_NODIRATIME)
		mnt_flags |= MNT_NODIRATIME;
	if (flags & MS_RELATIME)
		mnt_flags |= MNT_RELATIME;
	if (flags & MS_RDONLY)
		mnt_flags |= MNT_READONLY;

	flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE |
		   MS_NOATIME | MS_NODIRATIME | MS_RELATIME | MS_KERNMOUNT);

	/* ... and get the mountpoint */
	retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
	if (retval)
		return retval;

	retval = security_sb_mount(dev_name, &path,
				   type_page, flags, data_page);
	if (retval)
		goto dput_out;

	if (flags & MS_REMOUNT)
		retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
				    data_page);
	else if (flags & MS_BIND)
		retval = do_loopback(&path, dev_name, flags & MS_REC);
	else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
		retval = do_change_type(&path, flags);
	else if (flags & MS_MOVE)
		retval = do_move_mount(&path, dev_name);
	else
		retval = do_new_mount(&path, type_page, flags, mnt_flags,
				      dev_name, data_page);
dput_out:
	path_put(&path);
	return retval;
}

/*
 * Allocate a new namespace structure and populate it with contents
 * copied from the namespace of the passed in task structure.
 */
static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
		struct fs_struct *fs)
{
	struct mnt_namespace *new_ns;
	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
	struct vfsmount *p, *q;

	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
	if (!new_ns)
		return ERR_PTR(-ENOMEM);

	atomic_set(&new_ns->count, 1);
	INIT_LIST_HEAD(&new_ns->list);
	init_waitqueue_head(&new_ns->poll);
	new_ns->event = 0;

	down_write(&namespace_sem);
	/* First pass: copy the tree topology */
	new_ns->root = copy_tree(mnt_ns->root, mnt_ns->root->mnt_root,
					CL_COPY_ALL | CL_EXPIRE);
	if (!new_ns->root) {
		up_write(&namespace_sem);
		kfree(new_ns);
		return ERR_PTR(-ENOMEM);
	}
	spin_lock(&vfsmount_lock);
	list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
	spin_unlock(&vfsmount_lock);

	/*
	 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
	 * as belonging to new namespace.  We have already acquired a private
	 * fs_struct, so tsk->fs->lock is not needed.
	 */
	p = mnt_ns->root;
	q = new_ns->root;
	while (p) {
		q->mnt_ns = new_ns;
		if (fs) {
			if (p == fs->root.mnt) {
				rootmnt = p;
				fs->root.mnt = mntget(q);
			}
			if (p == fs->pwd.mnt) {
				pwdmnt = p;
				fs->pwd.mnt = mntget(q);
			}
		}
		p = next_mnt(p, mnt_ns->root);
		q = next_mnt(q, new_ns->root);
	}
	up_write(&namespace_sem);

	if (rootmnt)
		mntput(rootmnt);
	if (pwdmnt)
		mntput(pwdmnt);

	return new_ns;
}

struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
		struct fs_struct *new_fs)
{
	struct mnt_namespace *new_ns;

	BUG_ON(!ns);
	get_mnt_ns(ns);

	if (!(flags & CLONE_NEWNS))
		return ns;

	new_ns = dup_mnt_ns(ns, new_fs);

	put_mnt_ns(ns);
	return new_ns;
}

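/*
 * mount(2) entry point: copy the type, device and data arguments from
 * userspace, then hand everything to do_mount() under the BKL.
 */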
asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name,
			  char __user * type, unsigned long flags,
			  void __user * data)
{
	int retval;
	unsigned long data_page;
	unsigned long type_page;
	unsigned long dev_page;
	char *dir_page;

	retval = copy_mount_options(type, &type_page);
	if (retval < 0)
		return retval;

	dir_page = getname(dir_name);
	retval = PTR_ERR(dir_page);
	if (IS_ERR(dir_page))
		goto out1;

	retval = copy_mount_options(dev_name, &dev_page);
	if (retval < 0)
		goto out2;

	retval = copy_mount_options(data, &data_page);
	if (retval < 0)
		goto out3;

	lock_kernel();
	retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
			  flags, (void *)data_page);
	unlock_kernel();
	free_page(data_page);

out3:
	free_page(dev_page);
out2:
	putname(dir_page);
out1:
	free_page(type_page);
	return retval;
}

/*
 * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
 * It can block.
 */
void set_fs_root(struct fs_struct *fs, struct path *path)
{
	struct path old_root;

	write_lock(&fs->lock);
	old_root = fs->root;
	fs->root = *path;
	path_get(path);
	write_unlock(&fs->lock);
	if (old_root.dentry)
		path_put(&old_root);
}

/*
 * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values.
 * It can block.
 */
void set_fs_pwd(struct fs_struct *fs, struct path *path)
{
	struct path old_pwd;

	write_lock(&fs->lock);
	old_pwd = fs->pwd;
	fs->pwd = *path;
	path_get(path);
	write_unlock(&fs->lock);

	if (old_pwd.dentry)
		path_put(&old_pwd);
}

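/*
 * Retarget every task whose fs->root or fs->pwd still points at
 * @old_root so that it points at @new_root instead.
 */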
static void chroot_fs_refs(struct path *old_root, struct path *new_root)
{
	struct task_struct *g, *p;
	struct fs_struct *fs;

	read_lock(&tasklist_lock);
	do_each_thread(g, p) {
		task_lock(p);
		fs = p->fs;
		if (fs) {
			atomic_inc(&fs->count);
			task_unlock(p);
			if (fs->root.dentry == old_root->dentry
			    && fs->root.mnt == old_root->mnt)
				set_fs_root(fs, new_root);
			if (fs->pwd.dentry == old_root->dentry
			    && fs->pwd.mnt == old_root->mnt)
				set_fs_pwd(fs, new_root);
			put_fs_struct(fs);
		} else
			task_unlock(p);
	} while_each_thread(g, p);
	read_unlock(&tasklist_lock);
}

/*
 * pivot_root Semantics:
 * Moves the root file system of the current process to the directory
 * put_old, makes new_root the new root file system of the current
 * process, and sets root/cwd of all processes which had them on the
 * current root to new_root.
 *
 * Restrictions:
 * The new_root and put_old must be directories, and must not be on the
 * same file system as the current process root.  The put_old must be
 * underneath new_root, i.e. adding a non-zero number of /.. to the
 * string pointed to by put_old must yield the same directory as
 * new_root.  No other file system may be mounted on put_old.  After
 * all, new_root is a mountpoint.
 *
 * Notes:
 *  - we don't move root/cwd if they are not at the root (reason: if
 *    something cared enough to change them, it's probably wrong to
 *    force them elsewhere)
 *  - it's okay to pick a root that isn't the root of a file system,
 *    e.g. /nfs/my_root where /nfs is the mount point.  It must be a
 *    mountpoint, though.
 */
asmlinkage long sys_pivot_root(const char __user * new_root,
			       const char __user * put_old)
{
	struct vfsmount *tmp;
	struct path new, old, parent_path, root_parent, root;
	int error;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	error = user_path_dir(new_root, &new);
	if (error)
		goto out0;
	error = -EINVAL;
	if (!check_mnt(new.mnt))
		goto out1;

	error = user_path_dir(put_old, &old);
	if (error)
		goto out1;

	error = security_sb_pivotroot(&old, &new);
	if (error) {
		path_put(&old);
		goto out1;
	}

	read_lock(&current->fs->lock);
	root = current->fs->root;
	path_get(&current->fs->root);
	read_unlock(&current->fs->lock);
	down_write(&namespace_sem);
	mutex_lock(&old.dentry->d_inode->i_mutex);
	error = -EINVAL;
	if (IS_MNT_SHARED(old.mnt) ||
	    IS_MNT_SHARED(new.mnt->mnt_parent) ||
	    IS_MNT_SHARED(root.mnt->mnt_parent))
		goto out2;
	if (!check_mnt(root.mnt))
		goto out2;
	error = -ENOENT;
	if (IS_DEADDIR(new.dentry->d_inode))
		goto out2;
	if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry))
		goto out2;
	if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry))
		goto out2;
	error = -EBUSY;
	if (new.mnt == root.mnt ||
	    old.mnt == root.mnt)
		goto out2;
	error = -EINVAL;
	if (root.mnt->mnt_root != root.dentry)
		goto out2;
	if (root.mnt->mnt_parent == root.mnt)
		goto out2;
	if (new.mnt->mnt_root != new.dentry)
		goto out2;
	if (new.mnt->mnt_parent == new.mnt)
		goto out2;
	/* make sure we can reach put_old from new_root */
	tmp = old.mnt;
	spin_lock(&vfsmount_lock);
	if (tmp != new.mnt) {
		for (;;) {
			if (tmp->mnt_parent == tmp)
				goto out3;
			if (tmp->mnt_parent == new.mnt)
				break;
			tmp = tmp->mnt_parent;
		}
		if (!is_subdir(tmp->mnt_mountpoint, new.dentry))
			goto out3;
	} else if (!is_subdir(old.dentry, new.dentry))
		goto out3;
	detach_mnt(new.mnt, &parent_path);
	detach_mnt(root.mnt, &root_parent);
	/* mount old root on put_old */
	attach_mnt(root.mnt, &old);
	/* mount new_root on / */
	attach_mnt(new.mnt, &root_parent);
	touch_mnt_namespace(current->nsproxy->mnt_ns);
	spin_unlock(&vfsmount_lock);
	chroot_fs_refs(&root, &new);
	security_sb_post_pivotroot(&root, &new);
	error = 0;
	path_put(&root_parent);
	path_put(&parent_path);
out2:
	mutex_unlock(&old.dentry->d_inode->i_mutex);
	up_write(&namespace_sem);
	path_put(&root);
	path_put(&old);
out1:
	path_put(&new);
out0:
	return error;
out3:
	spin_unlock(&vfsmount_lock);
	goto out2;
}

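/*
 * Set up the initial "rootfs" mount and the first mount namespace at
 * boot time, and point init's root and cwd at it.
 */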
static void __init init_mount_tree(void)
{
	struct vfsmount *mnt;
	struct mnt_namespace *ns;
	struct path root;

	mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
	if (IS_ERR(mnt))
		panic("Can't create rootfs");
	ns = kmalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		panic("Can't allocate initial namespace");
	atomic_set(&ns->count, 1);
	INIT_LIST_HEAD(&ns->list);
	init_waitqueue_head(&ns->poll);
	ns->event = 0;
	list_add(&mnt->mnt_list, &ns->list);
	ns->root = mnt;
	mnt->mnt_ns = ns;

	init_task.nsproxy->mnt_ns = ns;
	get_mnt_ns(ns);

	root.mnt = ns->root;
	root.dentry = ns->root->mnt_root;

	set_fs_pwd(current->fs, &root);
	set_fs_root(current->fs, &root);
}

void __init mnt_init(void)
{
	unsigned u;
	int err;

	init_rwsem(&namespace_sem);

	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);

	if (!mount_hashtable)
		panic("Failed to allocate mount hash table\n");

	printk("Mount-cache hash table entries: %lu\n", HASH_SIZE);

	for (u = 0; u < HASH_SIZE; u++)
		INIT_LIST_HEAD(&mount_hashtable[u]);

	err = sysfs_init();
	if (err)
		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
			__func__, err);
	fs_kobj = kobject_create_and_add("fs", NULL);
	if (!fs_kobj)
		printk(KERN_WARNING "%s: kobj create error\n", __func__);
	init_rootfs();
	init_mount_tree();
}

/*
 * __put_mnt_ns() is entered with vfsmount_lock held (it is called from
 * put_mnt_ns() via atomic_dec_and_lock()); drop it here so that we can
 * take namespace_sem before tearing the tree down.
 */
void __put_mnt_ns(struct mnt_namespace *ns)
{
	struct vfsmount *root = ns->root;
	LIST_HEAD(umount_list);
	ns->root = NULL;
	spin_unlock(&vfsmount_lock);
	down_write(&namespace_sem);
	spin_lock(&vfsmount_lock);
	umount_tree(root, 0, &umount_list);
	spin_unlock(&vfsmount_lock);
	up_write(&namespace_sem);
	release_mounts(&umount_list);
	kfree(ns);
}