1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25#include <linux/cgroup.h>
26#include <linux/errno.h>
27#include <linux/fs.h>
28#include <linux/kernel.h>
29#include <linux/list.h>
30#include <linux/mm.h>
31#include <linux/mutex.h>
32#include <linux/mount.h>
33#include <linux/pagemap.h>
34#include <linux/proc_fs.h>
35#include <linux/rcupdate.h>
36#include <linux/sched.h>
37#include <linux/backing-dev.h>
38#include <linux/seq_file.h>
39#include <linux/slab.h>
40#include <linux/magic.h>
41#include <linux/spinlock.h>
42#include <linux/string.h>
43#include <linux/sort.h>
44#include <linux/kmod.h>
45#include <linux/delayacct.h>
46#include <linux/cgroupstats.h>
47#include <linux/hash.h>
48#include <linux/namei.h>
49
50#include <asm/atomic.h>
51
/* Global cgroup lock; taken by cgroup_lock(), cgroup_lock_live_group() and the mount/remount paths in this file. */
52static DEFINE_MUTEX(cgroup_mutex);
53
54
/* Turns each SUBSYS(x) line of <linux/cgroup_subsys.h> into the array element "&x_subsys,". */
55#define SUBSYS(_x) &_x ## _subsys,
56
/* Table of subsystem descriptors; code in this file indexes it with i < CGROUP_SUBSYS_COUNT. */
57static struct cgroup_subsys *subsys[] = {
58#include <linux/cgroup_subsys.h>
59};
60
61
62
63
64
65
/*
 * Per-hierarchy state. cgroup_get_sb() allocates one per new mount; the
 * static "rootnode" instance is the root that subsystems point at while they
 * are not bound to a mounted hierarchy (see rebind_subsystems()).
 */
66struct cgroupfs_root {
 /* Superblock for this hierarchy; assigned in cgroup_set_super(). */
67 struct super_block *sb;
68
69
70
71
72
 /* Requested subsystem bitmask; bit i corresponds to subsys[i]. */
73 unsigned long subsys_bits;
74
75
 /* Subsystem bitmask as last applied by rebind_subsystems(). */
76 unsigned long actual_subsys_bits;
77
78
 /* Subsystems bound to this hierarchy, linked through cgroup_subsys.sibling. */
79 struct list_head subsys_list;
80
81
 /* The top-level cgroup of this hierarchy (embedded, not allocated separately). */
82 struct cgroup top_cgroup;
83
84
 /* Starts at 1 in init_cgroup_root(); decremented in cgroup_diput(). */
85 int number_of_cgroups;
86
87
 /* Link on the global "roots" list. */
88 struct list_head root_list;
89
90
 /* Bit flags; ROOT_NOPREFIX is the only bit defined in this file. */
91 unsigned long flags;
92
93
 /* NUL-terminated release agent path; empty string means none configured. */
94 char release_agent_path[PATH_MAX];
95};
96
97
98
99
100
101
102
/* Root that owns every subsystem not currently bound to a mounted hierarchy. */
103static struct cgroupfs_root rootnode;
104
105
106
/* List of mounted hierarchies (cgroupfs_root.root_list) and its length. */
107static LIST_HEAD(roots);
108static int root_count;
109
110
/* Shorthand for the top cgroup of rootnode. */
111#define dummytop (&rootnode.top_cgroup)
112
113
114
115
116
117
/*
 * NOTE(review): neither flag is read or written in the visible part of the
 * file; presumably they record whether any subsystem registered fork/exit
 * or mm-owner callbacks -- confirm against the rest of the file.
 */
118static int need_forkexit_callback __read_mostly;
119static int need_mm_owner_callback __read_mostly;
120
121
122inline int cgroup_is_removed(const struct cgroup *cgrp)
123{
124 return test_bit(CGRP_REMOVED, &cgrp->flags);
125}
126
127
/* Bit numbers for cgroupfs_root.flags. */
128enum {
 /* Set by the "noprefix" mount option (see parse_cgroupfs_options()). */
129 ROOT_NOPREFIX,
130};
131
132static int cgroup_is_releasable(const struct cgroup *cgrp)
133{
134 const int bits =
135 (1 << CGRP_RELEASABLE) |
136 (1 << CGRP_NOTIFY_ON_RELEASE);
137 return (cgrp->flags & bits) == bits;
138}
139
140static int notify_on_release(const struct cgroup *cgrp)
141{
142 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
143}
144
145
146
147
148
/*
 * Iterate @_ss over every subsystem on @_root's subsys_list.
 * @_root is parenthesized so any pointer-valued expression can be passed.
 */
#define for_each_subsys(_root, _ss) \
	list_for_each_entry(_ss, &(_root)->subsys_list, sibling)

/* Iterate @_root over every hierarchy linked on the global "roots" list. */
#define for_each_root(_root) \
	list_for_each_entry(_root, &roots, root_list)
155
156
157
/*
 * Release-agent machinery: a list head with its spinlock, the work item that
 * runs cgroup_release_agent(), and the check_for_release() helper. The
 * functions are defined outside the visible part of the file.
 */
158static LIST_HEAD(release_list);
159static DEFINE_SPINLOCK(release_list_lock);
160static void cgroup_release_agent(struct work_struct *work);
161static DECLARE_WORK(release_agent_work, cgroup_release_agent);
162static void check_for_release(struct cgroup *cgrp);
163
164
/* Link object connecting one css_set to one cgroup. */
165struct cg_cgroup_link {
166
167
168
169
 /* Entry on a cgroup's css_sets list (also used to chain spare links on temporary lists). */
170 struct list_head cgrp_link_list;
171
172
173
174
 /* Entry on the css_set's cg_links list. */
175 struct list_head cg_link_list;
 /* The css_set this link belongs to. */
176 struct css_set *cg;
177};
178
179
180
181
182
183
184
185
/*
 * NOTE(review): init_css_set / init_css_set_link are not referenced in the
 * visible part of the file; presumably the css_set (and its link) used by
 * the initial task -- confirm.
 */
186static struct css_set init_css_set;
187static struct cg_cgroup_link init_css_set_link;
188
189
190
191
/*
 * css_set_lock is read-held for hash lookups and write-held when css_sets or
 * their links are added or removed; css_set_count tracks how many css_sets
 * are hashed.
 */
192static DEFINE_RWLOCK(css_set_lock);
193static int css_set_count;
194
195
196
/* Hash table of css_sets; buckets are chosen by css_set_hash(). */
197#define CSS_SET_HASH_BITS 7
198#define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS)
199static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE];
200
201static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
202{
203 int i;
204 int index;
205 unsigned long tmp = 0UL;
206
207 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++)
208 tmp += (unsigned long)css[i];
209 tmp = (tmp >> 16) ^ tmp;
210
211 index = hash_long(tmp, CSS_SET_HASH_BITS);
212
213 return &css_set_table[index];
214}
215
216
217
218
219
/*
 * NOTE(review): not referenced in the visible part of the file; presumably
 * gates whether tasks are linked onto css_set->tasks -- confirm.
 */
220static int use_task_css_set_links __read_mostly;
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239static void unlink_css_set(struct css_set *cg)
240{
241 struct cg_cgroup_link *link;
242 struct cg_cgroup_link *saved_link;
243
244 hlist_del(&cg->hlist);
245 css_set_count--;
246
247 list_for_each_entry_safe(link, saved_link, &cg->cg_links,
248 cg_link_list) {
249 list_del(&link->cg_link_list);
250 list_del(&link->cgrp_link_list);
251 kfree(link);
252 }
253}
254
/*
 * Drop one reference on @cg. When the last reference goes away the css_set
 * is unhashed, each referenced cgroup's count is dropped, and @cg is freed.
 * @taskexit non-zero additionally marks emptied notify-on-release cgroups
 * as CGRP_RELEASABLE before check_for_release() is called.
 */
255static void __put_css_set(struct css_set *cg, int taskexit)
256{
257 int i;
258
259
260
261
262
	/* Fast path: decrement without the lock unless the count is exactly 1. */
263 if (atomic_add_unless(&cg->refcount, -1, 1))
264 return;
	/* Possibly the last reference: redo the decrement under css_set_lock. */
265 write_lock(&css_set_lock);
266 if (!atomic_dec_and_test(&cg->refcount)) {
267 write_unlock(&css_set_lock);
268 return;
269 }
270 unlink_css_set(cg);
271 write_unlock(&css_set_lock);
272
273 rcu_read_lock();
274 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
275 struct cgroup *cgrp = cg->subsys[i]->cgroup;
	/* Only cgroups whose count hits zero and that want notification are checked. */
276 if (atomic_dec_and_test(&cgrp->count) &&
277 notify_on_release(cgrp)) {
278 if (taskexit)
279 set_bit(CGRP_RELEASABLE, &cgrp->flags);
280 check_for_release(cgrp);
281 }
282 }
283 rcu_read_unlock();
284 kfree(cg);
285}
286
287
288
289
290static inline void get_css_set(struct css_set *cg)
291{
292 atomic_inc(&cg->refcount);
293}
294
/* Drop a reference on @cg outside of task exit (taskexit == 0). */
static inline void put_css_set(struct css_set *cg)
{
	const int taskexit = 0;

	__put_css_set(cg, taskexit);
}
299
/* Drop a reference on @cg on behalf of an exiting task (taskexit == 1). */
static inline void put_css_set_taskexit(struct css_set *cg)
{
	const int taskexit = 1;

	__put_css_set(cg, taskexit);
}
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318static struct css_set *find_existing_css_set(
319 struct css_set *oldcg,
320 struct cgroup *cgrp,
321 struct cgroup_subsys_state *template[])
322{
323 int i;
324 struct cgroupfs_root *root = cgrp->root;
325 struct hlist_head *hhead;
326 struct hlist_node *node;
327 struct css_set *cg;
328
329
330
331 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
332 if (root->subsys_bits & (1UL << i)) {
333
334
335
336 template[i] = cgrp->subsys[i];
337 } else {
338
339
340 template[i] = oldcg->subsys[i];
341 }
342 }
343
344 hhead = css_set_hash(template);
345 hlist_for_each_entry(cg, node, hhead, hlist) {
346 if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
347
348 return cg;
349 }
350 }
351
352
353 return NULL;
354}
355
356static void free_cg_links(struct list_head *tmp)
357{
358 struct cg_cgroup_link *link;
359 struct cg_cgroup_link *saved_link;
360
361 list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) {
362 list_del(&link->cgrp_link_list);
363 kfree(link);
364 }
365}
366
367
368
369
370
371
372static int allocate_cg_links(int count, struct list_head *tmp)
373{
374 struct cg_cgroup_link *link;
375 int i;
376 INIT_LIST_HEAD(tmp);
377 for (i = 0; i < count; i++) {
378 link = kmalloc(sizeof(*link), GFP_KERNEL);
379 if (!link) {
380 free_cg_links(tmp);
381 return -ENOMEM;
382 }
383 list_add(&link->cgrp_link_list, tmp);
384 }
385 return 0;
386}
387
388
389
390
391
392
393
394
/*
 * Return a referenced css_set equal to @oldcg with the subsystems of
 * @cgrp's hierarchy pointed at @cgrp. An existing hashed css_set is reused
 * when one matches; otherwise a new one is allocated, linked to one cgroup
 * per hierarchy and hashed.
 *
 * Returns NULL if memory allocation fails.
 */
395static struct css_set *find_css_set(
396 struct css_set *oldcg, struct cgroup *cgrp)
397{
398 struct css_set *res;
399 struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT];
400 int i;
401
402 struct list_head tmp_cg_links;
403 struct cg_cgroup_link *link;
404
405 struct hlist_head *hhead;
406
407
408
	/* Look for a match; this also fills template[] for the allocation path. */
409 read_lock(&css_set_lock);
410 res = find_existing_css_set(oldcg, cgrp, template);
411 if (res)
412 get_css_set(res);
413 read_unlock(&css_set_lock);
414
415 if (res)
416 return res;
417
418 res = kmalloc(sizeof(*res), GFP_KERNEL);
419 if (!res)
420 return NULL;
421
422
	/* One link per mounted hierarchy, allocated before taking the write lock. */
423 if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
424 kfree(res);
425 return NULL;
426 }
427
428 atomic_set(&res->refcount, 1);
429 INIT_LIST_HEAD(&res->cg_links);
430 INIT_LIST_HEAD(&res->tasks);
431 INIT_HLIST_NODE(&res->hlist);
432
433
434
435 memcpy(res->subsys, template, sizeof(res->subsys));
436
437 write_lock(&css_set_lock);
438
439 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
	/* Note: this cgrp shadows the function parameter. */
440 struct cgroup *cgrp = res->subsys[i]->cgroup;
441 struct cgroup_subsys *ss = subsys[i];
442 atomic_inc(&cgrp->count);
443
444
445
446
447
	/* Link only via the first subsystem on each root's list: one link per hierarchy. */
448 if (ss->root->subsys_list.next == &ss->sibling) {
449 BUG_ON(list_empty(&tmp_cg_links));
450 link = list_entry(tmp_cg_links.next,
451 struct cg_cgroup_link,
452 cgrp_link_list);
453 list_del(&link->cgrp_link_list);
454 list_add(&link->cgrp_link_list, &cgrp->css_sets);
455 link->cg = res;
456 list_add(&link->cg_link_list, &res->cg_links);
457 }
458 }
	/* rootnode has no subsystems, so the loop above made no link to dummytop; add it here. */
459 if (list_empty(&rootnode.subsys_list)) {
460 link = list_entry(tmp_cg_links.next,
461 struct cg_cgroup_link,
462 cgrp_link_list);
463 list_del(&link->cgrp_link_list);
464 list_add(&link->cgrp_link_list, &dummytop->css_sets);
465 link->cg = res;
466 list_add(&link->cg_link_list, &res->cg_links);
467 }
468
	/* Every preallocated link must have been consumed. */
469 BUG_ON(!list_empty(&tmp_cg_links));
470
471 css_set_count++;
472
473
474 hhead = css_set_hash(res->subsys);
475 hlist_add_head(&res->hlist, hhead);
476
477 write_unlock(&css_set_lock);
478
479 return res;
480}
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536void cgroup_lock(void)
537{
538 mutex_lock(&cgroup_mutex);
539}
540
541
542
543
544
545
546void cgroup_unlock(void)
547{
548 mutex_unlock(&cgroup_mutex);
549}
550
551
552
553
554
555
556
557
/* Forward declarations for functions and operation tables defined later in the file. */
558static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
559static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
560static int cgroup_populate_dir(struct cgroup *cgrp);
561static struct inode_operations cgroup_dir_inode_operations;
562static struct file_operations proc_cgroupstats_operations;
563
/* Backing device info installed on every cgroup inode by cgroup_new_inode(). */
564static struct backing_dev_info cgroup_backing_dev_info = {
565 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
566};
567
568static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
569{
570 struct inode *inode = new_inode(sb);
571
572 if (inode) {
573 inode->i_mode = mode;
574 inode->i_uid = current->fsuid;
575 inode->i_gid = current->fsgid;
576 inode->i_blocks = 0;
577 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
578 inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
579 }
580 return inode;
581}
582
583
584
585
586
587static void cgroup_call_pre_destroy(struct cgroup *cgrp)
588{
589 struct cgroup_subsys *ss;
590 for_each_subsys(cgrp->root, ss)
591 if (ss->pre_destroy && cgrp->subsys[ss->subsys_id])
592 ss->pre_destroy(ss, cgrp);
593 return;
594}
595
/*
 * Dentry inode-release hook. For a directory dentry this is where the
 * cgroup itself is torn down: subsystem state is destroyed, the hierarchy's
 * cgroup count is dropped, a superblock reference is released and the
 * cgroup is freed. In all cases the inode reference is dropped with iput().
 */
596static void cgroup_diput(struct dentry *dentry, struct inode *inode)
597{
598
599 if (S_ISDIR(inode->i_mode)) {
600 struct cgroup *cgrp = dentry->d_fsdata;
601 struct cgroup_subsys *ss;
	/* A directory dentry may only be released after its cgroup was marked removed. */
602 BUG_ON(!(cgroup_is_removed(cgrp)));
603
604
605
606
607
608
	/* Wait out an RCU grace period before destroying state readers might still see. */
609 synchronize_rcu();
610
611 mutex_lock(&cgroup_mutex);
612
613
614
615 for_each_subsys(cgrp->root, ss) {
616 if (cgrp->subsys[ss->subsys_id])
617 ss->destroy(ss, cgrp);
618 }
619
620 cgrp->root->number_of_cgroups--;
621 mutex_unlock(&cgroup_mutex);
622
623
624
	/* cgrp->root must be read before cgrp is freed below. */
625 deactivate_super(cgrp->root->sb);
626
627 kfree(cgrp);
628 }
629 iput(inode);
630}
631
632static void remove_dir(struct dentry *d)
633{
634 struct dentry *parent = dget(d->d_parent);
635
636 d_delete(d);
637 simple_rmdir(parent->d_inode, d);
638 dput(parent);
639}
640
/*
 * Unlink every child of directory @dentry. Children with an inode must be
 * regular entries (not directories). The caller must hold the directory
 * inode's i_mutex.
 */
641static void cgroup_clear_directory(struct dentry *dentry)
642{
643 struct list_head *node;
644
645 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
646 spin_lock(&dcache_lock);
647 node = dentry->d_subdirs.next;
648 while (node != &dentry->d_subdirs) {
649 struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
	/* Detach the child first so the list head always points at unprocessed entries. */
650 list_del_init(node);
651 if (d->d_inode) {
652
653
654 BUG_ON(d->d_inode->i_mode & S_IFDIR);
	/* Pin the child, then drop dcache_lock around the unlink calls. */
655 d = dget_locked(d);
656 spin_unlock(&dcache_lock);
657 d_delete(d);
658 simple_unlink(dentry->d_inode, d);
659 dput(d);
660 spin_lock(&dcache_lock);
661 }
	/* Restart from the head: the list may have changed while the lock was dropped. */
662 node = dentry->d_subdirs.next;
663 }
664 spin_unlock(&dcache_lock);
665}
666
667
668
669
670static void cgroup_d_remove_dir(struct dentry *dentry)
671{
672 cgroup_clear_directory(dentry);
673
674 spin_lock(&dcache_lock);
675 list_del_init(&dentry->d_u.d_child);
676 spin_unlock(&dcache_lock);
677 remove_dir(dentry);
678}
679
/*
 * Change the set of subsystems bound to @root to exactly @final_bits.
 *
 * Returns 0 on success, or -EBUSY when a subsystem to be added is already
 * bound to another hierarchy or when @root has more than its top cgroup.
 * Nothing is modified on failure.
 */
680static int rebind_subsystems(struct cgroupfs_root *root,
681 unsigned long final_bits)
682{
683 unsigned long added_bits, removed_bits;
684 struct cgroup *cgrp = &root->top_cgroup;
685 int i;
686
687 removed_bits = root->actual_subsys_bits & ~final_bits;
688 added_bits = final_bits & ~root->actual_subsys_bits;
689
	/* First pass: every subsystem to be added must currently belong to rootnode. */
690 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
691 unsigned long bit = 1UL << i;
692 struct cgroup_subsys *ss = subsys[i];
693 if (!(bit & added_bits))
694 continue;
695 if (ss->root != &rootnode) {
696
697 return -EBUSY;
698 }
699 }
700
701
702
703
704
	/* Refuse to rebind a hierarchy that has child cgroups. */
705 if (root->number_of_cgroups > 1)
706 return -EBUSY;
707
708
	/* Second pass: move subsystem state between dummytop and this root's top cgroup. */
709 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
710 struct cgroup_subsys *ss = subsys[i];
711 unsigned long bit = 1UL << i;
712 if (bit & added_bits) {
713
	/* Adding: take the state over from dummytop. */
714 BUG_ON(cgrp->subsys[i]);
715 BUG_ON(!dummytop->subsys[i]);
716 BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
717 cgrp->subsys[i] = dummytop->subsys[i];
718 cgrp->subsys[i]->cgroup = cgrp;
719 list_add(&ss->sibling, &root->subsys_list);
720 rcu_assign_pointer(ss->root, root);
721 if (ss->bind)
722 ss->bind(ss, cgrp);
723
724 } else if (bit & removed_bits) {
725
	/* Removing: hand the state back to dummytop / rootnode. */
726 BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
727 BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
728 if (ss->bind)
729 ss->bind(ss, dummytop);
730 dummytop->subsys[i]->cgroup = dummytop;
731 cgrp->subsys[i] = NULL;
732 rcu_assign_pointer(subsys[i]->root, &rootnode);
733 list_del(&ss->sibling);
734 } else if (bit & final_bits) {
735
	/* Unchanged and bound: state must already be present. */
736 BUG_ON(!cgrp->subsys[i]);
737 } else {
738
	/* Unchanged and unbound: state must be absent. */
739 BUG_ON(cgrp->subsys[i]);
740 }
741 }
742 root->subsys_bits = root->actual_subsys_bits = final_bits;
743 synchronize_rcu();
744
745 return 0;
746}
747
748static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
749{
750 struct cgroupfs_root *root = vfs->mnt_sb->s_fs_info;
751 struct cgroup_subsys *ss;
752
753 mutex_lock(&cgroup_mutex);
754 for_each_subsys(root, ss)
755 seq_printf(seq, ",%s", ss->name);
756 if (test_bit(ROOT_NOPREFIX, &root->flags))
757 seq_puts(seq, ",noprefix");
758 if (strlen(root->release_agent_path))
759 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
760 mutex_unlock(&cgroup_mutex);
761 return 0;
762}
763
/* Parsed mount options, filled in by parse_cgroupfs_options(). */
764struct cgroup_sb_opts {
 /* Requested subsystems; bit i corresponds to subsys[i]. */
765 unsigned long subsys_bits;
 /* ROOT_* flag bits. */
766 unsigned long flags;
 /* kzalloc'd copy of the release_agent= value, or NULL; the caller frees it. */
767 char *release_agent;
768};
769
770
771
772static int parse_cgroupfs_options(char *data,
773 struct cgroup_sb_opts *opts)
774{
775 char *token, *o = data ?: "all";
776
777 opts->subsys_bits = 0;
778 opts->flags = 0;
779 opts->release_agent = NULL;
780
781 while ((token = strsep(&o, ",")) != NULL) {
782 if (!*token)
783 return -EINVAL;
784 if (!strcmp(token, "all")) {
785
786 int i;
787 opts->subsys_bits = 0;
788 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
789 struct cgroup_subsys *ss = subsys[i];
790 if (!ss->disabled)
791 opts->subsys_bits |= 1ul << i;
792 }
793 } else if (!strcmp(token, "noprefix")) {
794 set_bit(ROOT_NOPREFIX, &opts->flags);
795 } else if (!strncmp(token, "release_agent=", 14)) {
796
797 if (opts->release_agent)
798 return -EINVAL;
799 opts->release_agent = kzalloc(PATH_MAX, GFP_KERNEL);
800 if (!opts->release_agent)
801 return -ENOMEM;
802 strncpy(opts->release_agent, token + 14, PATH_MAX - 1);
803 opts->release_agent[PATH_MAX - 1] = 0;
804 } else {
805 struct cgroup_subsys *ss;
806 int i;
807 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
808 ss = subsys[i];
809 if (!strcmp(token, ss->name)) {
810 if (!ss->disabled)
811 set_bit(i, &opts->subsys_bits);
812 break;
813 }
814 }
815 if (i == CGROUP_SUBSYS_COUNT)
816 return -ENOENT;
817 }
818 }
819
820
821 if (!opts->subsys_bits)
822 return -EINVAL;
823
824 return 0;
825}
826
827static int cgroup_remount(struct super_block *sb, int *flags, char *data)
828{
829 int ret = 0;
830 struct cgroupfs_root *root = sb->s_fs_info;
831 struct cgroup *cgrp = &root->top_cgroup;
832 struct cgroup_sb_opts opts;
833
834 mutex_lock(&cgrp->dentry->d_inode->i_mutex);
835 mutex_lock(&cgroup_mutex);
836
837
838 ret = parse_cgroupfs_options(data, &opts);
839 if (ret)
840 goto out_unlock;
841
842
843 if (opts.flags != root->flags) {
844 ret = -EINVAL;
845 goto out_unlock;
846 }
847
848 ret = rebind_subsystems(root, opts.subsys_bits);
849
850
851 if (!ret)
852 cgroup_populate_dir(cgrp);
853
854 if (opts.release_agent)
855 strcpy(root->release_agent_path, opts.release_agent);
856 out_unlock:
857 if (opts.release_agent)
858 kfree(opts.release_agent);
859 mutex_unlock(&cgroup_mutex);
860 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
861 return ret;
862}
863
/* Superblock operations installed by cgroup_set_super(). */
864static struct super_operations cgroup_ops = {
865 .statfs = simple_statfs,
866 .drop_inode = generic_delete_inode,
867 .show_options = cgroup_show_options,
868 .remount_fs = cgroup_remount,
869};
870
871static void init_cgroup_housekeeping(struct cgroup *cgrp)
872{
873 INIT_LIST_HEAD(&cgrp->sibling);
874 INIT_LIST_HEAD(&cgrp->children);
875 INIT_LIST_HEAD(&cgrp->css_sets);
876 INIT_LIST_HEAD(&cgrp->release_list);
877 init_rwsem(&cgrp->pids_mutex);
878}
879static void init_cgroup_root(struct cgroupfs_root *root)
880{
881 struct cgroup *cgrp = &root->top_cgroup;
882 INIT_LIST_HEAD(&root->subsys_list);
883 INIT_LIST_HEAD(&root->root_list);
884 root->number_of_cgroups = 1;
885 cgrp->root = root;
886 cgrp->top_cgroup = cgrp;
887 init_cgroup_housekeeping(cgrp);
888}
889
890static int cgroup_test_super(struct super_block *sb, void *data)
891{
892 struct cgroupfs_root *new = data;
893 struct cgroupfs_root *root = sb->s_fs_info;
894
895
896 if (new->subsys_bits != root->subsys_bits)
897 return 0;
898
899
900 if (new->flags != root->flags)
901 return 0;
902
903 return 1;
904}
905
906static int cgroup_set_super(struct super_block *sb, void *data)
907{
908 int ret;
909 struct cgroupfs_root *root = data;
910
911 ret = set_anon_super(sb, NULL);
912 if (ret)
913 return ret;
914
915 sb->s_fs_info = root;
916 root->sb = sb;
917
918 sb->s_blocksize = PAGE_CACHE_SIZE;
919 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
920 sb->s_magic = CGROUP_SUPER_MAGIC;
921 sb->s_op = &cgroup_ops;
922
923 return 0;
924}
925
926static int cgroup_get_rootdir(struct super_block *sb)
927{
928 struct inode *inode =
929 cgroup_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
930 struct dentry *dentry;
931
932 if (!inode)
933 return -ENOMEM;
934
935 inode->i_fop = &simple_dir_operations;
936 inode->i_op = &cgroup_dir_inode_operations;
937
938 inc_nlink(inode);
939 dentry = d_alloc_root(inode);
940 if (!dentry) {
941 iput(inode);
942 return -ENOMEM;
943 }
944 sb->s_root = dentry;
945 return 0;
946}
947
/*
 * file_system_type.get_sb hook: mount a cgroup hierarchy.
 *
 * Parses the options in @data, builds a candidate cgroupfs_root and asks
 * sget() for a superblock. If a matching hierarchy already exists its
 * superblock is reused and the candidate is discarded; otherwise the new
 * hierarchy is set up (root directory, subsystem binding, links from every
 * existing css_set to the new top cgroup, control files).
 *
 * Returns the result of simple_set_mnt() on success or a negative errno.
 */
948static int cgroup_get_sb(struct file_system_type *fs_type,
949 int flags, const char *unused_dev_name,
950 void *data, struct vfsmount *mnt)
951{
952 struct cgroup_sb_opts opts;
953 int ret = 0;
954 struct super_block *sb;
955 struct cgroupfs_root *root;
956 struct list_head tmp_cg_links;
957
958
959 ret = parse_cgroupfs_options(data, &opts);
960 if (ret) {
	/* The parser may have allocated release_agent before failing. */
961 if (opts.release_agent)
962 kfree(opts.release_agent);
963 return ret;
964 }
965
966 root = kzalloc(sizeof(*root), GFP_KERNEL);
967 if (!root) {
968 if (opts.release_agent)
969 kfree(opts.release_agent);
970 return -ENOMEM;
971 }
972
973 init_cgroup_root(root);
974 root->subsys_bits = opts.subsys_bits;
975 root->flags = opts.flags;
976 if (opts.release_agent) {
977 strcpy(root->release_agent_path, opts.release_agent);
978 kfree(opts.release_agent);
979 }
980
981 sb = sget(fs_type, cgroup_test_super, cgroup_set_super, root);
982
983 if (IS_ERR(sb)) {
984 kfree(root);
985 return PTR_ERR(sb);
986 }
987
988 if (sb->s_fs_info != root) {
989
	/* Reusing an existing superblock: the candidate root is not needed. */
990 BUG_ON(sb->s_root == NULL);
991 kfree(root);
992 root = NULL;
993 } else {
994
	/* Brand-new superblock: set up the hierarchy. */
995 struct cgroup *cgrp = &root->top_cgroup;
996 struct inode *inode;
997 int i;
998
999 BUG_ON(sb->s_root != NULL);
1000
1001 ret = cgroup_get_rootdir(sb);
1002 if (ret)
1003 goto drop_new_super;
1004 inode = sb->s_root->d_inode;
1005
1006 mutex_lock(&inode->i_mutex);
1007 mutex_lock(&cgroup_mutex);
1008
1009
1010
1011
1012
1013
1014
1015
	/* Preallocate one link per existing css_set before taking css_set_lock. */
1016 ret = allocate_cg_links(css_set_count, &tmp_cg_links);
1017 if (ret) {
1018 mutex_unlock(&cgroup_mutex);
1019 mutex_unlock(&inode->i_mutex);
1020 goto drop_new_super;
1021 }
1022
1023 ret = rebind_subsystems(root, root->subsys_bits);
1024 if (ret == -EBUSY) {
1025 mutex_unlock(&cgroup_mutex);
1026 mutex_unlock(&inode->i_mutex);
1027 goto free_cg_links;
1028 }
1029
1030
	/* -EBUSY is the only failure expected from rebind_subsystems(). */
1031 BUG_ON(ret);
1032
1033 list_add(&root->root_list, &roots);
1034 root_count++;
1035
1036 sb->s_root->d_fsdata = &root->top_cgroup;
1037 root->top_cgroup.dentry = sb->s_root;
1038
1039
1040
	/* Link every hashed css_set to the new hierarchy's top cgroup. */
1041 write_lock(&css_set_lock);
1042 for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
1043 struct hlist_head *hhead = &css_set_table[i];
1044 struct hlist_node *node;
1045 struct css_set *cg;
1046
1047 hlist_for_each_entry(cg, node, hhead, hlist) {
1048 struct cg_cgroup_link *link;
1049
1050 BUG_ON(list_empty(&tmp_cg_links));
1051 link = list_entry(tmp_cg_links.next,
1052 struct cg_cgroup_link,
1053 cgrp_link_list);
1054 list_del(&link->cgrp_link_list);
1055 link->cg = cg;
1056 list_add(&link->cgrp_link_list,
1057 &root->top_cgroup.css_sets);
1058 list_add(&link->cg_link_list, &cg->cg_links);
1059 }
1060 }
1061 write_unlock(&css_set_lock);
1062
	/* Release any preallocated links that were not consumed. */
1063 free_cg_links(&tmp_cg_links);
1064
1065 BUG_ON(!list_empty(&cgrp->sibling));
1066 BUG_ON(!list_empty(&cgrp->children));
1067 BUG_ON(root->number_of_cgroups != 1);
1068
1069 cgroup_populate_dir(cgrp);
1070 mutex_unlock(&inode->i_mutex);
1071 mutex_unlock(&cgroup_mutex);
1072 }
1073
1074 return simple_set_mnt(mnt, sb);
1075
	/* Error unwinding for the new-superblock path. */
1076 free_cg_links:
1077 free_cg_links(&tmp_cg_links);
1078 drop_new_super:
1079 up_write(&sb->s_umount);
1080 deactivate_super(sb);
1081 return ret;
1082}
1083
1084static void cgroup_kill_sb(struct super_block *sb) {
1085 struct cgroupfs_root *root = sb->s_fs_info;
1086 struct cgroup *cgrp = &root->top_cgroup;
1087 int ret;
1088 struct cg_cgroup_link *link;
1089 struct cg_cgroup_link *saved_link;
1090
1091 BUG_ON(!root);
1092
1093 BUG_ON(root->number_of_cgroups != 1);
1094 BUG_ON(!list_empty(&cgrp->children));
1095 BUG_ON(!list_empty(&cgrp->sibling));
1096
1097 mutex_lock(&cgroup_mutex);
1098
1099
1100 ret = rebind_subsystems(root, 0);
1101
1102 BUG_ON(ret);
1103
1104
1105
1106
1107
1108 write_lock(&css_set_lock);
1109
1110 list_for_each_entry_safe(link, saved_link, &cgrp->css_sets,
1111 cgrp_link_list) {
1112 list_del(&link->cg_link_list);
1113 list_del(&link->cgrp_link_list);
1114 kfree(link);
1115 }
1116 write_unlock(&css_set_lock);
1117
1118 if (!list_empty(&root->root_list)) {
1119 list_del(&root->root_list);
1120 root_count--;
1121 }
1122 mutex_unlock(&cgroup_mutex);
1123
1124 kfree(root);
1125 kill_litter_super(sb);
1126}
1127
/* Filesystem type descriptor for "cgroup" mounts. */
1128static struct file_system_type cgroup_fs_type = {
1129 .name = "cgroup",
1130 .get_sb = cgroup_get_sb,
1131 .kill_sb = cgroup_kill_sb,
1132};
1133
1134static inline struct cgroup *__d_cgrp(struct dentry *dentry)
1135{
1136 return dentry->d_fsdata;
1137}
1138
1139static inline struct cftype *__d_cft(struct dentry *dentry)
1140{
1141 return dentry->d_fsdata;
1142}
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1154{
1155 char *start;
1156
1157 if (cgrp == dummytop) {
1158
1159
1160
1161
1162 strcpy(buf, "/");
1163 return 0;
1164 }
1165
1166 start = buf + buflen;
1167
1168 *--start = '\0';
1169 for (;;) {
1170 int len = cgrp->dentry->d_name.len;
1171 if ((start -= len) < buf)
1172 return -ENAMETOOLONG;
1173 memcpy(start, cgrp->dentry->d_name.name, len);
1174 cgrp = cgrp->parent;
1175 if (!cgrp)
1176 break;
1177 if (!cgrp->parent)
1178 continue;
1179 if (--start < buf)
1180 return -ENAMETOOLONG;
1181 *start = '/';
1182 }
1183 memmove(buf, start, buf + buflen - start);
1184 return 0;
1185}
1186
1187
1188
1189
1190
1191
1192static void get_first_subsys(const struct cgroup *cgrp,
1193 struct cgroup_subsys_state **css, int *subsys_id)
1194{
1195 const struct cgroupfs_root *root = cgrp->root;
1196 const struct cgroup_subsys *test_ss;
1197 BUG_ON(list_empty(&root->subsys_list));
1198 test_ss = list_entry(root->subsys_list.next,
1199 struct cgroup_subsys, sibling);
1200 if (css) {
1201 *css = cgrp->subsys[test_ss->subsys_id];
1202 BUG_ON(!*css);
1203 }
1204 if (subsys_id)
1205 *subsys_id = test_ss->subsys_id;
1206}
1207
1208
1209
1210
1211
1212
1213
1214
1215
/*
 * cgroup_attach_task - move task @tsk into cgroup @cgrp.
 *
 * Returns 0 on success (including when @tsk is already in @cgrp), the
 * error from a subsystem's can_attach() veto, -ENOMEM if no css_set could
 * be obtained, or -ESRCH if @tsk is exiting.
 * NOTE(review): the callers visible in this file hold cgroup_mutex --
 * presumably a requirement; confirm.
 */
1216int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1217{
1218 int retval = 0;
1219 struct cgroup_subsys *ss;
1220 struct cgroup *oldcgrp;
1221 struct css_set *cg = tsk->cgroups;
1222 struct css_set *newcg;
1223 struct cgroupfs_root *root = cgrp->root;
1224 int subsys_id;
1225
1226 get_first_subsys(cgrp, NULL, &subsys_id);
1227
1228
	/* Nothing to do if the task is already in this cgroup. */
1229 oldcgrp = task_cgroup(tsk, subsys_id);
1230 if (cgrp == oldcgrp)
1231 return 0;
1232
	/* Give every subsystem in the hierarchy the chance to veto the move. */
1233 for_each_subsys(root, ss) {
1234 if (ss->can_attach) {
1235 retval = ss->can_attach(ss, cgrp, tsk);
1236 if (retval)
1237 return retval;
1238 }
1239 }
1240
1241
1242
1243
1244
1245 newcg = find_css_set(cg, cgrp);
1246 if (!newcg)
1247 return -ENOMEM;
1248
1249 task_lock(tsk);
1250 if (tsk->flags & PF_EXITING) {
1251 task_unlock(tsk);
1252 put_css_set(newcg);
1253 return -ESRCH;
1254 }
1255 rcu_assign_pointer(tsk->cgroups, newcg);
1256 task_unlock(tsk);
1257
1258
	/* If the task is on a css_set task list, move it to the new css_set's list. */
1259 write_lock(&css_set_lock);
1260 if (!list_empty(&tsk->cg_list)) {
1261 list_del(&tsk->cg_list);
1262 list_add(&tsk->cg_list, &newcg->tasks);
1263 }
1264 write_unlock(&css_set_lock);
1265
1266 for_each_subsys(root, ss) {
1267 if (ss->attach)
1268 ss->attach(ss, cgrp, oldcgrp, tsk);
1269 }
1270 set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
	/* Wait for an RCU grace period before dropping the old css_set reference. */
1271 synchronize_rcu();
1272 put_css_set(cg);
1273 return 0;
1274}
1275
1276
1277
1278
1279
1280static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
1281{
1282 struct task_struct *tsk;
1283 int ret;
1284
1285 if (pid) {
1286 rcu_read_lock();
1287 tsk = find_task_by_vpid(pid);
1288 if (!tsk || tsk->flags & PF_EXITING) {
1289 rcu_read_unlock();
1290 return -ESRCH;
1291 }
1292 get_task_struct(tsk);
1293 rcu_read_unlock();
1294
1295 if ((current->euid) && (current->euid != tsk->uid)
1296 && (current->euid != tsk->suid)) {
1297 put_task_struct(tsk);
1298 return -EACCES;
1299 }
1300 } else {
1301 tsk = current;
1302 get_task_struct(tsk);
1303 }
1304
1305 ret = cgroup_attach_task(cgrp, tsk);
1306 put_task_struct(tsk);
1307 return ret;
1308}
1309
1310static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
1311{
1312 int ret;
1313 if (!cgroup_lock_live_group(cgrp))
1314 return -ENODEV;
1315 ret = attach_task_by_pid(cgrp, pid);
1316 cgroup_unlock();
1317 return ret;
1318}
1319
1320
/*
 * Identifiers for the kinds of files the cgroup core itself provides.
 * NOTE(review): not used in the visible part of the file; presumably
 * stored in cftype.private -- confirm.
 */
1321enum cgroup_filetype {
1322 FILE_ROOT,
1323 FILE_DIR,
1324 FILE_TASKLIST,
1325 FILE_NOTIFY_ON_RELEASE,
1326 FILE_RELEASE_AGENT,
1327};
1328
1329
1330
1331
1332
1333
1334
1335
1336bool cgroup_lock_live_group(struct cgroup *cgrp)
1337{
1338 mutex_lock(&cgroup_mutex);
1339 if (cgroup_is_removed(cgrp)) {
1340 mutex_unlock(&cgroup_mutex);
1341 return false;
1342 }
1343 return true;
1344}
1345
1346static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
1347 const char *buffer)
1348{
1349 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
1350 if (!cgroup_lock_live_group(cgrp))
1351 return -ENODEV;
1352 strcpy(cgrp->root->release_agent_path, buffer);
1353 cgroup_unlock();
1354 return 0;
1355}
1356
1357static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
1358 struct seq_file *seq)
1359{
1360 if (!cgroup_lock_live_group(cgrp))
1361 return -ENODEV;
1362 seq_puts(seq, cgrp->root->release_agent_path);
1363 seq_putc(seq, '\n');
1364 cgroup_unlock();
1365 return 0;
1366}
1367
1368
/* Size of the on-stack buffers used by the cgroup file read/write helpers. */
1369#define CGROUP_LOCAL_BUFFER_SIZE 64
1370
1371static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
1372 struct file *file,
1373 const char __user *userbuf,
1374 size_t nbytes, loff_t *unused_ppos)
1375{
1376 char buffer[CGROUP_LOCAL_BUFFER_SIZE];
1377 int retval = 0;
1378 char *end;
1379
1380 if (!nbytes)
1381 return -EINVAL;
1382 if (nbytes >= sizeof(buffer))
1383 return -E2BIG;
1384 if (copy_from_user(buffer, userbuf, nbytes))
1385 return -EFAULT;
1386
1387 buffer[nbytes] = 0;
1388 strstrip(buffer);
1389 if (cft->write_u64) {
1390 u64 val = simple_strtoull(buffer, &end, 0);
1391 if (*end)
1392 return -EINVAL;
1393 retval = cft->write_u64(cgrp, cft, val);
1394 } else {
1395 s64 val = simple_strtoll(buffer, &end, 0);
1396 if (*end)
1397 return -EINVAL;
1398 retval = cft->write_s64(cgrp, cft, val);
1399 }
1400 if (!retval)
1401 retval = nbytes;
1402 return retval;
1403}
1404
1405static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
1406 struct file *file,
1407 const char __user *userbuf,
1408 size_t nbytes, loff_t *unused_ppos)
1409{
1410 char local_buffer[CGROUP_LOCAL_BUFFER_SIZE];
1411 int retval = 0;
1412 size_t max_bytes = cft->max_write_len;
1413 char *buffer = local_buffer;
1414
1415 if (!max_bytes)
1416 max_bytes = sizeof(local_buffer) - 1;
1417 if (nbytes >= max_bytes)
1418 return -E2BIG;
1419
1420 if (nbytes >= sizeof(local_buffer)) {
1421 buffer = kmalloc(nbytes + 1, GFP_KERNEL);
1422 if (buffer == NULL)
1423 return -ENOMEM;
1424 }
1425 if (nbytes && copy_from_user(buffer, userbuf, nbytes)) {
1426 retval = -EFAULT;
1427 goto out;
1428 }
1429
1430 buffer[nbytes] = 0;
1431 strstrip(buffer);
1432 retval = cft->write_string(cgrp, cft, buffer);
1433 if (!retval)
1434 retval = nbytes;
1435out:
1436 if (buffer != local_buffer)
1437 kfree(buffer);
1438 return retval;
1439}
1440
1441static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
1442 size_t nbytes, loff_t *ppos)
1443{
1444 struct cftype *cft = __d_cft(file->f_dentry);
1445 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
1446
1447 if (!cft || cgroup_is_removed(cgrp))
1448 return -ENODEV;
1449 if (cft->write)
1450 return cft->write(cgrp, cft, file, buf, nbytes, ppos);
1451 if (cft->write_u64 || cft->write_s64)
1452 return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
1453 if (cft->write_string)
1454 return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
1455 if (cft->trigger) {
1456 int ret = cft->trigger(cgrp, (unsigned int)cft->private);
1457 return ret ? ret : nbytes;
1458 }
1459 return -EINVAL;
1460}
1461
1462static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft,
1463 struct file *file,
1464 char __user *buf, size_t nbytes,
1465 loff_t *ppos)
1466{
1467 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
1468 u64 val = cft->read_u64(cgrp, cft);
1469 int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
1470
1471 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
1472}
1473
1474static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft,
1475 struct file *file,
1476 char __user *buf, size_t nbytes,
1477 loff_t *ppos)
1478{
1479 char tmp[CGROUP_LOCAL_BUFFER_SIZE];
1480 s64 val = cft->read_s64(cgrp, cft);
1481 int len = sprintf(tmp, "%lld\n", (long long) val);
1482
1483 return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
1484}
1485
1486static ssize_t cgroup_file_read(struct file *file, char __user *buf,
1487 size_t nbytes, loff_t *ppos)
1488{
1489 struct cftype *cft = __d_cft(file->f_dentry);
1490 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
1491
1492 if (!cft || cgroup_is_removed(cgrp))
1493 return -ENODEV;
1494
1495 if (cft->read)
1496 return cft->read(cgrp, cft, file, buf, nbytes, ppos);
1497 if (cft->read_u64)
1498 return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos);
1499 if (cft->read_s64)
1500 return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos);
1501 return -EINVAL;
1502}
1503
1504
1505
1506
1507
1508
/*
 * Per-open state for seq_file based control files; allocated in
 * cgroup_file_open() and freed in cgroup_seqfile_release().
 */
1509struct cgroup_seqfile_state {
 /* Control file type of the opened file. */
1510 struct cftype *cft;
 /* Cgroup owning the opened file (taken from the parent dentry). */
1511 struct cgroup *cgroup;
1512};
1513
1514static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value)
1515{
1516 struct seq_file *sf = cb->state;
1517 return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value);
1518}
1519
1520static int cgroup_seqfile_show(struct seq_file *m, void *arg)
1521{
1522 struct cgroup_seqfile_state *state = m->private;
1523 struct cftype *cft = state->cft;
1524 if (cft->read_map) {
1525 struct cgroup_map_cb cb = {
1526 .fill = cgroup_map_add,
1527 .state = m,
1528 };
1529 return cft->read_map(state->cgroup, cft, &cb);
1530 }
1531 return cft->read_seq_string(state->cgroup, cft, m);
1532}
1533
1534static int cgroup_seqfile_release(struct inode *inode, struct file *file)
1535{
1536 struct seq_file *seq = file->private_data;
1537 kfree(seq->private);
1538 return single_release(inode, file);
1539}
1540
1541static struct file_operations cgroup_seqfile_operations = {
1542 .read = seq_read,
1543 .write = cgroup_file_write,
1544 .llseek = seq_lseek,
1545 .release = cgroup_seqfile_release,
1546};
1547
1548static int cgroup_file_open(struct inode *inode, struct file *file)
1549{
1550 int err;
1551 struct cftype *cft;
1552
1553 err = generic_file_open(inode, file);
1554 if (err)
1555 return err;
1556
1557 cft = __d_cft(file->f_dentry);
1558 if (!cft)
1559 return -ENODEV;
1560 if (cft->read_map || cft->read_seq_string) {
1561 struct cgroup_seqfile_state *state =
1562 kzalloc(sizeof(*state), GFP_USER);
1563 if (!state)
1564 return -ENOMEM;
1565 state->cft = cft;
1566 state->cgroup = __d_cgrp(file->f_dentry->d_parent);
1567 file->f_op = &cgroup_seqfile_operations;
1568 err = single_open(file, cgroup_seqfile_show, state);
1569 if (err < 0)
1570 kfree(state);
1571 } else if (cft->open)
1572 err = cft->open(inode, file);
1573 else
1574 err = 0;
1575
1576 return err;
1577}
1578
1579static int cgroup_file_release(struct inode *inode, struct file *file)
1580{
1581 struct cftype *cft = __d_cft(file->f_dentry);
1582 if (cft->release)
1583 return cft->release(inode, file);
1584 return 0;
1585}
1586
1587
1588
1589
1590static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry,
1591 struct inode *new_dir, struct dentry *new_dentry)
1592{
1593 if (!S_ISDIR(old_dentry->d_inode->i_mode))
1594 return -ENOTDIR;
1595 if (new_dentry->d_inode)
1596 return -EEXIST;
1597 if (old_dir != new_dir)
1598 return -EIO;
1599 return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
1600}
1601
1602static struct file_operations cgroup_file_operations = {
1603 .read = cgroup_file_read,
1604 .write = cgroup_file_write,
1605 .llseek = generic_file_llseek,
1606 .open = cgroup_file_open,
1607 .release = cgroup_file_release,
1608};
1609
1610static struct inode_operations cgroup_dir_inode_operations = {
1611 .lookup = simple_lookup,
1612 .mkdir = cgroup_mkdir,
1613 .rmdir = cgroup_rmdir,
1614 .rename = cgroup_rename,
1615};
1616
1617static int cgroup_create_file(struct dentry *dentry, int mode,
1618 struct super_block *sb)
1619{
1620 static struct dentry_operations cgroup_dops = {
1621 .d_iput = cgroup_diput,
1622 };
1623
1624 struct inode *inode;
1625
1626 if (!dentry)
1627 return -ENOENT;
1628 if (dentry->d_inode)
1629 return -EEXIST;
1630
1631 inode = cgroup_new_inode(mode, sb);
1632 if (!inode)
1633 return -ENOMEM;
1634
1635 if (S_ISDIR(mode)) {
1636 inode->i_op = &cgroup_dir_inode_operations;
1637 inode->i_fop = &simple_dir_operations;
1638
1639
1640 inc_nlink(inode);
1641
1642
1643
1644 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
1645 } else if (S_ISREG(mode)) {
1646 inode->i_size = 0;
1647 inode->i_fop = &cgroup_file_operations;
1648 }
1649 dentry->d_op = &cgroup_dops;
1650 d_instantiate(dentry, inode);
1651 dget(dentry);
1652 return 0;
1653}
1654
1655
1656
1657
1658
1659
1660
1661
1662static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
1663 int mode)
1664{
1665 struct dentry *parent;
1666 int error = 0;
1667
1668 parent = cgrp->parent->dentry;
1669 error = cgroup_create_file(dentry, S_IFDIR | mode, cgrp->root->sb);
1670 if (!error) {
1671 dentry->d_fsdata = cgrp;
1672 inc_nlink(parent->d_inode);
1673 cgrp->dentry = dentry;
1674 dget(dentry);
1675 }
1676 dput(dentry);
1677
1678 return error;
1679}
1680
1681int cgroup_add_file(struct cgroup *cgrp,
1682 struct cgroup_subsys *subsys,
1683 const struct cftype *cft)
1684{
1685 struct dentry *dir = cgrp->dentry;
1686 struct dentry *dentry;
1687 int error;
1688
1689 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
1690 if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
1691 strcpy(name, subsys->name);
1692 strcat(name, ".");
1693 }
1694 strcat(name, cft->name);
1695 BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
1696 dentry = lookup_one_len(name, dir, strlen(name));
1697 if (!IS_ERR(dentry)) {
1698 error = cgroup_create_file(dentry, 0644 | S_IFREG,
1699 cgrp->root->sb);
1700 if (!error)
1701 dentry->d_fsdata = (void *)cft;
1702 dput(dentry);
1703 } else
1704 error = PTR_ERR(dentry);
1705 return error;
1706}
1707
1708int cgroup_add_files(struct cgroup *cgrp,
1709 struct cgroup_subsys *subsys,
1710 const struct cftype cft[],
1711 int count)
1712{
1713 int i, err;
1714 for (i = 0; i < count; i++) {
1715 err = cgroup_add_file(cgrp, subsys, &cft[i]);
1716 if (err)
1717 return err;
1718 }
1719 return 0;
1720}
1721
1722
1723
1724
1725
1726
1727
1728int cgroup_task_count(const struct cgroup *cgrp)
1729{
1730 int count = 0;
1731 struct cg_cgroup_link *link;
1732
1733 read_lock(&css_set_lock);
1734 list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
1735 count += atomic_read(&link->cg->refcount);
1736 }
1737 read_unlock(&css_set_lock);
1738 return count;
1739}
1740
1741
1742
1743
1744
1745static void cgroup_advance_iter(struct cgroup *cgrp,
1746 struct cgroup_iter *it)
1747{
1748 struct list_head *l = it->cg_link;
1749 struct cg_cgroup_link *link;
1750 struct css_set *cg;
1751
1752
1753 do {
1754 l = l->next;
1755 if (l == &cgrp->css_sets) {
1756 it->cg_link = NULL;
1757 return;
1758 }
1759 link = list_entry(l, struct cg_cgroup_link, cgrp_link_list);
1760 cg = link->cg;
1761 } while (list_empty(&cg->tasks));
1762 it->cg_link = l;
1763 it->task = cg->tasks.next;
1764}
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775static void cgroup_enable_task_cg_lists(void)
1776{
1777 struct task_struct *p, *g;
1778 write_lock(&css_set_lock);
1779 use_task_css_set_links = 1;
1780 do_each_thread(g, p) {
1781 task_lock(p);
1782
1783
1784
1785
1786
1787 if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))
1788 list_add(&p->cg_list, &p->cgroups->tasks);
1789 task_unlock(p);
1790 } while_each_thread(g, p);
1791 write_unlock(&css_set_lock);
1792}
1793
1794void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it)
1795{
1796
1797
1798
1799
1800
1801 if (!use_task_css_set_links)
1802 cgroup_enable_task_cg_lists();
1803
1804 read_lock(&css_set_lock);
1805 it->cg_link = &cgrp->css_sets;
1806 cgroup_advance_iter(cgrp, it);
1807}
1808
1809struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
1810 struct cgroup_iter *it)
1811{
1812 struct task_struct *res;
1813 struct list_head *l = it->task;
1814
1815
1816 if (!it->cg_link)
1817 return NULL;
1818 res = list_entry(l, struct task_struct, cg_list);
1819
1820 l = l->next;
1821 if (l == &res->cgroups->tasks) {
1822
1823
1824 cgroup_advance_iter(cgrp, it);
1825 } else {
1826 it->task = l;
1827 }
1828 return res;
1829}
1830
1831void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it)
1832{
1833 read_unlock(&css_set_lock);
1834}
1835
1836static inline int started_after_time(struct task_struct *t1,
1837 struct timespec *time,
1838 struct task_struct *t2)
1839{
1840 int start_diff = timespec_compare(&t1->start_time, time);
1841 if (start_diff > 0) {
1842 return 1;
1843 } else if (start_diff < 0) {
1844 return 0;
1845 } else {
1846
1847
1848
1849
1850
1851
1852
1853
1854 return t1 > t2;
1855 }
1856}
1857
1858
1859
1860
1861
1862
1863static inline int started_after(void *p1, void *p2)
1864{
1865 struct task_struct *t1 = p1;
1866 struct task_struct *t2 = p2;
1867 return started_after_time(t1, &t2->start_time, t2);
1868}
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897int cgroup_scan_tasks(struct cgroup_scanner *scan)
1898{
1899 int retval, i;
1900 struct cgroup_iter it;
1901 struct task_struct *p, *dropped;
1902
1903 struct task_struct *latest_task = NULL;
1904 struct ptr_heap tmp_heap;
1905 struct ptr_heap *heap;
1906 struct timespec latest_time = { 0, 0 };
1907
1908 if (scan->heap) {
1909
1910 heap = scan->heap;
1911 heap->gt = &started_after;
1912 } else {
1913
1914 heap = &tmp_heap;
1915 retval = heap_init(heap, PAGE_SIZE, GFP_KERNEL, &started_after);
1916 if (retval)
1917
1918 return retval;
1919 }
1920
1921 again:
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934 heap->size = 0;
1935 cgroup_iter_start(scan->cg, &it);
1936 while ((p = cgroup_iter_next(scan->cg, &it))) {
1937
1938
1939
1940
1941 if (scan->test_task && !scan->test_task(p, scan))
1942 continue;
1943
1944
1945
1946
1947 if (!started_after_time(p, &latest_time, latest_task))
1948 continue;
1949 dropped = heap_insert(heap, p);
1950 if (dropped == NULL) {
1951
1952
1953
1954
1955 get_task_struct(p);
1956 } else if (dropped != p) {
1957
1958
1959
1960
1961 get_task_struct(p);
1962 put_task_struct(dropped);
1963 }
1964
1965
1966
1967
1968 }
1969 cgroup_iter_end(scan->cg, &it);
1970
1971 if (heap->size) {
1972 for (i = 0; i < heap->size; i++) {
1973 struct task_struct *q = heap->ptrs[i];
1974 if (i == 0) {
1975 latest_time = q->start_time;
1976 latest_task = q;
1977 }
1978
1979 scan->process_task(q, scan);
1980 put_task_struct(q);
1981 }
1982
1983
1984
1985
1986
1987
1988
1989 goto again;
1990 }
1991 if (heap == &tmp_heap)
1992 heap_free(&tmp_heap);
1993 return 0;
1994}
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
2014{
2015 int n = 0;
2016 struct cgroup_iter it;
2017 struct task_struct *tsk;
2018 cgroup_iter_start(cgrp, &it);
2019 while ((tsk = cgroup_iter_next(cgrp, &it))) {
2020 if (unlikely(n == npids))
2021 break;
2022 pidarray[n++] = task_pid_vnr(tsk);
2023 }
2024 cgroup_iter_end(cgrp, &it);
2025 return n;
2026}
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
2038{
2039 int ret = -EINVAL;
2040 struct cgroup *cgrp;
2041 struct cgroup_iter it;
2042 struct task_struct *tsk;
2043
2044
2045
2046
2047
2048 if (dentry->d_sb->s_op != &cgroup_ops ||
2049 !S_ISDIR(dentry->d_inode->i_mode))
2050 goto err;
2051
2052 ret = 0;
2053 cgrp = dentry->d_fsdata;
2054 rcu_read_lock();
2055
2056 cgroup_iter_start(cgrp, &it);
2057 while ((tsk = cgroup_iter_next(cgrp, &it))) {
2058 switch (tsk->state) {
2059 case TASK_RUNNING:
2060 stats->nr_running++;
2061 break;
2062 case TASK_INTERRUPTIBLE:
2063 stats->nr_sleeping++;
2064 break;
2065 case TASK_UNINTERRUPTIBLE:
2066 stats->nr_uninterruptible++;
2067 break;
2068 case TASK_STOPPED:
2069 stats->nr_stopped++;
2070 break;
2071 default:
2072 if (delayacct_is_task_waiting_on_io(tsk))
2073 stats->nr_io_wait++;
2074 break;
2075 }
2076 }
2077 cgroup_iter_end(cgrp, &it);
2078
2079 rcu_read_unlock();
2080err:
2081 return ret;
2082}
2083
/*
 * Comparison callback for sorting arrays of pid_t in ascending order.
 *
 * Returns a negative value, zero or a positive value when *a is less
 * than, equal to or greater than *b.  The result is computed with
 * comparisons rather than a subtraction so it cannot overflow, and the
 * const qualifier of the arguments is preserved.
 */
static int cmppid(const void *a, const void *b)
{
	const pid_t lhs = *(const pid_t *)a;
	const pid_t rhs = *(const pid_t *)b;

	return (lhs > rhs) - (lhs < rhs);
}
2088
2089
2090
2091
2092
2093
2094
2095
2096static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
2097{
2098
2099
2100
2101
2102
2103
2104 struct cgroup *cgrp = s->private;
2105 int index = 0, pid = *pos;
2106 int *iter;
2107
2108 down_read(&cgrp->pids_mutex);
2109 if (pid) {
2110 int end = cgrp->pids_length;
2111
2112 while (index < end) {
2113 int mid = (index + end) / 2;
2114 if (cgrp->tasks_pids[mid] == pid) {
2115 index = mid;
2116 break;
2117 } else if (cgrp->tasks_pids[mid] <= pid)
2118 index = mid + 1;
2119 else
2120 end = mid;
2121 }
2122 }
2123
2124 if (index >= cgrp->pids_length)
2125 return NULL;
2126
2127 iter = cgrp->tasks_pids + index;
2128 *pos = *iter;
2129 return iter;
2130}
2131
2132static void cgroup_tasks_stop(struct seq_file *s, void *v)
2133{
2134 struct cgroup *cgrp = s->private;
2135 up_read(&cgrp->pids_mutex);
2136}
2137
2138static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
2139{
2140 struct cgroup *cgrp = s->private;
2141 int *p = v;
2142 int *end = cgrp->tasks_pids + cgrp->pids_length;
2143
2144
2145
2146
2147
2148 p++;
2149 if (p >= end) {
2150 return NULL;
2151 } else {
2152 *pos = *p;
2153 return p;
2154 }
2155}
2156
/*
 * seq_file ->show() for the "tasks" file: prints the pid that @v points
 * at, one per line.
 */
static int cgroup_tasks_show(struct seq_file *s, void *v)
{
	int *pidp = v;

	return seq_printf(s, "%d\n", *pidp);
}
2161
2162static struct seq_operations cgroup_tasks_seq_operations = {
2163 .start = cgroup_tasks_start,
2164 .stop = cgroup_tasks_stop,
2165 .next = cgroup_tasks_next,
2166 .show = cgroup_tasks_show,
2167};
2168
2169static void release_cgroup_pid_array(struct cgroup *cgrp)
2170{
2171 down_write(&cgrp->pids_mutex);
2172 BUG_ON(!cgrp->pids_use_count);
2173 if (!--cgrp->pids_use_count) {
2174 kfree(cgrp->tasks_pids);
2175 cgrp->tasks_pids = NULL;
2176 cgrp->pids_length = 0;
2177 }
2178 up_write(&cgrp->pids_mutex);
2179}
2180
2181static int cgroup_tasks_release(struct inode *inode, struct file *file)
2182{
2183 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2184
2185 if (!(file->f_mode & FMODE_READ))
2186 return 0;
2187
2188 release_cgroup_pid_array(cgrp);
2189 return seq_release(inode, file);
2190}
2191
2192static struct file_operations cgroup_tasks_operations = {
2193 .read = seq_read,
2194 .llseek = seq_lseek,
2195 .write = cgroup_file_write,
2196 .release = cgroup_tasks_release,
2197};
2198
2199
2200
2201
2202
2203
2204static int cgroup_tasks_open(struct inode *unused, struct file *file)
2205{
2206 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2207 pid_t *pidarray;
2208 int npids;
2209 int retval;
2210
2211
2212 if (!(file->f_mode & FMODE_READ))
2213 return 0;
2214
2215
2216
2217
2218
2219
2220
2221 npids = cgroup_task_count(cgrp);
2222 pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
2223 if (!pidarray)
2224 return -ENOMEM;
2225 npids = pid_array_load(pidarray, npids, cgrp);
2226 sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
2227
2228
2229
2230
2231
2232 down_write(&cgrp->pids_mutex);
2233 kfree(cgrp->tasks_pids);
2234 cgrp->tasks_pids = pidarray;
2235 cgrp->pids_length = npids;
2236 cgrp->pids_use_count++;
2237 up_write(&cgrp->pids_mutex);
2238
2239 file->f_op = &cgroup_tasks_operations;
2240
2241 retval = seq_open(file, &cgroup_tasks_seq_operations);
2242 if (retval) {
2243 release_cgroup_pid_array(cgrp);
2244 return retval;
2245 }
2246 ((struct seq_file *)file->private_data)->private = cgrp;
2247 return 0;
2248}
2249
2250static u64 cgroup_read_notify_on_release(struct cgroup *cgrp,
2251 struct cftype *cft)
2252{
2253 return notify_on_release(cgrp);
2254}
2255
2256static int cgroup_write_notify_on_release(struct cgroup *cgrp,
2257 struct cftype *cft,
2258 u64 val)
2259{
2260 clear_bit(CGRP_RELEASABLE, &cgrp->flags);
2261 if (val)
2262 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
2263 else
2264 clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
2265 return 0;
2266}
2267
2268
2269
2270
2271static struct cftype files[] = {
2272 {
2273 .name = "tasks",
2274 .open = cgroup_tasks_open,
2275 .write_u64 = cgroup_tasks_write,
2276 .release = cgroup_tasks_release,
2277 .private = FILE_TASKLIST,
2278 },
2279
2280 {
2281 .name = "notify_on_release",
2282 .read_u64 = cgroup_read_notify_on_release,
2283 .write_u64 = cgroup_write_notify_on_release,
2284 .private = FILE_NOTIFY_ON_RELEASE,
2285 },
2286};
2287
2288static struct cftype cft_release_agent = {
2289 .name = "release_agent",
2290 .read_seq_string = cgroup_release_agent_show,
2291 .write_string = cgroup_release_agent_write,
2292 .max_write_len = PATH_MAX,
2293 .private = FILE_RELEASE_AGENT,
2294};
2295
2296static int cgroup_populate_dir(struct cgroup *cgrp)
2297{
2298 int err;
2299 struct cgroup_subsys *ss;
2300
2301
2302 cgroup_clear_directory(cgrp->dentry);
2303
2304 err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
2305 if (err < 0)
2306 return err;
2307
2308 if (cgrp == cgrp->top_cgroup) {
2309 if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
2310 return err;
2311 }
2312
2313 for_each_subsys(cgrp->root, ss) {
2314 if (ss->populate && (err = ss->populate(ss, cgrp)) < 0)
2315 return err;
2316 }
2317
2318 return 0;
2319}
2320
2321static void init_cgroup_css(struct cgroup_subsys_state *css,
2322 struct cgroup_subsys *ss,
2323 struct cgroup *cgrp)
2324{
2325 css->cgroup = cgrp;
2326 atomic_set(&css->refcnt, 0);
2327 css->flags = 0;
2328 if (cgrp == dummytop)
2329 set_bit(CSS_ROOT, &css->flags);
2330 BUG_ON(cgrp->subsys[ss->subsys_id]);
2331 cgrp->subsys[ss->subsys_id] = css;
2332}
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
2343 int mode)
2344{
2345 struct cgroup *cgrp;
2346 struct cgroupfs_root *root = parent->root;
2347 int err = 0;
2348 struct cgroup_subsys *ss;
2349 struct super_block *sb = root->sb;
2350
2351 cgrp = kzalloc(sizeof(*cgrp), GFP_KERNEL);
2352 if (!cgrp)
2353 return -ENOMEM;
2354
2355
2356
2357
2358
2359
2360 atomic_inc(&sb->s_active);
2361
2362 mutex_lock(&cgroup_mutex);
2363
2364 init_cgroup_housekeeping(cgrp);
2365
2366 cgrp->parent = parent;
2367 cgrp->root = parent->root;
2368 cgrp->top_cgroup = parent->top_cgroup;
2369
2370 if (notify_on_release(parent))
2371 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
2372
2373 for_each_subsys(root, ss) {
2374 struct cgroup_subsys_state *css = ss->create(ss, cgrp);
2375 if (IS_ERR(css)) {
2376 err = PTR_ERR(css);
2377 goto err_destroy;
2378 }
2379 init_cgroup_css(css, ss, cgrp);
2380 }
2381
2382 list_add(&cgrp->sibling, &cgrp->parent->children);
2383 root->number_of_cgroups++;
2384
2385 err = cgroup_create_dir(cgrp, dentry, mode);
2386 if (err < 0)
2387 goto err_remove;
2388
2389
2390 BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
2391
2392 err = cgroup_populate_dir(cgrp);
2393
2394
2395 mutex_unlock(&cgroup_mutex);
2396 mutex_unlock(&cgrp->dentry->d_inode->i_mutex);
2397
2398 return 0;
2399
2400 err_remove:
2401
2402 list_del(&cgrp->sibling);
2403 root->number_of_cgroups--;
2404
2405 err_destroy:
2406
2407 for_each_subsys(root, ss) {
2408 if (cgrp->subsys[ss->subsys_id])
2409 ss->destroy(ss, cgrp);
2410 }
2411
2412 mutex_unlock(&cgroup_mutex);
2413
2414
2415 deactivate_super(sb);
2416
2417 kfree(cgrp);
2418 return err;
2419}
2420
2421static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2422{
2423 struct cgroup *c_parent = dentry->d_parent->d_fsdata;
2424
2425
2426 return cgroup_create(c_parent, dentry, mode | S_IFDIR);
2427}
2428
2429static int cgroup_has_css_refs(struct cgroup *cgrp)
2430{
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440 int i;
2441 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2442 struct cgroup_subsys *ss = subsys[i];
2443 struct cgroup_subsys_state *css;
2444
2445 if (ss->root != cgrp->root)
2446 continue;
2447 css = cgrp->subsys[ss->subsys_id];
2448
2449
2450
2451
2452
2453
2454 if (css && atomic_read(&css->refcnt))
2455 return 1;
2456 }
2457 return 0;
2458}
2459
2460static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
2461{
2462 struct cgroup *cgrp = dentry->d_fsdata;
2463 struct dentry *d;
2464 struct cgroup *parent;
2465 struct super_block *sb;
2466 struct cgroupfs_root *root;
2467
2468
2469
2470 mutex_lock(&cgroup_mutex);
2471 if (atomic_read(&cgrp->count) != 0) {
2472 mutex_unlock(&cgroup_mutex);
2473 return -EBUSY;
2474 }
2475 if (!list_empty(&cgrp->children)) {
2476 mutex_unlock(&cgroup_mutex);
2477 return -EBUSY;
2478 }
2479 mutex_unlock(&cgroup_mutex);
2480
2481
2482
2483
2484
2485 cgroup_call_pre_destroy(cgrp);
2486
2487 mutex_lock(&cgroup_mutex);
2488 parent = cgrp->parent;
2489 root = cgrp->root;
2490 sb = root->sb;
2491
2492 if (atomic_read(&cgrp->count)
2493 || !list_empty(&cgrp->children)
2494 || cgroup_has_css_refs(cgrp)) {
2495 mutex_unlock(&cgroup_mutex);
2496 return -EBUSY;
2497 }
2498
2499 spin_lock(&release_list_lock);
2500 set_bit(CGRP_REMOVED, &cgrp->flags);
2501 if (!list_empty(&cgrp->release_list))
2502 list_del(&cgrp->release_list);
2503 spin_unlock(&release_list_lock);
2504
2505 list_del(&cgrp->sibling);
2506 spin_lock(&cgrp->dentry->d_lock);
2507 d = dget(cgrp->dentry);
2508 spin_unlock(&d->d_lock);
2509
2510 cgroup_d_remove_dir(d);
2511 dput(d);
2512
2513 set_bit(CGRP_RELEASABLE, &parent->flags);
2514 check_for_release(parent);
2515
2516 mutex_unlock(&cgroup_mutex);
2517 return 0;
2518}
2519
2520static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
2521{
2522 struct cgroup_subsys_state *css;
2523
2524 printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
2525
2526
2527 ss->root = &rootnode;
2528 css = ss->create(ss, dummytop);
2529
2530 BUG_ON(IS_ERR(css));
2531 init_cgroup_css(css, ss, dummytop);
2532
2533
2534
2535
2536
2537 init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
2538
2539 need_forkexit_callback |= ss->fork || ss->exit;
2540 need_mm_owner_callback |= !!ss->mm_owner_changed;
2541
2542
2543
2544
2545 BUG_ON(!list_empty(&init_task.tasks));
2546
2547 ss->active = 1;
2548}
2549
2550
2551
2552
2553
2554
2555
2556int __init cgroup_init_early(void)
2557{
2558 int i;
2559 atomic_set(&init_css_set.refcount, 1);
2560 INIT_LIST_HEAD(&init_css_set.cg_links);
2561 INIT_LIST_HEAD(&init_css_set.tasks);
2562 INIT_HLIST_NODE(&init_css_set.hlist);
2563 css_set_count = 1;
2564 init_cgroup_root(&rootnode);
2565 list_add(&rootnode.root_list, &roots);
2566 root_count = 1;
2567 init_task.cgroups = &init_css_set;
2568
2569 init_css_set_link.cg = &init_css_set;
2570 list_add(&init_css_set_link.cgrp_link_list,
2571 &rootnode.top_cgroup.css_sets);
2572 list_add(&init_css_set_link.cg_link_list,
2573 &init_css_set.cg_links);
2574
2575 for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
2576 INIT_HLIST_HEAD(&css_set_table[i]);
2577
2578 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2579 struct cgroup_subsys *ss = subsys[i];
2580
2581 BUG_ON(!ss->name);
2582 BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
2583 BUG_ON(!ss->create);
2584 BUG_ON(!ss->destroy);
2585 if (ss->subsys_id != i) {
2586 printk(KERN_ERR "cgroup: Subsys %s id == %d\n",
2587 ss->name, ss->subsys_id);
2588 BUG();
2589 }
2590
2591 if (ss->early_init)
2592 cgroup_init_subsys(ss);
2593 }
2594 return 0;
2595}
2596
2597
2598
2599
2600
2601
2602
2603int __init cgroup_init(void)
2604{
2605 int err;
2606 int i;
2607 struct hlist_head *hhead;
2608
2609 err = bdi_init(&cgroup_backing_dev_info);
2610 if (err)
2611 return err;
2612
2613 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2614 struct cgroup_subsys *ss = subsys[i];
2615 if (!ss->early_init)
2616 cgroup_init_subsys(ss);
2617 }
2618
2619
2620 hhead = css_set_hash(init_css_set.subsys);
2621 hlist_add_head(&init_css_set.hlist, hhead);
2622
2623 err = register_filesystem(&cgroup_fs_type);
2624 if (err < 0)
2625 goto out;
2626
2627 proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations);
2628
2629out:
2630 if (err)
2631 bdi_destroy(&cgroup_backing_dev_info);
2632
2633 return err;
2634}
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649static int proc_cgroup_show(struct seq_file *m, void *v)
2650{
2651 struct pid *pid;
2652 struct task_struct *tsk;
2653 char *buf;
2654 int retval;
2655 struct cgroupfs_root *root;
2656
2657 retval = -ENOMEM;
2658 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
2659 if (!buf)
2660 goto out;
2661
2662 retval = -ESRCH;
2663 pid = m->private;
2664 tsk = get_pid_task(pid, PIDTYPE_PID);
2665 if (!tsk)
2666 goto out_free;
2667
2668 retval = 0;
2669
2670 mutex_lock(&cgroup_mutex);
2671
2672 for_each_root(root) {
2673 struct cgroup_subsys *ss;
2674 struct cgroup *cgrp;
2675 int subsys_id;
2676 int count = 0;
2677
2678
2679 if (!root->actual_subsys_bits)
2680 continue;
2681 seq_printf(m, "%lu:", root->subsys_bits);
2682 for_each_subsys(root, ss)
2683 seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
2684 seq_putc(m, ':');
2685 get_first_subsys(&root->top_cgroup, NULL, &subsys_id);
2686 cgrp = task_cgroup(tsk, subsys_id);
2687 retval = cgroup_path(cgrp, buf, PAGE_SIZE);
2688 if (retval < 0)
2689 goto out_unlock;
2690 seq_puts(m, buf);
2691 seq_putc(m, '\n');
2692 }
2693
2694out_unlock:
2695 mutex_unlock(&cgroup_mutex);
2696 put_task_struct(tsk);
2697out_free:
2698 kfree(buf);
2699out:
2700 return retval;
2701}
2702
2703static int cgroup_open(struct inode *inode, struct file *file)
2704{
2705 struct pid *pid = PROC_I(inode)->pid;
2706 return single_open(file, proc_cgroup_show, pid);
2707}
2708
2709struct file_operations proc_cgroup_operations = {
2710 .open = cgroup_open,
2711 .read = seq_read,
2712 .llseek = seq_lseek,
2713 .release = single_release,
2714};
2715
2716
2717static int proc_cgroupstats_show(struct seq_file *m, void *v)
2718{
2719 int i;
2720
2721 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
2722 mutex_lock(&cgroup_mutex);
2723 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2724 struct cgroup_subsys *ss = subsys[i];
2725 seq_printf(m, "%s\t%lu\t%d\t%d\n",
2726 ss->name, ss->root->subsys_bits,
2727 ss->root->number_of_cgroups, !ss->disabled);
2728 }
2729 mutex_unlock(&cgroup_mutex);
2730 return 0;
2731}
2732
2733static int cgroupstats_open(struct inode *inode, struct file *file)
2734{
2735 return single_open(file, proc_cgroupstats_show, NULL);
2736}
2737
2738static struct file_operations proc_cgroupstats_operations = {
2739 .open = cgroupstats_open,
2740 .read = seq_read,
2741 .llseek = seq_lseek,
2742 .release = single_release,
2743};
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761void cgroup_fork(struct task_struct *child)
2762{
2763 task_lock(current);
2764 child->cgroups = current->cgroups;
2765 get_css_set(child->cgroups);
2766 task_unlock(current);
2767 INIT_LIST_HEAD(&child->cg_list);
2768}
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778void cgroup_fork_callbacks(struct task_struct *child)
2779{
2780 if (need_forkexit_callback) {
2781 int i;
2782 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2783 struct cgroup_subsys *ss = subsys[i];
2784 if (ss->fork)
2785 ss->fork(ss, child);
2786 }
2787 }
2788}
2789
2790#ifdef CONFIG_MM_OWNER
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
2802{
2803 struct cgroup *oldcgrp, *newcgrp = NULL;
2804
2805 if (need_mm_owner_callback) {
2806 int i;
2807 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2808 struct cgroup_subsys *ss = subsys[i];
2809 oldcgrp = task_cgroup(old, ss->subsys_id);
2810 if (new)
2811 newcgrp = task_cgroup(new, ss->subsys_id);
2812 if (oldcgrp == newcgrp)
2813 continue;
2814 if (ss->mm_owner_changed)
2815 ss->mm_owner_changed(ss, oldcgrp, newcgrp, new);
2816 }
2817 }
2818}
2819#endif
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830void cgroup_post_fork(struct task_struct *child)
2831{
2832 if (use_task_css_set_links) {
2833 write_lock(&css_set_lock);
2834 if (list_empty(&child->cg_list))
2835 list_add(&child->cg_list, &child->cgroups->tasks);
2836 write_unlock(&css_set_lock);
2837 }
2838}
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874void cgroup_exit(struct task_struct *tsk, int run_callbacks)
2875{
2876 int i;
2877 struct css_set *cg;
2878
2879 if (run_callbacks && need_forkexit_callback) {
2880 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2881 struct cgroup_subsys *ss = subsys[i];
2882 if (ss->exit)
2883 ss->exit(ss, tsk);
2884 }
2885 }
2886
2887
2888
2889
2890
2891
2892 if (!list_empty(&tsk->cg_list)) {
2893 write_lock(&css_set_lock);
2894 if (!list_empty(&tsk->cg_list))
2895 list_del(&tsk->cg_list);
2896 write_unlock(&css_set_lock);
2897 }
2898
2899
2900 task_lock(tsk);
2901 cg = tsk->cgroups;
2902 tsk->cgroups = &init_css_set;
2903 task_unlock(tsk);
2904 if (cg)
2905 put_css_set_taskexit(cg);
2906}
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
2919 char *nodename)
2920{
2921 struct dentry *dentry;
2922 int ret = 0;
2923 struct cgroup *parent, *child;
2924 struct inode *inode;
2925 struct css_set *cg;
2926 struct cgroupfs_root *root;
2927 struct cgroup_subsys *ss;
2928
2929
2930 BUG_ON(!subsys->active);
2931
2932
2933
2934 mutex_lock(&cgroup_mutex);
2935 again:
2936 root = subsys->root;
2937 if (root == &rootnode) {
2938 mutex_unlock(&cgroup_mutex);
2939 return 0;
2940 }
2941 cg = tsk->cgroups;
2942 parent = task_cgroup(tsk, subsys->subsys_id);
2943
2944
2945 atomic_inc(&parent->root->sb->s_active);
2946
2947
2948 get_css_set(cg);
2949 mutex_unlock(&cgroup_mutex);
2950
2951
2952 inode = parent->dentry->d_inode;
2953
2954
2955
2956 mutex_lock(&inode->i_mutex);
2957 dentry = lookup_one_len(nodename, parent->dentry, strlen(nodename));
2958 if (IS_ERR(dentry)) {
2959 printk(KERN_INFO
2960 "cgroup: Couldn't allocate dentry for %s: %ld\n", nodename,
2961 PTR_ERR(dentry));
2962 ret = PTR_ERR(dentry);
2963 goto out_release;
2964 }
2965
2966
2967 ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755);
2968 child = __d_cgrp(dentry);
2969 dput(dentry);
2970 if (ret) {
2971 printk(KERN_INFO
2972 "Failed to create cgroup %s: %d\n", nodename,
2973 ret);
2974 goto out_release;
2975 }
2976
2977 if (!child) {
2978 printk(KERN_INFO
2979 "Couldn't find new cgroup %s\n", nodename);
2980 ret = -ENOMEM;
2981 goto out_release;
2982 }
2983
2984
2985
2986
2987 mutex_lock(&cgroup_mutex);
2988 if ((root != subsys->root) ||
2989 (parent != task_cgroup(tsk, subsys->subsys_id))) {
2990
2991 mutex_unlock(&inode->i_mutex);
2992 put_css_set(cg);
2993
2994 deactivate_super(parent->root->sb);
2995
2996
2997
2998 printk(KERN_INFO
2999 "Race in cgroup_clone() - leaking cgroup %s\n",
3000 nodename);
3001 goto again;
3002 }
3003
3004
3005 for_each_subsys(root, ss) {
3006 if (ss->post_clone)
3007 ss->post_clone(ss, child);
3008 }
3009
3010
3011 ret = cgroup_attach_task(child, tsk);
3012 mutex_unlock(&cgroup_mutex);
3013
3014 out_release:
3015 mutex_unlock(&inode->i_mutex);
3016
3017 mutex_lock(&cgroup_mutex);
3018 put_css_set(cg);
3019 mutex_unlock(&cgroup_mutex);
3020 deactivate_super(parent->root->sb);
3021 return ret;
3022}
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036int cgroup_is_descendant(const struct cgroup *cgrp)
3037{
3038 int ret;
3039 struct cgroup *target;
3040 int subsys_id;
3041
3042 if (cgrp == dummytop)
3043 return 1;
3044
3045 get_first_subsys(cgrp, NULL, &subsys_id);
3046 target = task_cgroup(current, subsys_id);
3047 while (cgrp != target && cgrp!= cgrp->top_cgroup)
3048 cgrp = cgrp->parent;
3049 ret = (cgrp == target);
3050 return ret;
3051}
3052
3053static void check_for_release(struct cgroup *cgrp)
3054{
3055
3056
3057 if (cgroup_is_releasable(cgrp) && !atomic_read(&cgrp->count)
3058 && list_empty(&cgrp->children) && !cgroup_has_css_refs(cgrp)) {
3059
3060
3061
3062 int need_schedule_work = 0;
3063 spin_lock(&release_list_lock);
3064 if (!cgroup_is_removed(cgrp) &&
3065 list_empty(&cgrp->release_list)) {
3066 list_add(&cgrp->release_list, &release_list);
3067 need_schedule_work = 1;
3068 }
3069 spin_unlock(&release_list_lock);
3070 if (need_schedule_work)
3071 schedule_work(&release_agent_work);
3072 }
3073}
3074
3075void __css_put(struct cgroup_subsys_state *css)
3076{
3077 struct cgroup *cgrp = css->cgroup;
3078 rcu_read_lock();
3079 if (atomic_dec_and_test(&css->refcnt) && notify_on_release(cgrp)) {
3080 set_bit(CGRP_RELEASABLE, &cgrp->flags);
3081 check_for_release(cgrp);
3082 }
3083 rcu_read_unlock();
3084}
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109static void cgroup_release_agent(struct work_struct *work)
3110{
3111 BUG_ON(work != &release_agent_work);
3112 mutex_lock(&cgroup_mutex);
3113 spin_lock(&release_list_lock);
3114 while (!list_empty(&release_list)) {
3115 char *argv[3], *envp[3];
3116 int i;
3117 char *pathbuf = NULL, *agentbuf = NULL;
3118 struct cgroup *cgrp = list_entry(release_list.next,
3119 struct cgroup,
3120 release_list);
3121 list_del_init(&cgrp->release_list);
3122 spin_unlock(&release_list_lock);
3123 pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
3124 if (!pathbuf)
3125 goto continue_free;
3126 if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
3127 goto continue_free;
3128 agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
3129 if (!agentbuf)
3130 goto continue_free;
3131
3132 i = 0;
3133 argv[i++] = agentbuf;
3134 argv[i++] = pathbuf;
3135 argv[i] = NULL;
3136
3137 i = 0;
3138
3139 envp[i++] = "HOME=/";
3140 envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
3141 envp[i] = NULL;
3142
3143
3144
3145
3146 mutex_unlock(&cgroup_mutex);
3147 call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
3148 mutex_lock(&cgroup_mutex);
3149 continue_free:
3150 kfree(pathbuf);
3151 kfree(agentbuf);
3152 spin_lock(&release_list_lock);
3153 }
3154 spin_unlock(&release_list_lock);
3155 mutex_unlock(&cgroup_mutex);
3156}
3157
3158static int __init cgroup_disable(char *str)
3159{
3160 int i;
3161 char *token;
3162
3163 while ((token = strsep(&str, ",")) != NULL) {
3164 if (!*token)
3165 continue;
3166
3167 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3168 struct cgroup_subsys *ss = subsys[i];
3169
3170 if (!strcmp(token, ss->name)) {
3171 ss->disabled = 1;
3172 printk(KERN_INFO "Disabling %s control group"
3173 " subsystem\n", ss->name);
3174 break;
3175 }
3176 }
3177 }
3178 return 1;
3179}
3180__setup("cgroup_disable=", cgroup_disable);