1
2
3
4
5
6
7
8#include <linux/dm-dirty-log.h>
9#include <linux/dm-region-hash.h>
10
11#include <linux/ctype.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/vmalloc.h>
15
16#include "dm.h"
17#include "dm-bio-list.h"
18
19#define DM_MSG_PREFIX "region hash"
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57struct dm_region_hash {
58 uint32_t region_size;
59 unsigned region_shift;
60
61
62 struct dm_dirty_log *log;
63
64
65 rwlock_t hash_lock;
66 mempool_t *region_pool;
67 unsigned mask;
68 unsigned nr_buckets;
69 unsigned prime;
70 unsigned shift;
71 struct list_head *buckets;
72
73 unsigned max_recovery;
74
75 spinlock_t region_lock;
76 atomic_t recovery_in_flight;
77 struct semaphore recovery_count;
78 struct list_head clean_regions;
79 struct list_head quiesced_regions;
80 struct list_head recovered_regions;
81 struct list_head failed_recovered_regions;
82
83 void *context;
84 sector_t target_begin;
85
86
87 void (*dispatch_bios)(void *context, struct bio_list *bios);
88
89
90 void (*wakeup_workers)(void *context);
91
92
93 void (*wakeup_all_recovery_waiters)(void *context);
94};
95
96struct dm_region {
97 struct dm_region_hash *rh;
98 region_t key;
99 int state;
100
101 struct list_head hash_list;
102 struct list_head list;
103
104 atomic_t pending;
105 struct bio_list delayed_bios;
106};
107
108
109
110
111static region_t dm_rh_sector_to_region(struct dm_region_hash *rh, sector_t sector)
112{
113 return sector >> rh->region_shift;
114}
115
116sector_t dm_rh_region_to_sector(struct dm_region_hash *rh, region_t region)
117{
118 return region << rh->region_shift;
119}
120EXPORT_SYMBOL_GPL(dm_rh_region_to_sector);
121
122region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio)
123{
124 return dm_rh_sector_to_region(rh, bio->bi_sector - rh->target_begin);
125}
126EXPORT_SYMBOL_GPL(dm_rh_bio_to_region);
127
128void *dm_rh_region_context(struct dm_region *reg)
129{
130 return reg->rh->context;
131}
132EXPORT_SYMBOL_GPL(dm_rh_region_context);
133
134region_t dm_rh_get_region_key(struct dm_region *reg)
135{
136 return reg->key;
137}
138EXPORT_SYMBOL_GPL(dm_rh_get_region_key);
139
140sector_t dm_rh_get_region_size(struct dm_region_hash *rh)
141{
142 return rh->region_size;
143}
144EXPORT_SYMBOL_GPL(dm_rh_get_region_size);
145
146
147
148
149
150#define RH_HASH_MULT 2654435387U
151#define RH_HASH_SHIFT 12
152
153#define MIN_REGIONS 64
154struct dm_region_hash *dm_region_hash_create(
155 void *context, void (*dispatch_bios)(void *context,
156 struct bio_list *bios),
157 void (*wakeup_workers)(void *context),
158 void (*wakeup_all_recovery_waiters)(void *context),
159 sector_t target_begin, unsigned max_recovery,
160 struct dm_dirty_log *log, uint32_t region_size,
161 region_t nr_regions)
162{
163 struct dm_region_hash *rh;
164 unsigned nr_buckets, max_buckets;
165 size_t i;
166
167
168
169
170
171 max_buckets = nr_regions >> 6;
172 for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1)
173 ;
174 nr_buckets >>= 1;
175
176 rh = kmalloc(sizeof(*rh), GFP_KERNEL);
177 if (!rh) {
178 DMERR("unable to allocate region hash memory");
179 return ERR_PTR(-ENOMEM);
180 }
181
182 rh->context = context;
183 rh->dispatch_bios = dispatch_bios;
184 rh->wakeup_workers = wakeup_workers;
185 rh->wakeup_all_recovery_waiters = wakeup_all_recovery_waiters;
186 rh->target_begin = target_begin;
187 rh->max_recovery = max_recovery;
188 rh->log = log;
189 rh->region_size = region_size;
190 rh->region_shift = ffs(region_size) - 1;
191 rwlock_init(&rh->hash_lock);
192 rh->mask = nr_buckets - 1;
193 rh->nr_buckets = nr_buckets;
194
195 rh->shift = RH_HASH_SHIFT;
196 rh->prime = RH_HASH_MULT;
197
198 rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets));
199 if (!rh->buckets) {
200 DMERR("unable to allocate region hash bucket memory");
201 kfree(rh);
202 return ERR_PTR(-ENOMEM);
203 }
204
205 for (i = 0; i < nr_buckets; i++)
206 INIT_LIST_HEAD(rh->buckets + i);
207
208 spin_lock_init(&rh->region_lock);
209 sema_init(&rh->recovery_count, 0);
210 atomic_set(&rh->recovery_in_flight, 0);
211 INIT_LIST_HEAD(&rh->clean_regions);
212 INIT_LIST_HEAD(&rh->quiesced_regions);
213 INIT_LIST_HEAD(&rh->recovered_regions);
214 INIT_LIST_HEAD(&rh->failed_recovered_regions);
215
216 rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
217 sizeof(struct dm_region));
218 if (!rh->region_pool) {
219 vfree(rh->buckets);
220 kfree(rh);
221 rh = ERR_PTR(-ENOMEM);
222 }
223
224 return rh;
225}
226EXPORT_SYMBOL_GPL(dm_region_hash_create);
227
228void dm_region_hash_destroy(struct dm_region_hash *rh)
229{
230 unsigned h;
231 struct dm_region *reg, *nreg;
232
233 BUG_ON(!list_empty(&rh->quiesced_regions));
234 for (h = 0; h < rh->nr_buckets; h++) {
235 list_for_each_entry_safe(reg, nreg, rh->buckets + h,
236 hash_list) {
237 BUG_ON(atomic_read(®->pending));
238 mempool_free(reg, rh->region_pool);
239 }
240 }
241
242 if (rh->log)
243 dm_dirty_log_destroy(rh->log);
244
245 if (rh->region_pool)
246 mempool_destroy(rh->region_pool);
247
248 vfree(rh->buckets);
249 kfree(rh);
250}
251EXPORT_SYMBOL_GPL(dm_region_hash_destroy);
252
253struct dm_dirty_log *dm_rh_dirty_log(struct dm_region_hash *rh)
254{
255 return rh->log;
256}
257EXPORT_SYMBOL_GPL(dm_rh_dirty_log);
258
259static unsigned rh_hash(struct dm_region_hash *rh, region_t region)
260{
261 return (unsigned) ((region * rh->prime) >> rh->shift) & rh->mask;
262}
263
264static struct dm_region *__rh_lookup(struct dm_region_hash *rh, region_t region)
265{
266 struct dm_region *reg;
267 struct list_head *bucket = rh->buckets + rh_hash(rh, region);
268
269 list_for_each_entry(reg, bucket, hash_list)
270 if (reg->key == region)
271 return reg;
272
273 return NULL;
274}
275
276static void __rh_insert(struct dm_region_hash *rh, struct dm_region *reg)
277{
278 list_add(®->hash_list, rh->buckets + rh_hash(rh, reg->key));
279}
280
281static struct dm_region *__rh_alloc(struct dm_region_hash *rh, region_t region)
282{
283 struct dm_region *reg, *nreg;
284
285 nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
286 if (unlikely(!nreg))
287 nreg = kmalloc(sizeof(*nreg), GFP_NOIO);
288
289 nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
290 DM_RH_CLEAN : DM_RH_NOSYNC;
291 nreg->rh = rh;
292 nreg->key = region;
293 INIT_LIST_HEAD(&nreg->list);
294 atomic_set(&nreg->pending, 0);
295 bio_list_init(&nreg->delayed_bios);
296
297 write_lock_irq(&rh->hash_lock);
298 reg = __rh_lookup(rh, region);
299 if (reg)
300
301 mempool_free(nreg, rh->region_pool);
302 else {
303 __rh_insert(rh, nreg);
304 if (nreg->state == DM_RH_CLEAN) {
305 spin_lock(&rh->region_lock);
306 list_add(&nreg->list, &rh->clean_regions);
307 spin_unlock(&rh->region_lock);
308 }
309
310 reg = nreg;
311 }
312 write_unlock_irq(&rh->hash_lock);
313
314 return reg;
315}
316
317static struct dm_region *__rh_find(struct dm_region_hash *rh, region_t region)
318{
319 struct dm_region *reg;
320
321 reg = __rh_lookup(rh, region);
322 if (!reg) {
323 read_unlock(&rh->hash_lock);
324 reg = __rh_alloc(rh, region);
325 read_lock(&rh->hash_lock);
326 }
327
328 return reg;
329}
330
331int dm_rh_get_state(struct dm_region_hash *rh, region_t region, int may_block)
332{
333 int r;
334 struct dm_region *reg;
335
336 read_lock(&rh->hash_lock);
337 reg = __rh_lookup(rh, region);
338 read_unlock(&rh->hash_lock);
339
340 if (reg)
341 return reg->state;
342
343
344
345
346
347 r = rh->log->type->in_sync(rh->log, region, may_block);
348
349
350
351
352
353 return r == 1 ? DM_RH_CLEAN : DM_RH_NOSYNC;
354}
355EXPORT_SYMBOL_GPL(dm_rh_get_state);
356
357static void complete_resync_work(struct dm_region *reg, int success)
358{
359 struct dm_region_hash *rh = reg->rh;
360
361 rh->log->type->set_region_sync(rh->log, reg->key, success);
362
363
364
365
366
367
368
369
370
371
372 rh->dispatch_bios(rh->context, ®->delayed_bios);
373 if (atomic_dec_and_test(&rh->recovery_in_flight))
374 rh->wakeup_all_recovery_waiters(rh->context);
375 up(&rh->recovery_count);
376}
377
378
379
380
381
382
383
384
385
386
387
388
389
390void dm_rh_mark_nosync(struct dm_region_hash *rh,
391 struct bio *bio, unsigned done, int error)
392{
393 unsigned long flags;
394 struct dm_dirty_log *log = rh->log;
395 struct dm_region *reg;
396 region_t region = dm_rh_bio_to_region(rh, bio);
397 int recovering = 0;
398
399
400 log->type->set_region_sync(log, region, 0);
401
402 read_lock(&rh->hash_lock);
403 reg = __rh_find(rh, region);
404 read_unlock(&rh->hash_lock);
405
406
407 BUG_ON(!reg);
408 BUG_ON(!list_empty(®->list));
409
410 spin_lock_irqsave(&rh->region_lock, flags);
411
412
413
414
415
416
417
418 recovering = (reg->state == DM_RH_RECOVERING);
419 reg->state = DM_RH_NOSYNC;
420 BUG_ON(!list_empty(®->list));
421 spin_unlock_irqrestore(&rh->region_lock, flags);
422
423 bio_endio(bio, error);
424 if (recovering)
425 complete_resync_work(reg, 0);
426}
427EXPORT_SYMBOL_GPL(dm_rh_mark_nosync);
428
429void dm_rh_update_states(struct dm_region_hash *rh, int errors_handled)
430{
431 struct dm_region *reg, *next;
432
433 LIST_HEAD(clean);
434 LIST_HEAD(recovered);
435 LIST_HEAD(failed_recovered);
436
437
438
439
440 write_lock_irq(&rh->hash_lock);
441 spin_lock(&rh->region_lock);
442 if (!list_empty(&rh->clean_regions)) {
443 list_splice_init(&rh->clean_regions, &clean);
444
445 list_for_each_entry(reg, &clean, list)
446 list_del(®->hash_list);
447 }
448
449 if (!list_empty(&rh->recovered_regions)) {
450 list_splice_init(&rh->recovered_regions, &recovered);
451
452 list_for_each_entry(reg, &recovered, list)
453 list_del(®->hash_list);
454 }
455
456 if (!list_empty(&rh->failed_recovered_regions)) {
457 list_splice_init(&rh->failed_recovered_regions,
458 &failed_recovered);
459
460 list_for_each_entry(reg, &failed_recovered, list)
461 list_del(®->hash_list);
462 }
463
464 spin_unlock(&rh->region_lock);
465 write_unlock_irq(&rh->hash_lock);
466
467
468
469
470
471
472 list_for_each_entry_safe(reg, next, &recovered, list) {
473 rh->log->type->clear_region(rh->log, reg->key);
474 complete_resync_work(reg, 1);
475 mempool_free(reg, rh->region_pool);
476 }
477
478 list_for_each_entry_safe(reg, next, &failed_recovered, list) {
479 complete_resync_work(reg, errors_handled ? 0 : 1);
480 mempool_free(reg, rh->region_pool);
481 }
482
483 list_for_each_entry_safe(reg, next, &clean, list) {
484 rh->log->type->clear_region(rh->log, reg->key);
485 mempool_free(reg, rh->region_pool);
486 }
487
488 rh->log->type->flush(rh->log);
489}
490EXPORT_SYMBOL_GPL(dm_rh_update_states);
491
492static void rh_inc(struct dm_region_hash *rh, region_t region)
493{
494 struct dm_region *reg;
495
496 read_lock(&rh->hash_lock);
497 reg = __rh_find(rh, region);
498
499 spin_lock_irq(&rh->region_lock);
500 atomic_inc(®->pending);
501
502 if (reg->state == DM_RH_CLEAN) {
503 reg->state = DM_RH_DIRTY;
504 list_del_init(®->list);
505 spin_unlock_irq(&rh->region_lock);
506
507 rh->log->type->mark_region(rh->log, reg->key);
508 } else
509 spin_unlock_irq(&rh->region_lock);
510
511
512 read_unlock(&rh->hash_lock);
513}
514
515void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
516{
517 struct bio *bio;
518
519 for (bio = bios->head; bio; bio = bio->bi_next)
520 rh_inc(rh, dm_rh_bio_to_region(rh, bio));
521}
522EXPORT_SYMBOL_GPL(dm_rh_inc_pending);
523
524void dm_rh_dec(struct dm_region_hash *rh, region_t region)
525{
526 unsigned long flags;
527 struct dm_region *reg;
528 int should_wake = 0;
529
530 read_lock(&rh->hash_lock);
531 reg = __rh_lookup(rh, region);
532 read_unlock(&rh->hash_lock);
533
534 spin_lock_irqsave(&rh->region_lock, flags);
535 if (atomic_dec_and_test(®->pending)) {
536
537
538
539
540
541
542
543
544
545
546
547
548 if (reg->state == DM_RH_RECOVERING) {
549 list_add_tail(®->list, &rh->quiesced_regions);
550 } else if (reg->state == DM_RH_DIRTY) {
551 reg->state = DM_RH_CLEAN;
552 list_add(®->list, &rh->clean_regions);
553 }
554 should_wake = 1;
555 }
556 spin_unlock_irqrestore(&rh->region_lock, flags);
557
558 if (should_wake)
559 rh->wakeup_workers(rh->context);
560}
561EXPORT_SYMBOL_GPL(dm_rh_dec);
562
563
564
565
566static int __rh_recovery_prepare(struct dm_region_hash *rh)
567{
568 int r;
569 region_t region;
570 struct dm_region *reg;
571
572
573
574
575 r = rh->log->type->get_resync_work(rh->log, ®ion);
576 if (r <= 0)
577 return r;
578
579
580
581
582
583 read_lock(&rh->hash_lock);
584 reg = __rh_find(rh, region);
585 read_unlock(&rh->hash_lock);
586
587 spin_lock_irq(&rh->region_lock);
588 reg->state = DM_RH_RECOVERING;
589
590
591 if (atomic_read(®->pending))
592 list_del_init(®->list);
593 else
594 list_move(®->list, &rh->quiesced_regions);
595
596 spin_unlock_irq(&rh->region_lock);
597
598 return 1;
599}
600
601void dm_rh_recovery_prepare(struct dm_region_hash *rh)
602{
603
604 atomic_inc(&rh->recovery_in_flight);
605
606 while (!down_trylock(&rh->recovery_count)) {
607 atomic_inc(&rh->recovery_in_flight);
608 if (__rh_recovery_prepare(rh) <= 0) {
609 atomic_dec(&rh->recovery_in_flight);
610 up(&rh->recovery_count);
611 break;
612 }
613 }
614
615
616 if (atomic_dec_and_test(&rh->recovery_in_flight))
617 rh->wakeup_all_recovery_waiters(rh->context);
618}
619EXPORT_SYMBOL_GPL(dm_rh_recovery_prepare);
620
621
622
623
624struct dm_region *dm_rh_recovery_start(struct dm_region_hash *rh)
625{
626 struct dm_region *reg = NULL;
627
628 spin_lock_irq(&rh->region_lock);
629 if (!list_empty(&rh->quiesced_regions)) {
630 reg = list_entry(rh->quiesced_regions.next,
631 struct dm_region, list);
632 list_del_init(®->list);
633 }
634 spin_unlock_irq(&rh->region_lock);
635
636 return reg;
637}
638EXPORT_SYMBOL_GPL(dm_rh_recovery_start);
639
640void dm_rh_recovery_end(struct dm_region *reg, int success)
641{
642 struct dm_region_hash *rh = reg->rh;
643
644 spin_lock_irq(&rh->region_lock);
645 if (success)
646 list_add(®->list, ®->rh->recovered_regions);
647 else {
648 reg->state = DM_RH_NOSYNC;
649 list_add(®->list, ®->rh->failed_recovered_regions);
650 }
651 spin_unlock_irq(&rh->region_lock);
652
653 rh->wakeup_workers(rh->context);
654}
655EXPORT_SYMBOL_GPL(dm_rh_recovery_end);
656
657
658int dm_rh_recovery_in_flight(struct dm_region_hash *rh)
659{
660 return atomic_read(&rh->recovery_in_flight);
661}
662EXPORT_SYMBOL_GPL(dm_rh_recovery_in_flight);
663
664int dm_rh_flush(struct dm_region_hash *rh)
665{
666 return rh->log->type->flush(rh->log);
667}
668EXPORT_SYMBOL_GPL(dm_rh_flush);
669
670void dm_rh_delay(struct dm_region_hash *rh, struct bio *bio)
671{
672 struct dm_region *reg;
673
674 read_lock(&rh->hash_lock);
675 reg = __rh_find(rh, dm_rh_bio_to_region(rh, bio));
676 bio_list_add(®->delayed_bios, bio);
677 read_unlock(&rh->hash_lock);
678}
679EXPORT_SYMBOL_GPL(dm_rh_delay);
680
681void dm_rh_stop_recovery(struct dm_region_hash *rh)
682{
683 int i;
684
685
686 for (i = 0; i < rh->max_recovery; i++)
687 down(&rh->recovery_count);
688}
689EXPORT_SYMBOL_GPL(dm_rh_stop_recovery);
690
691void dm_rh_start_recovery(struct dm_region_hash *rh)
692{
693 int i;
694
695 for (i = 0; i < rh->max_recovery; i++)
696 up(&rh->recovery_count);
697
698 rh->wakeup_workers(rh->context);
699}
700EXPORT_SYMBOL_GPL(dm_rh_start_recovery);
701
702MODULE_DESCRIPTION(DM_NAME " region hash");
703MODULE_AUTHOR("Joe Thornber/Heinz Mauelshagen <dm-devel@redhat.com>");
704MODULE_LICENSE("GPL");