Showing error 769

User: Jiri Slaby
Error type: Invalid Pointer Dereference
Error type description: A pointer which is invalid is being dereferenced
File location: drivers/misc/sgi-xp/xpc_main.c
Line in file: 1041
Project: Linux Kernel
Project version: 2.6.28
Tools: Stanse (1.2)
Entered: 2011-11-07 22:22:22 UTC


Source:

   1/*
   2 * This file is subject to the terms and conditions of the GNU General Public
   3 * License.  See the file "COPYING" in the main directory of this archive
   4 * for more details.
   5 *
   6 * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
   7 */
   8
   9/*
  10 * Cross Partition Communication (XPC) support - standard version.
  11 *
  12 *        XPC provides a message passing capability that crosses partition
  13 *        boundaries. This module is made up of two parts:
  14 *
  15 *            partition        This part detects the presence/absence of other
  16 *                        partitions. It provides a heartbeat and monitors
  17 *                        the heartbeats of other partitions.
  18 *
  19 *            channel        This part manages the channels and sends/receives
  20 *                        messages across them to/from other partitions.
  21 *
  22 *        There are a couple of additional functions residing in XP, which
  23 *        provide an interface to XPC for its users.
  24 *
  25 *
  26 *        Caveats:
  27 *
  28 *          . Currently on sn2, we have no way to determine which nasid an IRQ
  29 *            came from. Thus, xpc_send_IRQ_sn2() does a remote amo write
  30 *            followed by an IPI. The amo indicates where data is to be pulled
  31 *            from, so after the IPI arrives, the remote partition checks the amo
  32 *            word. The IPI can actually arrive before the amo however, so other
  33 *            code must periodically check for this case. Also, remote amo
  34 *            operations do not reliably time out. Thus we do a remote PIO read
  35 *            solely to know whether the remote partition is down and whether we
  36 *            should stop sending IPIs to it. This remote PIO read operation is
  37 *            set up in a special nofault region so SAL knows to ignore (and
  38 *            cleanup) any errors due to the remote amo write, PIO read, and/or
  39 *            PIO write operations.
  40 *
  41 *            If/when new hardware solves this IPI problem, we should abandon
  42 *            the current approach.
  43 *
  44 */
  45
  46#include <linux/module.h>
  47#include <linux/sysctl.h>
  48#include <linux/device.h>
  49#include <linux/delay.h>
  50#include <linux/reboot.h>
  51#include <linux/kdebug.h>
  52#include <linux/kthread.h>
  53#include "xpc.h"
  54
  55/* define two XPC debug device structures to be used with dev_dbg() et al */
  56
    /* driver whose name ("xpc") is shared by both debug devices below */
  57struct device_driver xpc_dbg_name = {
  58        .name = "xpc"
  59};
  60
    /* debug device for partition related messages; reached through xpc_part */
  61struct device xpc_part_dbg_subname = {
  62        .bus_id = {0},                /* set to "part" at xpc_init() time */
  63        .driver = &xpc_dbg_name
  64};
  65
    /* debug device for channel related messages; reached through xpc_chan */
  66struct device xpc_chan_dbg_subname = {
  67        .bus_id = {0},                /* set to "chan" at xpc_init() time */
  68        .driver = &xpc_dbg_name
  69};
  70
    /* handles passed as the first argument of dev_dbg()/dev_err() in this file */
  71struct device *xpc_part = &xpc_part_dbg_subname;
  72struct device *xpc_chan = &xpc_chan_dbg_subname;
  73
    /* NOTE(review): not referenced in the part of the file shown here */
  74static int xpc_kdebug_ignore;
  75
  76/* systune related variables for /proc/sys directories */
  77
    /*
     * Seconds between firings of the heartbeat timer (xpc_hb_beater() re-arms
     * itself xpc_hb_interval * HZ jiffies ahead). The min/max values are the
     * bounds handed to the sysctl entry through .extra1/.extra2.
     */
  78static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
  79static int xpc_hb_min_interval = 1;
  80static int xpc_hb_max_interval = 10;
  81
    /*
     * Seconds between checks of the remote partitions' heartbeats
     * (xpc_hb_checker() schedules the next check xpc_hb_check_interval * HZ
     * jiffies ahead), with the sysctl bounds for it.
     */
  82static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
  83static int xpc_hb_check_min_interval = 10;
  84static int xpc_hb_check_max_interval = 120;
  85
    /*
     * Time limit on a partition disengage, with its sysctl bounds.
     * NOTE(review): presumably in seconds like the two intervals above; the
     * code consuming it is not in the part of the file shown here - confirm.
     */
  86int xpc_disengage_timelimit = XPC_DISENGAGE_DEFAULT_TIMELIMIT;
  87static int xpc_disengage_min_timelimit;        /* = 0 */
  88static int xpc_disengage_max_timelimit = 120;
  89
    /*
     * Entries of the "hb" directory: hb_interval and hb_check_interval. Both
     * are integers written through proc_dointvec_minmax, so a written value is
     * checked against the .extra1 (minimum) and .extra2 (maximum) bounds.
     */
  90static ctl_table xpc_sys_xpc_hb_dir[] = {
  91        {
  92         .ctl_name = CTL_UNNUMBERED,
  93         .procname = "hb_interval",
  94         .data = &xpc_hb_interval,
  95         .maxlen = sizeof(int),
  96         .mode = 0644,
  97         .proc_handler = &proc_dointvec_minmax,
  98         .strategy = &sysctl_intvec,
  99         .extra1 = &xpc_hb_min_interval,
 100         .extra2 = &xpc_hb_max_interval},
 101        {
 102         .ctl_name = CTL_UNNUMBERED,
 103         .procname = "hb_check_interval",
 104         .data = &xpc_hb_check_interval,
 105         .maxlen = sizeof(int),
 106         .mode = 0644,
 107         .proc_handler = &proc_dointvec_minmax,
 108         .strategy = &sysctl_intvec,
 109         .extra1 = &xpc_hb_check_min_interval,
 110         .extra2 = &xpc_hb_check_max_interval},
 111        {}
 112};
    /*
     * Entries of the "xpc" directory: the "hb" subdirectory above plus the
     * bounded integer "disengage_timelimit".
     */
 113static ctl_table xpc_sys_xpc_dir[] = {
 114        {
 115         .ctl_name = CTL_UNNUMBERED,
 116         .procname = "hb",
 117         .mode = 0555,
 118         .child = xpc_sys_xpc_hb_dir},
 119        {
 120         .ctl_name = CTL_UNNUMBERED,
 121         .procname = "disengage_timelimit",
 122         .data = &xpc_disengage_timelimit,
 123         .maxlen = sizeof(int),
 124         .mode = 0644,
 125         .proc_handler = &proc_dointvec_minmax,
 126         .strategy = &sysctl_intvec,
 127         .extra1 = &xpc_disengage_min_timelimit,
 128         .extra2 = &xpc_disengage_max_timelimit},
 129        {}
 130};
    /* top of the hierarchy: a single read-only "xpc" directory */
 131static ctl_table xpc_sys_dir[] = {
 132        {
 133         .ctl_name = CTL_UNNUMBERED,
 134         .procname = "xpc",
 135         .mode = 0555,
 136         .child = xpc_sys_xpc_dir},
 137        {}
 138};
    /*
     * NOTE(review): presumably the handle obtained when xpc_sys_dir is
     * registered; the registration is not in the part of the file shown here.
     */
 139static struct ctl_table_header *xpc_sysctl;
 140
 141/* non-zero if any remote partition disengage was timed out */
 142int xpc_disengage_timedout;
 143
 144/* #of activate IRQs received and not yet processed */
 145int xpc_activate_IRQ_rcvd;
 146DEFINE_SPINLOCK(xpc_activate_IRQ_rcvd_lock);
 147
 148/* IRQ handler notifies this wait queue on receipt of an IRQ */
    /* (xpc_hb_beater() also wakes it once a remote heartbeat check is due) */
 149DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq);
 150
    /* jiffies value at which the next check of remote heartbeats is due */
 151static unsigned long xpc_hb_check_timeout;
    /* periodic timer whose handler is xpc_hb_beater() */
 152static struct timer_list xpc_hb_timer;
 153void *xpc_heartbeating_to_mask;
 154
 155/* notification that the xpc_hb_checker thread has exited */
 156static DECLARE_COMPLETION(xpc_hb_checker_exited);
 157
 158/* notification that the xpc_discovery thread has exited */
 159static DECLARE_COMPLETION(xpc_discovery_exited);
 160
    /* forward declaration; the definition follows xpc_activate_kthreads() */
 161static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
 162
    /* notifier block whose callback is xpc_system_reboot() (defined elsewhere) */
 163static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
 164static struct notifier_block xpc_reboot_notifier = {
 165        .notifier_call = xpc_system_reboot,
 166};
 167
    /* notifier block whose callback is xpc_system_die() (defined elsewhere) */
 168static int xpc_system_die(struct notifier_block *, unsigned long, void *);
 169static struct notifier_block xpc_die_notifier = {
 170        .notifier_call = xpc_system_die,
 171};
 172
    /*
     * Hooks that the code in this file calls through function pointers. None
     * of them is assigned in the part of the file shown here.
     * NOTE(review): presumably the architecture specific code (e.g. sn2)
     * installs them before any of the threads below run - confirm.
     */
 173int (*xpc_setup_partitions_sn) (void);
 174enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *buf, u64 *cookie,
 175                                                  unsigned long *rp_pa,
 176                                                  size_t *len);
 177int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *rp);
    /* local heartbeat hooks; see xpc_start_hb_beater() and xpc_hb_beater() */
 178void (*xpc_heartbeat_init) (void);
 179void (*xpc_heartbeat_exit) (void);
 180void (*xpc_increment_heartbeat) (void);
 181void (*xpc_offline_heartbeat) (void);
 182void (*xpc_online_heartbeat) (void);
    /* called by xpc_check_remote_hb() for every partition it examines */
 183enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *part);
 184
 185enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *part);
 186void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *ch);
 187u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *part);
 188enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *ch);
 189void (*xpc_teardown_msg_structures) (struct xpc_channel *ch);
 190void (*xpc_process_msg_chctl_flags) (struct xpc_partition *part, int ch_number);
 191int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *ch);
 192void *(*xpc_get_deliverable_payload) (struct xpc_channel *ch);
 193
 194void (*xpc_request_partition_activation) (struct xpc_rsvd_page *remote_rp,
 195                                          unsigned long remote_rp_pa,
 196                                          int nasid);
 197void (*xpc_request_partition_reactivation) (struct xpc_partition *part);
 198void (*xpc_request_partition_deactivation) (struct xpc_partition *part);
 199void (*xpc_cancel_partition_deactivation_request) (struct xpc_partition *part);
 200
    /* called by xpc_hb_checker() when activate IRQs are pending or forced */
 201void (*xpc_process_activate_IRQ_rcvd) (void);
    /* called by xpc_setup_ch_structures() and xpc_teardown_ch_structures() */
 202enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *part);
 203void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *part);
 204
 205void (*xpc_indicate_partition_engaged) (struct xpc_partition *part);
 206int (*xpc_partition_engaged) (short partid);
 207int (*xpc_any_partition_engaged) (void);
 208void (*xpc_indicate_partition_disengaged) (struct xpc_partition *part);
 209void (*xpc_assume_partition_disengaged) (short partid);
 210
 211void (*xpc_send_chctl_closerequest) (struct xpc_channel *ch,
 212                                     unsigned long *irq_flags);
 213void (*xpc_send_chctl_closereply) (struct xpc_channel *ch,
 214                                   unsigned long *irq_flags);
 215void (*xpc_send_chctl_openrequest) (struct xpc_channel *ch,
 216                                    unsigned long *irq_flags);
 217void (*xpc_send_chctl_openreply) (struct xpc_channel *ch,
 218                                  unsigned long *irq_flags);
 219
 220void (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *ch,
 221                                     unsigned long msgqueue_pa);
 222
 223enum xp_retval (*xpc_send_payload) (struct xpc_channel *ch, u32 flags,
 224                                    void *payload, u16 payload_size,
 225                                    u8 notify_type, xpc_notify_func func,
 226                                    void *key);
 227void (*xpc_received_payload) (struct xpc_channel *ch, void *payload);
 228
 229/*
 230 * Timer function to enforce the timelimit on the partition disengage.
     *
     * @data: the struct xpc_partition pointer, carried as an unsigned long.
     *
     * The actual work is done by xpc_partition_disengaged(), whose return value
     * is deliberately ignored here. The DBUG_ON()s assert that the timer did not
     * fire before part->disengage_timeout, and that afterwards the timeout has
     * been cleared and the partition is no longer reported as engaged.
 231 */
 232static void
 233xpc_timeout_partition_disengage(unsigned long data)
 234{
 235        struct xpc_partition *part = (struct xpc_partition *)data;
 236
 237        DBUG_ON(time_is_after_jiffies(part->disengage_timeout));
 238
 239        (void)xpc_partition_disengaged(part);
 240
 241        DBUG_ON(part->disengage_timeout != 0);
 242        DBUG_ON(xpc_partition_engaged(XPC_PARTID(part)));
 243}
 244
 245/*
 246 * Timer to produce the heartbeat.  The timer structure's function is
 247 * already set when this is initially called.  A tunable is used to
 248 * specify when the next timeout should occur.
     *
     * @dummy: unused.
     *
     * Each invocation increments the local heartbeat, wakes any waiter on
     * xpc_activate_IRQ_wq once xpc_hb_check_timeout has been reached, and
     * re-arms xpc_hb_timer xpc_hb_interval seconds into the future.
 249 */
 250static void
 251xpc_hb_beater(unsigned long dummy)
 252{
 253        xpc_increment_heartbeat();
 254
            /* a remote heartbeat check is due; let xpc_hb_checker() know */
 255        if (time_is_before_eq_jiffies(xpc_hb_check_timeout))
 256                wake_up_interruptible(&xpc_activate_IRQ_wq);
 257
 258        xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
 259        add_timer(&xpc_hb_timer);
 260}
 261
    /*
     * Start producing the local heartbeat: run the xpc_heartbeat_init() hook,
     * set up xpc_hb_timer with xpc_hb_beater() as its function, and call
     * xpc_hb_beater() once directly, which does the first increment and arms
     * the timer.
     */
 262static void
 263xpc_start_hb_beater(void)
 264{
 265        xpc_heartbeat_init();
 266        init_timer(&xpc_hb_timer);
 267        xpc_hb_timer.function = xpc_hb_beater;
 268        xpc_hb_beater(0);
 269}
 270
    /*
     * Stop producing the local heartbeat: remove xpc_hb_timer with
     * del_timer_sync() and only then run the xpc_heartbeat_exit() hook.
     */
 271static void
 272xpc_stop_hb_beater(void)
 273{
 274        del_timer_sync(&xpc_hb_timer);
 275        xpc_heartbeat_exit();
 276}
 277
 278/*
 279 * At periodic intervals, scan through all active partitions and ensure
 280 * their heartbeat is still active.  If not, the partition is deactivated.
     *
     * The local partition and partitions that are inactive or already
     * deactivating are skipped. The scan is abandoned as soon as xpc_exiting
     * is seen to be set.
 281 */
 282static void
 283xpc_check_remote_hb(void)
 284{
 285        struct xpc_partition *part;
 286        short partid;
 287        enum xp_retval ret;
 288
 289        for (partid = 0; partid < xp_max_npartitions; partid++) {
 290
 291                if (xpc_exiting)
 292                        break;
 293
                    /* never check our own heartbeat */
 294                if (partid == xp_partition_id)
 295                        continue;
 296
 297                part = &xpc_partitions[partid];
 298
 299                if (part->act_state == XPC_P_AS_INACTIVE ||
 300                    part->act_state == XPC_P_AS_DEACTIVATING) {
 301                        continue;
 302                }
 303
                    /* any non-success result is handed on to the deactivation */
 304                ret = xpc_get_remote_heartbeat(part);
 305                if (ret != xpSuccess)
 306                        XPC_DEACTIVATE_PARTITION(part, ret);
 307        }
 308}
 309
 310/*
 311 * This thread is responsible for nearly all of the partition
 312 * activation/deactivation.
     *
     * @ignore: unused.
     *
     * The thread restricts itself to XPC_HB_CHECK_CPU, starts the local
     * heartbeat, and then loops until xpc_exiting is set: whenever
     * xpc_hb_check_timeout has been reached it checks the remote heartbeats,
     * and whenever activate IRQs are pending (or a recheck is forced) it
     * processes them. Before returning it stops the local heartbeat and
     * completes xpc_hb_checker_exited.
     *
     * Always returns 0.
 313 */
 314static int
 315xpc_hb_checker(void *ignore)
 316{
 317        int force_IRQ = 0;
 318
 319        /* this thread was marked active by xpc_hb_init() */
 320
 321        set_cpus_allowed_ptr(current, &cpumask_of_cpu(XPC_HB_CHECK_CPU));
 322
 323        /* set our heartbeating to other partitions into motion */
 324        xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
 325        xpc_start_hb_beater();
 326
 327        while (!xpc_exiting) {
 328
 329                dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
 330                        "been received\n",
 331                        (int)(xpc_hb_check_timeout - jiffies),
 332                        xpc_activate_IRQ_rcvd);
 333
 334                /* checking of remote heartbeats is skewed by IRQ handling */
 335                if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) {
                            /* schedule the next check before doing this one */
 336                        xpc_hb_check_timeout = jiffies +
 337                            (xpc_hb_check_interval * HZ);
 338
 339                        dev_dbg(xpc_part, "checking remote heartbeats\n");
 340                        xpc_check_remote_hb();
 341
 342                        /*
 343                         * On sn2 we need to periodically recheck to ensure no
 344                         * IRQ/amo pairs have been missed.
 345                         */
 346                        if (is_shub())
 347                                force_IRQ = 1;
 348                }
 349
 350                /* check for outstanding IRQs */
 351                if (xpc_activate_IRQ_rcvd > 0 || force_IRQ != 0) {
 352                        force_IRQ = 0;
 353                        dev_dbg(xpc_part, "processing activate IRQs "
 354                                "received\n");
 355                        xpc_process_activate_IRQ_rcvd();
 356                }
 357
 358                /* wait for IRQ or timeout */
                    /* (the return value, i.e. an interrupting signal, is ignored) */
 359                (void)wait_event_interruptible(xpc_activate_IRQ_wq,
 360                                               (time_is_before_eq_jiffies(
 361                                                xpc_hb_check_timeout) ||
 362                                                xpc_activate_IRQ_rcvd > 0 ||
 363                                                xpc_exiting));
 364        }
 365
 366        xpc_stop_hb_beater();
 367
 368        dev_dbg(xpc_part, "heartbeat checker is exiting\n");
 369
 370        /* mark this thread as having exited */
 371        complete(&xpc_hb_checker_exited);
 372        return 0;
 373}
 374
 375/*
 376 * This thread will attempt to discover other partitions to activate
 377 * based on info provided by SAL. This new thread is short lived and
 378 * will exit once discovery is complete.
     *
     * @ignore: unused.
     *
     * Runs xpc_discovery() once, then completes xpc_discovery_exited.
     * Always returns 0.
 379 */
 380static int
 381xpc_initiate_discovery(void *ignore)
 382{
 383        xpc_discovery();
 384
 385        dev_dbg(xpc_part, "discovery thread is exiting\n");
 386
 387        /* mark this thread as having exited */
 388        complete(&xpc_discovery_exited);
 389        return 0;
 390}
 391
 392/*
 393 * The first kthread assigned to a newly activated partition is the one
 394 * created by XPC HB with which it calls xpc_activating(). XPC hangs on to
 395 * that kthread until the partition is brought down, at which time that kthread
 396 * returns back to XPC HB. (The return of that kthread will signify to XPC HB
 397 * that XPC has dismantled all communication infrastructure for the associated
 398 * partition.) This kthread becomes the channel manager for that partition.
 399 *
 400 * Each active partition has a channel manager, who, besides connecting and
 401 * disconnecting channels, will ensure that each of the partition's connected
 402 * channels has the required number of assigned kthreads to get the work done.
     *
     * The loop below only terminates once all three of these hold: the
     * partition is in the XPC_P_AS_DEACTIVATING state, it has no active
     * channels left, and xpc_partition_disengaged() returns non-zero.
 403 */
 404static void
 405xpc_channel_mgr(struct xpc_partition *part)
 406{
 407        while (part->act_state != XPC_P_AS_DEACTIVATING ||
 408               atomic_read(&part->nchannels_active) > 0 ||
 409               !xpc_partition_disengaged(part)) {
 410
 411                xpc_process_sent_chctl_flags(part);
 412
 413                /*
 414                 * Wait until we've been requested to activate kthreads or
 415                 * all of the channel's message queues have been torn down or
 416                 * a signal is pending.
 417                 *
 418                 * The channel_mgr_requests is set to 1 after being awakened,
 419                 * This is done to prevent the channel mgr from making one pass
 420                 * through the loop for each request, since he will
 421                 * be servicing all the requests in one pass. The reason it's
 422                 * set to 1 instead of 0 is so that other kthreads will know
 423                 * that the channel mgr is running and won't bother trying to
 424                 * wake him up.
 425                 */
 426                atomic_dec(&part->channel_mgr_requests);
 427                (void)wait_event_interruptible(part->channel_mgr_wq,
 428                                (atomic_read(&part->channel_mgr_requests) > 0 ||
 429                                 part->chctl.all_flags != 0 ||
 430                                 (part->act_state == XPC_P_AS_DEACTIVATING &&
 431                                 atomic_read(&part->nchannels_active) == 0 &&
 432                                 xpc_partition_disengaged(part))));
 433                atomic_set(&part->channel_mgr_requests, 1);
 434        }
 435}
 436
 437/*
 438 * Guarantee that the kzalloc'd memory is cacheline aligned.
     *
     * @size:  number of usable bytes required.
     * @flags: allocation flags handed to kzalloc().
     * @base:  receives the pointer actually returned by kzalloc(); this, and
     *         not the returned pointer, is what has to be passed to kfree().
     *
     * Returns a pointer aligned with L1_CACHE_ALIGN() that has at least @size
     * zeroed bytes behind it, or NULL (with *base also NULL) if the allocation
     * failed.
 439 */
 440void *
 441xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
 442{
 443        /* see if kzalloc will give us cacheline aligned memory by default */
 444        *base = kzalloc(size, flags);
 445        if (*base == NULL)
 446                return NULL;
 447
 448        if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
 449                return *base;
 450
 451        kfree(*base);
 452
 453        /* nope, we'll have to do it ourselves */
            /* the extra L1_CACHE_BYTES leave room to round the start upwards */
 454        *base = kzalloc(size + L1_CACHE_BYTES, flags);
 455        if (*base == NULL)
 456                return NULL;
 457
 458        return (void *)L1_CACHE_ALIGN((u64)*base);
 459}
 460
 461/*
 462 * Setup the channel structures necessary to support XPartition Communication
 463 * between the specified remote partition and the local one.
     *
     * Allocates part->channels and part->remote_openclose_args, initializes the
     * per-partition and per-channel state, and then calls the
     * xpc_setup_ch_structures_sn() hook.
     *
     * Returns xpSuccess once part->setup_state has been set to XPC_P_SS_SETUP,
     * xpNoMemory if one of the two allocations failed, or the value returned
     * by xpc_setup_ch_structures_sn() if that hook failed. On failure whatever
     * was allocated here has been freed again.
 464 */
 465static enum xp_retval
 466xpc_setup_ch_structures(struct xpc_partition *part)
 467{
 468        enum xp_retval ret;
 469        int ch_number;
 470        struct xpc_channel *ch;
 471        short partid = XPC_PARTID(part);
 472
 473        /*
 474         * Allocate all of the channel structures as a contiguous chunk of
 475         * memory.
 476         */
 477        DBUG_ON(part->channels != NULL);
 478        part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_MAX_NCHANNELS,
 479                                 GFP_KERNEL);
 480        if (part->channels == NULL) {
 481                dev_err(xpc_chan, "can't get memory for channels\n");
 482                return xpNoMemory;
 483        }
 484
 485        /* allocate the remote open and close args */
 486
 487        part->remote_openclose_args =
 488            xpc_kzalloc_cacheline_aligned(XPC_OPENCLOSE_ARGS_SIZE,
 489                                          GFP_KERNEL, &part->
 490                                          remote_openclose_args_base);
 491        if (part->remote_openclose_args == NULL) {
 492                dev_err(xpc_chan, "can't get memory for remote connect args\n");
 493                ret = xpNoMemory;
 494                goto out_1;
 495        }
 496
 497        part->chctl.all_flags = 0;
 498        spin_lock_init(&part->chctl_lock);
 499
            /* 1 rather than 0: see the comment in xpc_channel_mgr() */
 500        atomic_set(&part->channel_mgr_requests, 1);
 501        init_waitqueue_head(&part->channel_mgr_wq);
 502
 503        part->nchannels = XPC_MAX_NCHANNELS;
 504
 505        atomic_set(&part->nchannels_active, 0);
 506        atomic_set(&part->nchannels_engaged, 0);
 507
            /* every channel starts out disconnected with all counters at zero */
 508        for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
 509                ch = &part->channels[ch_number];
 510
 511                ch->partid = partid;
 512                ch->number = ch_number;
 513                ch->flags = XPC_C_DISCONNECTED;
 514
 515                atomic_set(&ch->kthreads_assigned, 0);
 516                atomic_set(&ch->kthreads_idle, 0);
 517                atomic_set(&ch->kthreads_active, 0);
 518
 519                atomic_set(&ch->references, 0);
 520                atomic_set(&ch->n_to_notify, 0);
 521
 522                spin_lock_init(&ch->lock);
 523                init_completion(&ch->wdisconnect_wait);
 524
 525                atomic_set(&ch->n_on_msg_allocate_wq, 0);
 526                init_waitqueue_head(&ch->msg_allocate_wq);
 527                init_waitqueue_head(&ch->idle_wq);
 528        }
 529
 530        ret = xpc_setup_ch_structures_sn(part);
 531        if (ret != xpSuccess)
 532                goto out_2;
 533
 534        /*
 535         * With the setting of the partition setup_state to XPC_P_SS_SETUP,
 536         * we're declaring that this partition is ready to go.
 537         */
 538        part->setup_state = XPC_P_SS_SETUP;
 539
 540        return xpSuccess;
 541
 542        /* setup of ch structures failed */
 543out_2:
            /* free the pointer kzalloc() returned, not the aligned one */
 544        kfree(part->remote_openclose_args_base);
 545        part->remote_openclose_args = NULL;
 546out_1:
 547        kfree(part->channels);
 548        part->channels = NULL;
 549        return ret;
 550}
 551
 552/*
 553 * Teardown the channel structures necessary to support XPartition Communication
 554 * between the specified remote partition and the local one.
     *
     * Expects that no channel of the partition is engaged or active any more
     * and that the partition is in the XPC_P_SS_SETUP state (all three are
     * asserted with DBUG_ON()). Blocks until part->references has dropped to
     * zero, and leaves the partition in the XPC_P_SS_TORNDOWN state.
 555 */
 556static void
 557xpc_teardown_ch_structures(struct xpc_partition *part)
 558{
 559        DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
 560        DBUG_ON(atomic_read(&part->nchannels_active) != 0);
 561
 562        /*
 563         * Make this partition inaccessible to local processes by marking it
 564         * as no longer setup. Then wait before proceeding with the teardown
 565         * until all existing references cease.
 566         */
 567        DBUG_ON(part->setup_state != XPC_P_SS_SETUP);
 568        part->setup_state = XPC_P_SS_WTEARDOWN;
 569
 570        wait_event(part->teardown_wq, (atomic_read(&part->references) == 0));
 571
 572        /* now we can begin tearing down the infrastructure */
 573
 574        xpc_teardown_ch_structures_sn(part);
 575
            /* free the pointer kzalloc() returned, not the aligned one */
 576        kfree(part->remote_openclose_args_base);
 577        part->remote_openclose_args = NULL;
 578        kfree(part->channels);
 579        part->channels = NULL;
 580
 581        part->setup_state = XPC_P_SS_TORNDOWN;
 582}
 583
 584/*
 585 * When XPC HB determines that a partition has come up, it will create a new
 586 * kthread and that kthread will call this function to attempt to set up the
 587 * basic infrastructure used for Cross Partition Communication with the newly
 588 * upped partition.
 589 *
 590 * The kthread that was created by XPC HB and which setup the XPC
 591 * infrastructure will remain assigned to the partition becoming the channel
 592 * manager for that partition until the partition is deactivating, at which
 593 * time the kthread will teardown the XPC infrastructure and then exit.
     *
     * @__partid: the partition id, passed as a pointer-sized integer (see the
     *            kthread_run() call in xpc_activate_partition()).
     *
     * Always returns 0.
 594 */
 595static int
 596xpc_activating(void *__partid)
 597{
 598        short partid = (u64)__partid;
 599        struct xpc_partition *part = &xpc_partitions[partid];
 600        unsigned long irq_flags;
 601
 602        DBUG_ON(partid < 0 || partid >= xp_max_npartitions);
 603
 604        spin_lock_irqsave(&part->act_lock, irq_flags);
 605
            /* deactivation was requested before this kthread got to run */
 606        if (part->act_state == XPC_P_AS_DEACTIVATING) {
 607                part->act_state = XPC_P_AS_INACTIVE;
 608                spin_unlock_irqrestore(&part->act_lock, irq_flags);
 609                part->remote_rp_pa = 0;
 610                return 0;
 611        }
 612
 613        /* indicate the thread is activating */
 614        DBUG_ON(part->act_state != XPC_P_AS_ACTIVATION_REQ);
 615        part->act_state = XPC_P_AS_ACTIVATING;
 616
 617        XPC_SET_REASON(part, 0, 0);
 618        spin_unlock_irqrestore(&part->act_lock, irq_flags);
 619
 620        dev_dbg(xpc_part, "activating partition %d\n", partid);
 621
 622        xpc_allow_hb(partid);
 623
 624        if (xpc_setup_ch_structures(part) == xpSuccess) {
 625                (void)xpc_part_ref(part);        /* this will always succeed */
 626
 627                if (xpc_make_first_contact(part) == xpSuccess) {
 628                        xpc_mark_partition_active(part);
 629                        xpc_channel_mgr(part);
 630                        /* won't return until partition is deactivating */
 631                }
 632
                    /* drop our reference first; the teardown waits for zero */
 633                xpc_part_deref(part);
 634                xpc_teardown_ch_structures(part);
 635        }
 636
 637        xpc_disallow_hb(partid);
 638        xpc_mark_partition_inactive(part);
 639
 640        if (part->reason == xpReactivating) {
 641                /* interrupting ourselves results in activating partition */
 642                xpc_request_partition_reactivation(part);
 643        }
 644
 645        return 0;
 646}
 647
    /*
     * Request activation of the specified partition, which has to be in the
     * XPC_P_AS_INACTIVE state (asserted with DBUG_ON()).
     *
     * The partition is moved to XPC_P_AS_ACTIVATION_REQ under part->act_lock
     * and a kthread named "xpc%02d" (formatted with the partition id) is
     * started to run xpc_activating(). If the kthread cannot be created, the
     * partition is put back to XPC_P_AS_INACTIVE with the reason set to
     * xpCloneKThreadFailed.
     */
 648void
 649xpc_activate_partition(struct xpc_partition *part)
 650{
 651        short partid = XPC_PARTID(part);
 652        unsigned long irq_flags;
 653        struct task_struct *kthread;
 654
 655        spin_lock_irqsave(&part->act_lock, irq_flags);
 656
 657        DBUG_ON(part->act_state != XPC_P_AS_INACTIVE);
 658
 659        part->act_state = XPC_P_AS_ACTIVATION_REQ;
 660        XPC_SET_REASON(part, xpCloneKThread, __LINE__);
 661
 662        spin_unlock_irqrestore(&part->act_lock, irq_flags);
 663
            /* the partition id travels to the kthread inside the pointer value */
 664        kthread = kthread_run(xpc_activating, (void *)((u64)partid), "xpc%02d",
 665                              partid);
 666        if (IS_ERR(kthread)) {
 667                spin_lock_irqsave(&part->act_lock, irq_flags);
 668                part->act_state = XPC_P_AS_INACTIVE;
 669                XPC_SET_REASON(part, xpCloneKThreadFailed, __LINE__);
 670                spin_unlock_irqrestore(&part->act_lock, irq_flags);
 671        }
 672}
 673
    /*
     * Make up to @needed kthreads available to the channel @ch.
     *
     * @ch:     the channel that needs kthreads.
     * @needed: number of kthreads wanted; must be > 0 (asserted with
     *          DBUG_ON()).
     *
     * Idle kthreads of the channel are woken first. Only for the part of
     * @needed they cannot cover are new kthreads created, and then no more
     * than ch->kthreads_assigned_limit allows on top of those already
     * assigned.
     */
 674void
 675xpc_activate_kthreads(struct xpc_channel *ch, int needed)
 676{
 677        int idle = atomic_read(&ch->kthreads_idle);
 678        int assigned = atomic_read(&ch->kthreads_assigned);
 679        int wakeup;
 680
 681        DBUG_ON(needed <= 0);
 682
 683        if (idle > 0) {
 684                wakeup = (needed > idle) ? idle : needed;
 685                needed -= wakeup;
 686
 687                dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, "
 688                        "channel=%d\n", wakeup, ch->partid, ch->number);
 689
 690                /* only wakeup the requested number of kthreads */
 691                wake_up_nr(&ch->idle_wq, wakeup);
 692        }
 693
            /* the idle kthreads were enough */
 694        if (needed <= 0)
 695                return;
 696
            /* clip the request so that the assigned limit is not exceeded */
 697        if (needed + assigned > ch->kthreads_assigned_limit) {
 698                needed = ch->kthreads_assigned_limit - assigned;
 699                if (needed <= 0)
 700                        return;
 701        }
 702
 703        dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n",
 704                needed, ch->partid, ch->number);
 705
 706        xpc_create_kthreads(ch, needed, 0);
 707}
 708
 709/*
 710 * This function is where XPC's kthreads wait for messages to deliver.
     *
     * @part: not used in the body of this function.
     * @ch:   the channel whose payloads are delivered.
     *
     * Returns once the channel has XPC_C_DISCONNECTING set, or when going idle
     * would push ch->kthreads_idle above ch->kthreads_idle_limit.
 711 */
 712static void
 713xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
 714{
 715        do {
 716                /* deliver messages to their intended recipients */
 717
 718                while (xpc_n_of_deliverable_payloads(ch) > 0 &&
 719                       !(ch->flags & XPC_C_DISCONNECTING)) {
 720                        xpc_deliver_payload(ch);
 721                }
 722
 723                if (atomic_inc_return(&ch->kthreads_idle) >
 724                    ch->kthreads_idle_limit) {
 725                        /* too many idle kthreads on this channel */
 726                        atomic_dec(&ch->kthreads_idle);
 727                        break;
 728                }
 729
 730                dev_dbg(xpc_chan, "idle kthread calling "
 731                        "wait_event_interruptible_exclusive()\n");
 732
                    /* sleep until there is something to deliver or a disconnect */
 733                (void)wait_event_interruptible_exclusive(ch->idle_wq,
 734                                (xpc_n_of_deliverable_payloads(ch) > 0 ||
 735                                 (ch->flags & XPC_C_DISCONNECTING)));
 736
 737                atomic_dec(&ch->kthreads_idle);
 738
 739        } while (!(ch->flags & XPC_C_DISCONNECTING));
 740}
 741
    /*
     * Entry point of a kthread assigned to a channel.
     *
     * @args: partition id and channel number, extracted with
     *        XPC_UNPACK_ARG1() and XPC_UNPACK_ARG2().
     *
     * Unless the channel is already disconnecting, the first kthread to get
     * here makes the connected callout, and every kthread then delivers
     * payloads in xpc_kthread_waitmsgs(). On the way out the first kthread to
     * get there makes the disconnecting callout (only if the connected callout
     * was made), and the kthread drops its kthreads_assigned count along with
     * a message queue and a partition reference.
     *
     * Always returns 0.
     */
 742static int
 743xpc_kthread_start(void *args)
 744{
 745        short partid = XPC_UNPACK_ARG1(args);
 746        u16 ch_number = XPC_UNPACK_ARG2(args);
 747        struct xpc_partition *part = &xpc_partitions[partid];
 748        struct xpc_channel *ch;
 749        int n_needed;
 750        unsigned long irq_flags;
 751
 752        dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n",
 753                partid, ch_number);
 754
 755        ch = &part->channels[ch_number];
 756
 757        if (!(ch->flags & XPC_C_DISCONNECTING)) {
 758
 759                /* let registerer know that connection has been established */
 760
 761                spin_lock_irqsave(&ch->lock, irq_flags);
 762                if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) {
 763                        ch->flags |= XPC_C_CONNECTEDCALLOUT;
 764                        spin_unlock_irqrestore(&ch->lock, irq_flags);
 765
                            /* the callout itself is made without ch->lock held */
 766                        xpc_connected_callout(ch);
 767
 768                        spin_lock_irqsave(&ch->lock, irq_flags);
 769                        ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE;
 770                        spin_unlock_irqrestore(&ch->lock, irq_flags);
 771
 772                        /*
 773                         * It is possible that while the callout was being
 774                         * made that the remote partition sent some messages.
 775                         * If that is the case, we may need to activate
 776                         * additional kthreads to help deliver them. We only
 777                         * need one less than total #of messages to deliver.
 778                         */
 779                        n_needed = xpc_n_of_deliverable_payloads(ch) - 1;
 780                        if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING))
 781                                xpc_activate_kthreads(ch, n_needed);
 782
 783                } else {
 784                        spin_unlock_irqrestore(&ch->lock, irq_flags);
 785                }
 786
 787                xpc_kthread_waitmsgs(part, ch);
 788        }
 789
 790        /* let registerer know that connection is disconnecting */
 791
 792        spin_lock_irqsave(&ch->lock, irq_flags);
 793        if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
 794            !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
 795                ch->flags |= XPC_C_DISCONNECTINGCALLOUT;
 796                spin_unlock_irqrestore(&ch->lock, irq_flags);
 797
                    /* the callout itself is made without ch->lock held */
 798                xpc_disconnect_callout(ch, xpDisconnecting);
 799
 800                spin_lock_irqsave(&ch->lock, irq_flags);
 801                ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE;
 802        }
 803        spin_unlock_irqrestore(&ch->lock, irq_flags);
 804
            /*
             * The kthread that takes the channel's assigned count to zero also
             * decrements nchannels_engaged; if that reaches zero as well, the
             * partition is indicated as disengaged.
             */
 805        if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
 806            atomic_dec_return(&part->nchannels_engaged) == 0) {
 807                xpc_indicate_partition_disengaged(part);
 808        }
 809
            /*
             * NOTE(review): this and xpc_part_deref() below are presumably the
             * counterparts of what xpc_create_kthreads() does on behalf of the
             * new kthread - confirm against that function.
             */
 810        xpc_msgqueue_deref(ch);
 811
 812        dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n",
 813                partid, ch_number);
 814
 815        xpc_part_deref(part);
 816        return 0;
 817}
 818
 819/*
 820 * For each partition that XPC has established communications with, there is
 821 * a minimum of one kernel thread assigned to perform any operation that
 822 * may potentially sleep or block (basically the callouts to the asynchronous
 823 * functions registered via xpc_connect()).
 824 *
 825 * Additional kthreads are created and destroyed by XPC as the workload
 826 * demands.
 827 *
 828 * A kthread is assigned to one of the active channels that exists for a given
 829 * partition.
     *
     * Arguments:
     *
     *	ch - channel the new kthreads are assigned to.
     *	needed - upper bound on the #of kthreads to start; the loop stops
     *		early if the channel is disconnecting, if kthreads_assigned
     *		has already dropped to zero, or if kthread_run() fails.
     *	ignore_disconnecting - when non-zero, the XPC_C_DISCONNECTING check
     *		is skipped and a kthread is only started if kthreads_assigned
     *		is still non-zero (atomic_inc_not_zero()).
     *
     * Nothing is returned. When kthread_run() fails and the channel is left
     * with fewer kthreads than ch->kthreads_idle_limit, the channel is
     * disconnected with reason xpLackOfResources.
 830 */
 831void
 832xpc_create_kthreads(struct xpc_channel *ch, int needed,
 833                    int ignore_disconnecting)
 834{
 835        unsigned long irq_flags;
            /* partid and channel number packed into the kthread's argument */
 836        u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
 837        struct xpc_partition *part = &xpc_partitions[ch->partid];
 838        struct task_struct *kthread;
 839
 840        while (needed-- > 0) {
 841
 842                /*
 843                 * The following is done on behalf of the newly created
 844                 * kthread. That kthread is responsible for doing the
 845                 * counterpart to the following before it exits.
 846                 */
 847                if (ignore_disconnecting) {
 848                        if (!atomic_inc_not_zero(&ch->kthreads_assigned)) {
 849                                /* kthreads assigned had gone to zero */
 850                                BUG_ON(!(ch->flags &
 851                                         XPC_C_DISCONNECTINGCALLOUT_MADE));
 852                                break;
 853                        }
 854
                    /*
                     * NOTE(review): ch->flags is read here without ch->lock,
                     * while the writers visible in this file update it under
                     * the lock -- confirm a stale read is harmless.
                     */
 855                } else if (ch->flags & XPC_C_DISCONNECTING) {
 856                        break;
 857
                    /*
                     * First kthread for this channel and first engaged
                     * channel for this partition: mark the partition engaged.
                     */
 858                } else if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
 859                           atomic_inc_return(&part->nchannels_engaged) == 1) {
 860                                xpc_indicate_partition_engaged(part);
 861                }
                    /*
                     * NOTE(review): the result of xpc_part_ref() is discarded
                     * here, whereas xpc_disconnect_wait() skips the partition
                     * when it returns zero -- confirm it cannot fail on this
                     * path, otherwise the xpc_part_deref() calls that pair
                     * with it would be unbalanced.
                     */
 862                (void)xpc_part_ref(part);
 863                xpc_msgqueue_ref(ch);
 864
 865                kthread = kthread_run(xpc_kthread_start, (void *)args,
 866                                      "xpc%02dc%d", ch->partid, ch->number);
 867                if (IS_ERR(kthread)) {
 868                        /* the fork failed */
 869
 870                        /*
 871                         * NOTE: if (ignore_disconnecting &&
 872                         * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true,
 873                         * then we'll deadlock if all other kthreads assigned
 874                         * to this channel are blocked in the channel's
 875                         * registerer, because the only thing that will unblock
 876                         * them is the xpDisconnecting callout that this
 877                         * failed kthread_run() would have made.
 878                         */
 879
                            /* undo everything done above on the kthread's behalf */
 880                        if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
 881                            atomic_dec_return(&part->nchannels_engaged) == 0) {
 882                                xpc_indicate_partition_disengaged(part);
 883                        }
 884                        xpc_msgqueue_deref(ch);
 885                        xpc_part_deref(part);
 886
 887                        if (atomic_read(&ch->kthreads_assigned) <
 888                            ch->kthreads_idle_limit) {
 889                                /*
 890                                 * Flag this as an error only if we have an
 891                                 * insufficient #of kthreads for the channel
 892                                 * to function.
 893                                 */
 894                                spin_lock_irqsave(&ch->lock, irq_flags);
                                    /*
                                     * irq_flags is passed by address,
                                     * presumably so the callee can drop and
                                     * retake ch->lock -- TODO confirm.
                                     */
 895                                XPC_DISCONNECT_CHANNEL(ch, xpLackOfResources,
 896                                                       &irq_flags);
 897                                spin_unlock_irqrestore(&ch->lock, irq_flags);
 898                        }
 899                        break;
 900                }
 901        }
 902}
 903
    /*
     * Wait, for every partition a reference can be taken on, until channel
     * ch_number has finished disconnecting.
     *
     * Only channels that have XPC_C_WDISCONNECT set are waited on. Once the
     * channel's wdisconnect_wait completion fires, any chctl flags that were
     * delayed for the channel are handed to the partition (unless the partition
     * is deactivating), XPC_C_WDISCONNECT is cleared, and the channel manager
     * is woken up if flags were handed over.
     *
     * May sleep in wait_for_completion().
     */
 904void
 905xpc_disconnect_wait(int ch_number)
 906{
 907        unsigned long irq_flags;
 908        short partid;
 909        struct xpc_partition *part;
 910        struct xpc_channel *ch;
 911        int wakeup_channel_mgr;
 912
 913        /* now wait for all callouts to the caller's function to cease */
 914        for (partid = 0; partid < xp_max_npartitions; partid++) {
 915                part = &xpc_partitions[partid];
 916
                    /* skip partitions on which no reference could be obtained */
 917                if (!xpc_part_ref(part))
 918                        continue;
 919
 920                ch = &part->channels[ch_number];
 921
                    /* nobody asked to wait on this channel; drop the ref again */
 922                if (!(ch->flags & XPC_C_WDISCONNECT)) {
 923                        xpc_part_deref(part);
 924                        continue;
 925                }
 926
 927                wait_for_completion(&ch->wdisconnect_wait);
 928
 929                spin_lock_irqsave(&ch->lock, irq_flags);
 930                DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
 931                wakeup_channel_mgr = 0;
 932
                    /*
                     * Merge the delayed chctl flags into the partition's flags
                     * for this channel. They are discarded instead when the
                     * partition is deactivating.
                     */
 933                if (ch->delayed_chctl_flags) {
 934                        if (part->act_state != XPC_P_AS_DEACTIVATING) {
 935                                spin_lock(&part->chctl_lock);
 936                                part->chctl.flags[ch->number] |=
 937                                    ch->delayed_chctl_flags;
 938                                spin_unlock(&part->chctl_lock);
 939                                wakeup_channel_mgr = 1;
 940                        }
 941                        ch->delayed_chctl_flags = 0;
 942                }
 943
 944                ch->flags &= ~XPC_C_WDISCONNECT;
 945                spin_unlock_irqrestore(&ch->lock, irq_flags);
 946
                    /* done outside ch->lock */
 947                if (wakeup_channel_mgr)
 948                        xpc_wakeup_channel_mgr(part);
 949
 950                xpc_part_deref(part);
 951        }
 952}
 953
    /*
     * Allocate the zero-filled xpc_partitions[] array (xp_max_npartitions
     * entries), initialize the leading fields of every entry and then run
     * xpc_setup_partitions_sn().
     *
     * Returns -ENOMEM if the array cannot be allocated, otherwise whatever
     * xpc_setup_partitions_sn() returns.
     *
     * NOTE(review): when xpc_setup_partitions_sn() returns non-zero the array
     * allocated here is not freed, and xpc_init() reacts to a failure of this
     * function by jumping to out_1, which skips xpc_teardown_partitions().
     * That looks like a leak of xpc_partitions -- confirm whether
     * xpc_setup_partitions_sn() can actually fail.
     *
     * NOTE(review): the size passed to kzalloc() is an unchecked
     * multiplication; kcalloc() would perform the overflow check.
     */
 954static int
 955xpc_setup_partitions(void)
 956{
 957        short partid;
 958        struct xpc_partition *part;
 959
 960        xpc_partitions = kzalloc(sizeof(struct xpc_partition) *
 961                                 xp_max_npartitions, GFP_KERNEL);
 962        if (xpc_partitions == NULL) {
 963                dev_err(xpc_part, "can't get memory for partition structure\n");
 964                return -ENOMEM;
 965        }
 966
 967        /*
 968         * The first few fields of each entry of xpc_partitions[] need to
 969         * be initialized now so that calls to xpc_connect() and
 970         * xpc_disconnect() can be made prior to the activation of any remote
 971         * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
 972         * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
 973         * PARTITION HAS BEEN ACTIVATED.
 974         */
 975        for (partid = 0; partid < xp_max_npartitions; partid++) {
 976                part = &xpc_partitions[partid];
 977
                    /* every entry is expected to start on an L1 cache line */
 978                DBUG_ON((u64)part != L1_CACHE_ALIGN((u64)part));
 979
 980                part->activate_IRQ_rcvd = 0;
 981                spin_lock_init(&part->act_lock);
 982                part->act_state = XPC_P_AS_INACTIVE;
 983                XPC_SET_REASON(part, 0, 0);
 984
                    /* timer is only prepared here, it is not armed */
 985                init_timer(&part->disengage_timer);
 986                part->disengage_timer.function =
 987                    xpc_timeout_partition_disengage;
 988                part->disengage_timer.data = (unsigned long)part;
 989
 990                part->setup_state = XPC_P_SS_UNSET;
 991                init_waitqueue_head(&part->teardown_wq);
 992                atomic_set(&part->references, 0);
 993        }
 994
 995        return xpc_setup_partitions_sn();
 996}
 997
    /*
     * Free the xpc_partitions[] array allocated by xpc_setup_partitions().
     *
     * NOTE(review): xpc_partitions is left pointing at the freed memory. Any
     * code that indexes the array afterwards (xpc_die_deactivate() does so
     * without a check) would touch freed memory -- confirm nothing can run
     * after this call.
     */
 998static void
 999xpc_teardown_partitions(void)
1000{
1001        kfree(xpc_partitions);
1002}
1003
    /*
     * Shut XPC down; used for module unload, for the reboot notifier and for
     * the failure path in xpc_init() once the heartbeat checker is running.
     *
     *	reason - passed to XPC_DEACTIVATE_PARTITION() for every partition that
     *		is still active; xpUnloading additionally causes the die and
     *		reboot notifiers to be unregistered.
     *
     * Sequence: flag xpc_exiting and wake the heartbeat checker, wait for the
     * discovery and heartbeat checker threads to exit, poll roughly every
     * 300ms until no partition is active or engaged, then release the
     * reserved page, the interface, the sysctl table, xpc_partitions[] and the
     * architecture specific state.
     *
     * Sleeps; must not be called from atomic context.
     *
     * NOTE(review): the notifiers are only unregistered for xpUnloading, yet
     * xpc_teardown_partitions() frees xpc_partitions[] for every reason.
     * xpc_die_deactivate() indexes xpc_partitions[] -- confirm the die
     * notifier cannot fire after a reboot-initiated call has returned.
     */
1004static void
1005xpc_do_exit(enum xp_retval reason)
1006{
1007        short partid;
1008        int active_part_count, printed_waiting_msg = 0;
1009        struct xpc_partition *part;
1010        unsigned long printmsg_time, disengage_timeout = 0;
1011
1012        /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
1013        DBUG_ON(xpc_exiting == 1);
1014
1015        /*
1016         * Let the heartbeat checker thread and the discovery thread
1017         * (if one is running) know that they should exit. Also wake up
1018         * the heartbeat checker thread in case it's sleeping.
1019         */
1020        xpc_exiting = 1;
1021        wake_up_interruptible(&xpc_activate_IRQ_wq);
1022
1023        /* wait for the discovery thread to exit */
1024        wait_for_completion(&xpc_discovery_exited);
1025
1026        /* wait for the heartbeat checker thread to exit */
1027        wait_for_completion(&xpc_hb_checker_exited);
1028
1029        /* sleep for a 1/3 of a second or so */
1030        (void)msleep_interruptible(300);
1031
1032        /* wait for all partitions to become inactive */
1033
1034        printmsg_time = jiffies + (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
1035        xpc_disengage_timedout = 0;
1036
1037        do {
1038                active_part_count = 0;
1039
1040                for (partid = 0; partid < xp_max_npartitions; partid++) {
1041                        part = &xpc_partitions[partid];
1042
                            /* nothing to do for a disengaged, inactive partition */
1043                        if (xpc_partition_disengaged(part) &&
1044                            part->act_state == XPC_P_AS_INACTIVE) {
1045                                continue;
1046                        }
1047
1048                        active_part_count++;
1049
1050                        XPC_DEACTIVATE_PARTITION(part, reason);
1051
                            /* remember the latest timeout for the message below */
1052                        if (part->disengage_timeout > disengage_timeout)
1053                                disengage_timeout = part->disengage_timeout;
1054                }
1055
1056                if (xpc_any_partition_engaged()) {
1057                        if (time_is_before_jiffies(printmsg_time)) {
                                    /*
                                     * NOTE(review): the argument is unsigned
                                     * long but printed with %ld, and the
                                     * subtraction wraps once jiffies has
                                     * passed disengage_timeout (which is
                                     * still 0 if no partition raised it).
                                     */
1058                                dev_info(xpc_part, "waiting for remote "
1059                                         "partitions to deactivate, timeout in "
1060                                         "%ld seconds\n", (disengage_timeout -
1061                                         jiffies) / HZ);
1062                                printmsg_time = jiffies +
1063                                    (XPC_DEACTIVATE_PRINTMSG_INTERVAL * HZ);
1064                                printed_waiting_msg = 1;
1065                        }
1066
1067                } else if (active_part_count > 0) {
                            /*
                             * Printed at most once after each "waiting for
                             * remote partitions" message.
                             */
1068                        if (printed_waiting_msg) {
1069                                dev_info(xpc_part, "waiting for local partition"
1070                                         " to deactivate\n");
1071                                printed_waiting_msg = 0;
1072                        }
1073
1074                } else {
                            /* nothing engaged and nothing active: we are done */
1075                        if (!xpc_disengage_timedout) {
1076                                dev_info(xpc_part, "all partitions have "
1077                                         "deactivated\n");
1078                        }
1079                        break;
1080                }
1081
1082                /* sleep for a 1/3 of a second or so */
1083                (void)msleep_interruptible(300);
1084
1085        } while (1);
1086
1087        DBUG_ON(xpc_any_partition_engaged());
1088        DBUG_ON(xpc_any_hbs_allowed() != 0);
1089
1090        xpc_teardown_rsvd_page();
1091
1092        if (reason == xpUnloading) {
1093                (void)unregister_die_notifier(&xpc_die_notifier);
1094                (void)unregister_reboot_notifier(&xpc_reboot_notifier);
1095        }
1096
1097        /* clear the interface to XPC's functions */
1098        xpc_clear_interface();
1099
            /* xpc_sysctl holds the unchecked result of register_sysctl_table() */
1100        if (xpc_sysctl)
1101                unregister_sysctl_table(xpc_sysctl);
1102
1103        xpc_teardown_partitions();
1104
1105        if (is_shub())
1106                xpc_exit_sn2();
1107        else if (is_uv())
1108                xpc_exit_uv();
1109}
1110
1111/*
1112 * This function is called when the system is being rebooted.
     *
     * The notifier event is translated into an xp_retval reason (any event
     * other than SYS_RESTART, SYS_HALT or SYS_POWER_OFF maps to
     * xpSystemGoingDown) and XPC is shut down via xpc_do_exit(). nb and unused
     * are not looked at. Always returns NOTIFY_DONE.
1113 */
1114static int
1115xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
1116{
1117        enum xp_retval reason;
1118
1119        switch (event) {
1120        case SYS_RESTART:
1121                reason = xpSystemReboot;
1122                break;
1123        case SYS_HALT:
1124                reason = xpSystemHalt;
1125                break;
1126        case SYS_POWER_OFF:
1127                reason = xpSystemPoweroff;
1128                break;
1129        default:
1130                reason = xpSystemGoingDown;
1131        }
1132
1133        xpc_do_exit(reason);
1134        return NOTIFY_DONE;
1135}
1136
1137/*
1138 * Notify other partitions to deactivate from us by first disengaging from all
1139 * references to our memory.
     *
     * Busy-waits (udelay(), no sleeping) until no partition is engaged any
     * more or until about xpc_disengage_timelimit seconds have passed.
     *
     * NOTE(review): xpc_partitions[] is indexed without checking that the
     * array is still allocated; xpc_do_exit() frees it but only unregisters
     * the die notifier for xpUnloading -- confirm this cannot run afterwards.
1140 */
1141static void
1142xpc_die_deactivate(void)
1143{
1144        struct xpc_partition *part;
1145        short partid;
1146        int any_engaged;
1147        long keep_waiting;
1148        long wait_to_print;
1149
1150        /* keep xpc_hb_checker thread from doing anything (just in case) */
1151        xpc_exiting = 1;
1152
1153        xpc_disallow_all_hbs();        /*indicate we're deactivated */
1154
1155        for (partid = 0; partid < xp_max_npartitions; partid++) {
1156                part = &xpc_partitions[partid];
1157
1158                if (xpc_partition_engaged(partid) ||
1159                    part->act_state != XPC_P_AS_INACTIVE) {
1160                        xpc_request_partition_deactivation(part);
1161                        xpc_indicate_partition_disengaged(part);
1162                }
1163        }
1164
1165        /*
1166         * Though we requested that all other partitions deactivate from us,
1167         * we only wait until they've all disengaged or we've reached the
1168         * defined timelimit.
1169         *
1170         * Given that one iteration through the following while-loop takes
1171         * approximately 200 microseconds, calculate the #of loops to take
1172         * before bailing and the #of loops before printing a waiting message.
1173         */
            /* seconds * 1000 * 5 == #of 200 microsecond iterations */
1174        keep_waiting = xpc_disengage_timelimit * 1000 * 5;
1175        wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5;
1176
1177        while (1) {
1178                any_engaged = xpc_any_partition_engaged();
1179                if (!any_engaged) {
1180                        dev_info(xpc_part, "all partitions have deactivated\n");
1181                        break;
1182                }
1183
                    /* time limit reached: report who is still engaged and give up */
1184                if (!keep_waiting--) {
1185                        for (partid = 0; partid < xp_max_npartitions;
1186                             partid++) {
1187                                if (xpc_partition_engaged(partid)) {
1188                                        dev_info(xpc_part, "deactivate from "
1189                                                 "remote partition %d timed "
1190                                                 "out\n", partid);
1191                                }
1192                        }
1193                        break;
1194                }
1195
                    /* periodic progress message; converts loops back to seconds */
1196                if (!wait_to_print--) {
1197                        dev_info(xpc_part, "waiting for remote partitions to "
1198                                 "deactivate, timeout in %ld seconds\n",
1199                                 keep_waiting / (1000 * 5));
1200                        wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL *
1201                            1000 * 5;
1202                }
1203
1204                udelay(200);
1205        }
1206}
1207
1208/*
1209 * This function is called when the system is being restarted or halted due
1210 * to some sort of system failure. If this is the case we need to notify the
1211 * other partitions to disengage from all references to our memory.
1212 * This function can also be called when our heartbeater could be offlined
1213 * for a time. In this case we need to notify other partitions to not worry
1214 * about the lack of a heartbeat.
     *
     * With CONFIG_IA64 the event selects the action and events not listed in
     * the switch are ignored; without it every event ends up in
     * xpc_die_deactivate(). nb and unused are not looked at. Always returns
     * NOTIFY_DONE.
1215 */
1216static int
1217xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
1218{
1219#ifdef CONFIG_IA64                /* !!! temporary kludge */
1220        switch (event) {
1221        case DIE_MACHINE_RESTART:
1222        case DIE_MACHINE_HALT:
1223                xpc_die_deactivate();
1224                break;
1225
1226        case DIE_KDEBUG_ENTER:
1227                /* Should lack of heartbeat be ignored by other partitions? */
1228                if (!xpc_kdebug_ignore)
1229                        break;
1230
1231                /* fall through */
1232        case DIE_MCA_MONARCH_ENTER:
1233        case DIE_INIT_MONARCH_ENTER:
1234                xpc_offline_heartbeat();
1235                break;
1236
1237        case DIE_KDEBUG_LEAVE:
1238                /* Is lack of heartbeat being ignored by other partitions? */
1239                if (!xpc_kdebug_ignore)
1240                        break;
1241
1242                /* fall through */
1243        case DIE_MCA_MONARCH_LEAVE:
1244        case DIE_INIT_MONARCH_LEAVE:
1245                xpc_online_heartbeat();
1246                break;
1247        }
1248#else
1249        xpc_die_deactivate();
1250#endif
1251
1252        return NOTIFY_DONE;
1253}
1254
    /*
     * Module initialization.
     *
     * Runs the architecture specific init (sn2 or uv), allocates
     * xpc_partitions[], registers the sysctl table, sets up the reserved page,
     * registers the reboot and die notifiers, starts the heartbeat checker and
     * discovery kthreads and finally publishes XPC's functions through
     * xpc_set_interface().
     *
     * Returns 0 on success. Otherwise returns -ENODEV when neither is_shub()
     * nor is_uv() holds, -EINVAL when an sn2 system does not have
     * xp_max_npartitions set to 64, -EBUSY when a kthread cannot be started,
     * or the error reported by the failing setup step.
     */
1255int __init
1256xpc_init(void)
1257{
1258        int ret;
1259        struct task_struct *kthread;
1260
1261        snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
1262        snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
1263
1264        if (is_shub()) {
1265                /*
1266                 * The ia64-sn2 architecture supports at most 64 partitions.
1267                 * And the inability to unregister remote amos restricts us
1268                 * further to only support exactly 64 partitions on this
1269                 * architecture, no less.
1270                 */
1271                if (xp_max_npartitions != 64) {
1272                        dev_err(xpc_part, "max #of partitions not set to 64\n");
1273                        ret = -EINVAL;
1274                } else {
1275                        ret = xpc_init_sn2();
1276                }
1277
1278        } else if (is_uv()) {
1279                ret = xpc_init_uv();
1280
1281        } else {
1282                ret = -ENODEV;
1283        }
1284
            /* nothing has been set up yet, so there is nothing to undo */
1285        if (ret != 0)
1286                return ret;
1287
            /*
             * NOTE(review): xpc_setup_partitions() already logs this exact
             * message when the allocation fails, and it can also fail through
             * xpc_setup_partitions_sn(), in which case the text below is
             * misleading. On that second path xpc_partitions stays allocated
             * because out_1 skips xpc_teardown_partitions().
             */
1288        ret = xpc_setup_partitions();
1289        if (ret != 0) {
1290                dev_err(xpc_part, "can't get memory for partition structure\n");
1291                goto out_1;
1292        }
1293
            /* result is not checked; a NULL xpc_sysctl is tolerated on teardown */
1294        xpc_sysctl = register_sysctl_table(xpc_sys_dir);
1295
1296        /*
1297         * Fill the partition reserved page with the information needed by
1298         * other partitions to discover we are alive and establish initial
1299         * communications.
1300         */
1301        ret = xpc_setup_rsvd_page();
1302        if (ret != 0) {
1303                dev_err(xpc_part, "can't setup our reserved page\n");
1304                goto out_2;
1305        }
1306
            /*
             * Failing to register either notifier is only warned about;
             * initialization continues and ret is overwritten below.
             *
             * NOTE(review): out_3 and xpc_do_exit() unregister both notifiers
             * regardless of whether registration succeeded -- confirm that is
             * harmless.
             */
1307        /* add ourselves to the reboot_notifier_list */
1308        ret = register_reboot_notifier(&xpc_reboot_notifier);
1309        if (ret != 0)
1310                dev_warn(xpc_part, "can't register reboot notifier\n");
1311
1312        /* add ourselves to the die_notifier list */
1313        ret = register_die_notifier(&xpc_die_notifier);
1314        if (ret != 0)
1315                dev_warn(xpc_part, "can't register die notifier\n");
1316
1317        /*
1318         * The real work-horse behind xpc.  This processes incoming
1319         * interrupts and monitors remote heartbeats.
1320         */
1321        kthread = kthread_run(xpc_hb_checker, NULL, XPC_HB_CHECK_THREAD_NAME);
1322        if (IS_ERR(kthread)) {
1323                dev_err(xpc_part, "failed while forking hb check thread\n");
1324                ret = -EBUSY;
1325                goto out_3;
1326        }
1327
1328        /*
1329         * Startup a thread that will attempt to discover other partitions to
1330         * activate based on info provided by SAL. This new thread is short
1331         * lived and will exit once discovery is complete.
1332         */
1333        kthread = kthread_run(xpc_initiate_discovery, NULL,
1334                              XPC_DISCOVERY_THREAD_NAME);
1335        if (IS_ERR(kthread)) {
1336                dev_err(xpc_part, "failed while forking discovery thread\n");
1337
1338                /* mark this new thread as a non-starter */
1339                complete(&xpc_discovery_exited);
1340
                    /*
                     * The heartbeat checker is already running, so the full
                     * shutdown path is used here instead of the out_* labels;
                     * xpc_do_exit() waits for that thread to exit and performs
                     * the same teardown.
                     */
1341                xpc_do_exit(xpUnloading);
1342                return -EBUSY;
1343        }
1344
1345        /* set the interface to point at XPC's functions */
1346        xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect,
1347                          xpc_initiate_send, xpc_initiate_send_notify,
1348                          xpc_initiate_received, xpc_initiate_partid_to_nasids);
1349
1350        return 0;
1351
1352        /* initialization was not successful */
            /* the labels undo the setup steps in reverse order */
1353out_3:
1354        xpc_teardown_rsvd_page();
1355
1356        (void)unregister_die_notifier(&xpc_die_notifier);
1357        (void)unregister_reboot_notifier(&xpc_reboot_notifier);
1358out_2:
1359        if (xpc_sysctl)
1360                unregister_sysctl_table(xpc_sysctl);
1361
1362        xpc_teardown_partitions();
1363out_1:
1364        if (is_shub())
1365                xpc_exit_sn2();
1366        else if (is_uv())
1367                xpc_exit_uv();
1368        return ret;
1369}
1370
1371module_init(xpc_init);
1372
    /*
     * Module unload: shut XPC down with reason xpUnloading, which makes
     * xpc_do_exit() unregister the die and reboot notifiers as well.
     */
1373void __exit
1374xpc_exit(void)
1375{
1376        xpc_do_exit(xpUnloading);
1377}
1378
1379module_exit(xpc_exit);
1380
    /*
     * Module metadata and load-time parameters. All four parameters are ints
     * registered with a permission argument of 0, i.e. they are not exposed
     * through sysfs.
     */
1381MODULE_AUTHOR("Silicon Graphics, Inc.");
1382MODULE_DESCRIPTION("Cross Partition Communication (XPC) support");
1383MODULE_LICENSE("GPL");
1384
1385module_param(xpc_hb_interval, int, 0);
1386MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between "
1387                 "heartbeat increments.");
1388
1389module_param(xpc_hb_check_interval, int, 0);
1390MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
1391                 "heartbeat checks.");
1392
    /* also bounds the busy-wait loop in xpc_die_deactivate() */
1393module_param(xpc_disengage_timelimit, int, 0);
1394MODULE_PARM_DESC(xpc_disengage_timelimit, "Number of seconds to wait "
1395                 "for disengage to complete.");
1396
    /* consulted by xpc_system_die() for DIE_KDEBUG_ENTER/DIE_KDEBUG_LEAVE */
1397module_param(xpc_kdebug_ignore, int, 0);
1398MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
1399                 "other partitions when dropping into kdebug.");