Showing error 1480

User: Jiri Slaby
Error type: Leaving function in locked state
Error type description: A lock acquired inside a function is not released on every return path, so the function can exit with the lock still held (the lock is leaked)
File location: net/ipv4/ipmr.c
Line in file: 1702
Project: Linux Kernel
Project version: 2.6.28
Tools: Stanse (1.2)
Entered: 2012-05-21 20:30:05 UTC
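
To make the error class concrete, here is a minimal hypothetical sketch (the names example_lock and example_lookup are invented; none of this is taken from ipmr.c). The early return exits the function with the spinlock still held, which is the kind of path-sensitive lock leak Stanse reports:

    #include <linux/spinlock.h>
    #include <linux/errno.h>

    static DEFINE_SPINLOCK(example_lock);

    static int example_lookup(int key)
    {
            spin_lock(&example_lock);
            if (key < 0)
                    return -EINVAL;   /* BUG: returns with example_lock held */
            spin_unlock(&example_lock);
            return 0;
    }

    /* The usual kernel idiom funnels every exit through one unlock site: */
    static int example_lookup_fixed(int key)
    {
            int err = 0;

            spin_lock(&example_lock);
            if (key < 0)
                    err = -EINVAL;
            spin_unlock(&example_lock);
            return err;
    }

Note that the flagged line (1702) lies just past the end of the excerpt below, in the CONFIG_PROC_FS region of ipmr.c. In 2.6.28 that region holds the /proc seq_file handlers, whose ->start routine takes mrt_lock (read_lock) and relies on the matching ->stop to release it; such deliberate cross-function locking is a common source of reports from intraprocedural lock checkers, so this entry may well be a false positive.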


Source:

   1/*
   2 *        IP multicast routing support for mrouted 3.6/3.8
   3 *
   4 *                (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5 *          Linux Consultancy and Custom Driver Development
   6 *
   7 *        This program is free software; you can redistribute it and/or
   8 *        modify it under the terms of the GNU General Public License
   9 *        as published by the Free Software Foundation; either version
  10 *        2 of the License, or (at your option) any later version.
  11 *
  12 *        Fixes:
  13 *        Michael Chastain        :        Incorrect size of copying.
  14 *        Alan Cox                :        Added the cache manager code
  15 *        Alan Cox                :        Fixed the clone/copy bug and device race.
  16 *        Mike McLagan                :        Routing by source
  17 *        Malcolm Beattie                :        Buffer handling fixes.
  18 *        Alexey Kuznetsov        :        Double buffer free and other fixes.
  19 *        SVR Anand                :        Fixed several multicast bugs and problems.
  20 *        Alexey Kuznetsov        :        Status, optimisations and more.
  21 *        Brad Parker                :        Better behaviour on mrouted upcall
  22 *                                        overflow.
  23 *      Carlos Picoto           :       PIMv1 Support
  24 *        Pavlin Ivanov Radoslavov:        PIMv2 Registers must checksum only PIM header
  25 *                                        Relax this requirement to work with older peers.
  26 *
  27 */
  28
  29#include <asm/system.h>
  30#include <asm/uaccess.h>
  31#include <linux/types.h>
  32#include <linux/capability.h>
  33#include <linux/errno.h>
  34#include <linux/timer.h>
  35#include <linux/mm.h>
  36#include <linux/kernel.h>
  37#include <linux/fcntl.h>
  38#include <linux/stat.h>
  39#include <linux/socket.h>
  40#include <linux/in.h>
  41#include <linux/inet.h>
  42#include <linux/netdevice.h>
  43#include <linux/inetdevice.h>
  44#include <linux/igmp.h>
  45#include <linux/proc_fs.h>
  46#include <linux/seq_file.h>
  47#include <linux/mroute.h>
  48#include <linux/init.h>
  49#include <linux/if_ether.h>
  50#include <net/net_namespace.h>
  51#include <net/ip.h>
  52#include <net/protocol.h>
  53#include <linux/skbuff.h>
  54#include <net/route.h>
  55#include <net/sock.h>
  56#include <net/icmp.h>
  57#include <net/udp.h>
  58#include <net/raw.h>
  59#include <linux/notifier.h>
  60#include <linux/if_arp.h>
  61#include <linux/netfilter_ipv4.h>
  62#include <net/ipip.h>
  63#include <net/checksum.h>
  64#include <net/netlink.h>
  65
  66#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
  67#define CONFIG_IP_PIMSM        1
  68#endif
  69
  70static struct sock *mroute_socket;
  71
  72
  73/* Big lock, protecting vif table, mrt cache and mroute socket state.
  74   Note that the changes are semaphored via rtnl_lock.
  75 */
  76
  77static DEFINE_RWLOCK(mrt_lock);
  78
  79/*
  80 *        Multicast router control variables
  81 */
  82
  83static struct vif_device vif_table[MAXVIFS];                /* Devices                 */
  84static int maxvif;
  85
  86#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
  87
  88static int mroute_do_assert;                                /* Set in PIM assert        */
  89static int mroute_do_pim;
  90
  91static struct mfc_cache *mfc_cache_array[MFC_LINES];        /* Forwarding cache        */
  92
  93static struct mfc_cache *mfc_unres_queue;                /* Queue of unresolved entries */
  94static atomic_t cache_resolve_queue_len;                /* Size of unresolved        */
  95
  96/* Special spinlock for queue of unresolved entries */
  97static DEFINE_SPINLOCK(mfc_unres_lock);
  98
  99/* We return to original Alan's scheme. Hash table of resolved
 100   entries is changed only in process context and protected
 101   with weak lock mrt_lock. Queue of unresolved entries is protected
 102   with strong spinlock mfc_unres_lock.
 103
 104   In this case data path is free of exclusive locks at all.
 105 */
 106
 107static struct kmem_cache *mrt_cachep __read_mostly;
 108
 109static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
 110static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
 111static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
 112
 113#ifdef CONFIG_IP_PIMSM_V2
 114static struct net_protocol pim_protocol;
 115#endif
 116
 117static struct timer_list ipmr_expire_timer;
 118
 119/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
 120
 121static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
 122{
 123        dev_close(dev);
 124
 125        dev = __dev_get_by_name(&init_net, "tunl0");
 126        if (dev) {
 127                struct ifreq ifr;
 128                mm_segment_t        oldfs;
 129                struct ip_tunnel_parm p;
 130
 131                memset(&p, 0, sizeof(p));
 132                p.iph.daddr = v->vifc_rmt_addr.s_addr;
 133                p.iph.saddr = v->vifc_lcl_addr.s_addr;
 134                p.iph.version = 4;
 135                p.iph.ihl = 5;
 136                p.iph.protocol = IPPROTO_IPIP;
 137                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
 138                ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
 139
 140                oldfs = get_fs(); set_fs(KERNEL_DS);
 141                dev->do_ioctl(dev, &ifr, SIOCDELTUNNEL);
 142                set_fs(oldfs);
 143        }
 144}
 145
 146static
 147struct net_device *ipmr_new_tunnel(struct vifctl *v)
 148{
 149        struct net_device  *dev;
 150
 151        dev = __dev_get_by_name(&init_net, "tunl0");
 152
 153        if (dev) {
 154                int err;
 155                struct ifreq ifr;
 156                mm_segment_t        oldfs;
 157                struct ip_tunnel_parm p;
 158                struct in_device  *in_dev;
 159
 160                memset(&p, 0, sizeof(p));
 161                p.iph.daddr = v->vifc_rmt_addr.s_addr;
 162                p.iph.saddr = v->vifc_lcl_addr.s_addr;
 163                p.iph.version = 4;
 164                p.iph.ihl = 5;
 165                p.iph.protocol = IPPROTO_IPIP;
 166                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
 167                ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
 168
 169                oldfs = get_fs(); set_fs(KERNEL_DS);
 170                err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
 171                set_fs(oldfs);
 172
 173                dev = NULL;
 174
 175                if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
 176                        dev->flags |= IFF_MULTICAST;
 177
 178                        in_dev = __in_dev_get_rtnl(dev);
 179                        if (in_dev == NULL)
 180                                goto failure;
 181
 182                        ipv4_devconf_setall(in_dev);
 183                        IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
 184
 185                        if (dev_open(dev))
 186                                goto failure;
 187                        dev_hold(dev);
 188                }
 189        }
 190        return dev;
 191
 192failure:
 193        /* allow the register to be completed before unregistering. */
 194        rtnl_unlock();
 195        rtnl_lock();
 196
 197        unregister_netdevice(dev);
 198        return NULL;
 199}
 200
 201#ifdef CONFIG_IP_PIMSM
 202
 203static int reg_vif_num = -1;
 204
 205static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 206{
 207        read_lock(&mrt_lock);
 208        dev->stats.tx_bytes += skb->len;
 209        dev->stats.tx_packets++;
 210        ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
 211        read_unlock(&mrt_lock);
 212        kfree_skb(skb);
 213        return 0;
 214}
 215
 216static void reg_vif_setup(struct net_device *dev)
 217{
 218        dev->type                = ARPHRD_PIMREG;
 219        dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
 220        dev->flags                = IFF_NOARP;
 221        dev->hard_start_xmit        = reg_vif_xmit;
 222        dev->destructor                = free_netdev;
 223}
 224
 225static struct net_device *ipmr_reg_vif(void)
 226{
 227        struct net_device *dev;
 228        struct in_device *in_dev;
 229
 230        dev = alloc_netdev(0, "pimreg", reg_vif_setup);
 231
 232        if (dev == NULL)
 233                return NULL;
 234
 235        if (register_netdevice(dev)) {
 236                free_netdev(dev);
 237                return NULL;
 238        }
 239        dev->iflink = 0;
 240
 241        rcu_read_lock();
 242        if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
 243                rcu_read_unlock();
 244                goto failure;
 245        }
 246
 247        ipv4_devconf_setall(in_dev);
 248        IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
 249        rcu_read_unlock();
 250
 251        if (dev_open(dev))
 252                goto failure;
 253
 254        dev_hold(dev);
 255
 256        return dev;
 257
 258failure:
 259        /* allow the register to be completed before unregistering. */
 260        rtnl_unlock();
 261        rtnl_lock();
 262
 263        unregister_netdevice(dev);
 264        return NULL;
 265}
 266#endif
 267
 268/*
 269 *        Delete a VIF entry
 270 *        @notify: Set to 1, if the caller is a notifier_call
 271 */
 272
 273static int vif_delete(int vifi, int notify)
 274{
 275        struct vif_device *v;
 276        struct net_device *dev;
 277        struct in_device *in_dev;
 278
 279        if (vifi < 0 || vifi >= maxvif)
 280                return -EADDRNOTAVAIL;
 281
 282        v = &vif_table[vifi];
 283
 284        write_lock_bh(&mrt_lock);
 285        dev = v->dev;
 286        v->dev = NULL;
 287
 288        if (!dev) {
 289                write_unlock_bh(&mrt_lock);
 290                return -EADDRNOTAVAIL;
 291        }
 292
 293#ifdef CONFIG_IP_PIMSM
 294        if (vifi == reg_vif_num)
 295                reg_vif_num = -1;
 296#endif
 297
 298        if (vifi+1 == maxvif) {
 299                int tmp;
 300                for (tmp=vifi-1; tmp>=0; tmp--) {
 301                        if (VIF_EXISTS(tmp))
 302                                break;
 303                }
 304                maxvif = tmp+1;
 305        }
 306
 307        write_unlock_bh(&mrt_lock);
 308
 309        dev_set_allmulti(dev, -1);
 310
 311        if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
 312                IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
 313                ip_rt_multicast_event(in_dev);
 314        }
 315
 316        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
 317                unregister_netdevice(dev);
 318
 319        dev_put(dev);
 320        return 0;
 321}
 322
 323/* Destroy an unresolved cache entry, killing queued skbs
 324   and reporting error to netlink readers.
 325 */
 326
 327static void ipmr_destroy_unres(struct mfc_cache *c)
 328{
 329        struct sk_buff *skb;
 330        struct nlmsgerr *e;
 331
 332        atomic_dec(&cache_resolve_queue_len);
 333
 334        while ((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
 335                if (ip_hdr(skb)->version == 0) {
 336                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 337                        nlh->nlmsg_type = NLMSG_ERROR;
 338                        nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 339                        skb_trim(skb, nlh->nlmsg_len);
 340                        e = NLMSG_DATA(nlh);
 341                        e->error = -ETIMEDOUT;
 342                        memset(&e->msg, 0, sizeof(e->msg));
 343
 344                        rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
 345                } else
 346                        kfree_skb(skb);
 347        }
 348
 349        kmem_cache_free(mrt_cachep, c);
 350}
 351
 352
 353/* Single timer process for all the unresolved queue. */
 354
 355static void ipmr_expire_process(unsigned long dummy)
 356{
 357        unsigned long now;
 358        unsigned long expires;
 359        struct mfc_cache *c, **cp;
 360
 361        if (!spin_trylock(&mfc_unres_lock)) {
 362                mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
 363                return;
 364        }
 365
 366        if (atomic_read(&cache_resolve_queue_len) == 0)
 367                goto out;
 368
 369        now = jiffies;
 370        expires = 10*HZ;
 371        cp = &mfc_unres_queue;
 372
 373        while ((c=*cp) != NULL) {
 374                if (time_after(c->mfc_un.unres.expires, now)) {
 375                        unsigned long interval = c->mfc_un.unres.expires - now;
 376                        if (interval < expires)
 377                                expires = interval;
 378                        cp = &c->next;
 379                        continue;
 380                }
 381
 382                *cp = c->next;
 383
 384                ipmr_destroy_unres(c);
 385        }
 386
 387        if (atomic_read(&cache_resolve_queue_len))
 388                mod_timer(&ipmr_expire_timer, jiffies + expires);
 389
 390out:
 391        spin_unlock(&mfc_unres_lock);
 392}
 393
 394/* Fill oifs list. It is called under write locked mrt_lock. */
 395
 396static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
 397{
 398        int vifi;
 399
 400        cache->mfc_un.res.minvif = MAXVIFS;
 401        cache->mfc_un.res.maxvif = 0;
 402        memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
 403
 404        for (vifi=0; vifi<maxvif; vifi++) {
 405                if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
 406                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 407                        if (cache->mfc_un.res.minvif > vifi)
 408                                cache->mfc_un.res.minvif = vifi;
 409                        if (cache->mfc_un.res.maxvif <= vifi)
 410                                cache->mfc_un.res.maxvif = vifi + 1;
 411                }
 412        }
 413}
 414
 415static int vif_add(struct vifctl *vifc, int mrtsock)
 416{
 417        int vifi = vifc->vifc_vifi;
 418        struct vif_device *v = &vif_table[vifi];
 419        struct net_device *dev;
 420        struct in_device *in_dev;
 421        int err;
 422
 423        /* Is vif busy ? */
 424        if (VIF_EXISTS(vifi))
 425                return -EADDRINUSE;
 426
 427        switch (vifc->vifc_flags) {
 428#ifdef CONFIG_IP_PIMSM
 429        case VIFF_REGISTER:
 430                /*
 431                 * Special Purpose VIF in PIM
 432                 * All the packets will be sent to the daemon
 433                 */
 434                if (reg_vif_num >= 0)
 435                        return -EADDRINUSE;
 436                dev = ipmr_reg_vif();
 437                if (!dev)
 438                        return -ENOBUFS;
 439                err = dev_set_allmulti(dev, 1);
 440                if (err) {
 441                        unregister_netdevice(dev);
 442                        dev_put(dev);
 443                        return err;
 444                }
 445                break;
 446#endif
 447        case VIFF_TUNNEL:
 448                dev = ipmr_new_tunnel(vifc);
 449                if (!dev)
 450                        return -ENOBUFS;
 451                err = dev_set_allmulti(dev, 1);
 452                if (err) {
 453                        ipmr_del_tunnel(dev, vifc);
 454                        dev_put(dev);
 455                        return err;
 456                }
 457                break;
 458        case 0:
 459                dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
 460                if (!dev)
 461                        return -EADDRNOTAVAIL;
 462                err = dev_set_allmulti(dev, 1);
 463                if (err) {
 464                        dev_put(dev);
 465                        return err;
 466                }
 467                break;
 468        default:
 469                return -EINVAL;
 470        }
 471
 472        if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
 473                return -EADDRNOTAVAIL;
 474        IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
 475        ip_rt_multicast_event(in_dev);
 476
 477        /*
 478         *        Fill in the VIF structures
 479         */
 480        v->rate_limit=vifc->vifc_rate_limit;
 481        v->local=vifc->vifc_lcl_addr.s_addr;
 482        v->remote=vifc->vifc_rmt_addr.s_addr;
 483        v->flags=vifc->vifc_flags;
 484        if (!mrtsock)
 485                v->flags |= VIFF_STATIC;
 486        v->threshold=vifc->vifc_threshold;
 487        v->bytes_in = 0;
 488        v->bytes_out = 0;
 489        v->pkt_in = 0;
 490        v->pkt_out = 0;
 491        v->link = dev->ifindex;
 492        if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
 493                v->link = dev->iflink;
 494
 495        /* And finish update writing critical data */
 496        write_lock_bh(&mrt_lock);
 497        v->dev=dev;
 498#ifdef CONFIG_IP_PIMSM
 499        if (v->flags&VIFF_REGISTER)
 500                reg_vif_num = vifi;
 501#endif
 502        if (vifi+1 > maxvif)
 503                maxvif = vifi+1;
 504        write_unlock_bh(&mrt_lock);
 505        return 0;
 506}
 507
 508static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
 509{
 510        int line=MFC_HASH(mcastgrp,origin);
 511        struct mfc_cache *c;
 512
 513        for (c=mfc_cache_array[line]; c; c = c->next) {
 514                if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
 515                        break;
 516        }
 517        return c;
 518}
 519
 520/*
 521 *        Allocate a multicast cache entry
 522 */
 523static struct mfc_cache *ipmr_cache_alloc(void)
 524{
 525        struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 526        if (c==NULL)
 527                return NULL;
 528        c->mfc_un.res.minvif = MAXVIFS;
 529        return c;
 530}
 531
 532static struct mfc_cache *ipmr_cache_alloc_unres(void)
 533{
 534        struct mfc_cache *c=kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 535        if (c==NULL)
 536                return NULL;
 537        skb_queue_head_init(&c->mfc_un.unres.unresolved);
 538        c->mfc_un.unres.expires = jiffies + 10*HZ;
 539        return c;
 540}
 541
 542/*
 543 *        A cache entry has gone into a resolved state from queued
 544 */
 545
 546static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 547{
 548        struct sk_buff *skb;
 549        struct nlmsgerr *e;
 550
 551        /*
 552         *        Play the pending entries through our router
 553         */
 554
 555        while ((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
 556                if (ip_hdr(skb)->version == 0) {
 557                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 558
 559                        if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
 560                                nlh->nlmsg_len = (skb_tail_pointer(skb) -
 561                                                  (u8 *)nlh);
 562                        } else {
 563                                nlh->nlmsg_type = NLMSG_ERROR;
 564                                nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 565                                skb_trim(skb, nlh->nlmsg_len);
 566                                e = NLMSG_DATA(nlh);
 567                                e->error = -EMSGSIZE;
 568                                memset(&e->msg, 0, sizeof(e->msg));
 569                        }
 570
 571                        rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
 572                } else
 573                        ip_mr_forward(skb, c, 0);
 574        }
 575}
 576
 577/*
 578 *        Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 579 *        expects the following bizarre scheme.
 580 *
 581 *        Called under mrt_lock.
 582 */
 583
 584static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
 585{
 586        struct sk_buff *skb;
 587        const int ihl = ip_hdrlen(pkt);
 588        struct igmphdr *igmp;
 589        struct igmpmsg *msg;
 590        int ret;
 591
 592#ifdef CONFIG_IP_PIMSM
 593        if (assert == IGMPMSG_WHOLEPKT)
 594                skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
 595        else
 596#endif
 597                skb = alloc_skb(128, GFP_ATOMIC);
 598
 599        if (!skb)
 600                return -ENOBUFS;
 601
 602#ifdef CONFIG_IP_PIMSM
 603        if (assert == IGMPMSG_WHOLEPKT) {
 604                /* Ugly, but we have no choice with this interface.
 605                   Duplicate old header, fix ihl, length etc.
 606                   And all this only to mangle msg->im_msgtype and
 607                   to set msg->im_mbz to "mbz" :-)
 608                 */
 609                skb_push(skb, sizeof(struct iphdr));
 610                skb_reset_network_header(skb);
 611                skb_reset_transport_header(skb);
 612                msg = (struct igmpmsg *)skb_network_header(skb);
 613                memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
 614                msg->im_msgtype = IGMPMSG_WHOLEPKT;
 615                msg->im_mbz = 0;
 616                msg->im_vif = reg_vif_num;
 617                ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
 618                ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
 619                                             sizeof(struct iphdr));
 620        } else
 621#endif
 622        {
 623
 624        /*
 625         *        Copy the IP header
 626         */
 627
 628        skb->network_header = skb->tail;
 629        skb_put(skb, ihl);
 630        skb_copy_to_linear_data(skb, pkt->data, ihl);
 631        ip_hdr(skb)->protocol = 0;                        /* Flag to the kernel this is a route add */
 632        msg = (struct igmpmsg *)skb_network_header(skb);
 633        msg->im_vif = vifi;
 634        skb->dst = dst_clone(pkt->dst);
 635
 636        /*
 637         *        Add our header
 638         */
 639
 640        igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
 641        igmp->type        =
 642        msg->im_msgtype = assert;
 643        igmp->code         =        0;
 644        ip_hdr(skb)->tot_len = htons(skb->len);                        /* Fix the length */
 645        skb->transport_header = skb->network_header;
 646        }
 647
 648        if (mroute_socket == NULL) {
 649                kfree_skb(skb);
 650                return -EINVAL;
 651        }
 652
 653        /*
 654         *        Deliver to mrouted
 655         */
 656        if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
 657                if (net_ratelimit())
 658                        printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
 659                kfree_skb(skb);
 660        }
 661
 662        return ret;
 663}
 664
 665/*
 666 *        Queue a packet for resolution. It gets locked cache entry!
 667 */
 668
 669static int
 670ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
 671{
 672        int err;
 673        struct mfc_cache *c;
 674        const struct iphdr *iph = ip_hdr(skb);
 675
 676        spin_lock_bh(&mfc_unres_lock);
 677        for (c=mfc_unres_queue; c; c=c->next) {
 678                if (c->mfc_mcastgrp == iph->daddr &&
 679                    c->mfc_origin == iph->saddr)
 680                        break;
 681        }
 682
 683        if (c == NULL) {
 684                /*
 685                 *        Create a new entry if allowable
 686                 */
 687
 688                if (atomic_read(&cache_resolve_queue_len)>=10 ||
 689                    (c=ipmr_cache_alloc_unres())==NULL) {
 690                        spin_unlock_bh(&mfc_unres_lock);
 691
 692                        kfree_skb(skb);
 693                        return -ENOBUFS;
 694                }
 695
 696                /*
 697                 *        Fill in the new cache entry
 698                 */
 699                c->mfc_parent        = -1;
 700                c->mfc_origin        = iph->saddr;
 701                c->mfc_mcastgrp        = iph->daddr;
 702
 703                /*
 704                 *        Reflect first query at mrouted.
 705                 */
 706                if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
 707                        /* If the report failed throw the cache entry
 708                           out - Brad Parker
 709                         */
 710                        spin_unlock_bh(&mfc_unres_lock);
 711
 712                        kmem_cache_free(mrt_cachep, c);
 713                        kfree_skb(skb);
 714                        return err;
 715                }
 716
 717                atomic_inc(&cache_resolve_queue_len);
 718                c->next = mfc_unres_queue;
 719                mfc_unres_queue = c;
 720
 721                mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
 722        }
 723
 724        /*
 725         *        See if we can append the packet
 726         */
 727        if (c->mfc_un.unres.unresolved.qlen>3) {
 728                kfree_skb(skb);
 729                err = -ENOBUFS;
 730        } else {
 731                skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
 732                err = 0;
 733        }
 734
 735        spin_unlock_bh(&mfc_unres_lock);
 736        return err;
 737}
 738
 739/*
 740 *        MFC cache manipulation by user space mroute daemon
 741 */
 742
 743static int ipmr_mfc_delete(struct mfcctl *mfc)
 744{
 745        int line;
 746        struct mfc_cache *c, **cp;
 747
 748        line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 749
 750        for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
 751                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 752                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
 753                        write_lock_bh(&mrt_lock);
 754                        *cp = c->next;
 755                        write_unlock_bh(&mrt_lock);
 756
 757                        kmem_cache_free(mrt_cachep, c);
 758                        return 0;
 759                }
 760        }
 761        return -ENOENT;
 762}
 763
 764static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
 765{
 766        int line;
 767        struct mfc_cache *uc, *c, **cp;
 768
 769        line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 770
 771        for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
 772                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 773                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
 774                        break;
 775        }
 776
 777        if (c != NULL) {
 778                write_lock_bh(&mrt_lock);
 779                c->mfc_parent = mfc->mfcc_parent;
 780                ipmr_update_thresholds(c, mfc->mfcc_ttls);
 781                if (!mrtsock)
 782                        c->mfc_flags |= MFC_STATIC;
 783                write_unlock_bh(&mrt_lock);
 784                return 0;
 785        }
 786
 787        if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
 788                return -EINVAL;
 789
 790        c=ipmr_cache_alloc();
 791        if (c==NULL)
 792                return -ENOMEM;
 793
 794        c->mfc_origin=mfc->mfcc_origin.s_addr;
 795        c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
 796        c->mfc_parent=mfc->mfcc_parent;
 797        ipmr_update_thresholds(c, mfc->mfcc_ttls);
 798        if (!mrtsock)
 799                c->mfc_flags |= MFC_STATIC;
 800
 801        write_lock_bh(&mrt_lock);
 802        c->next = mfc_cache_array[line];
 803        mfc_cache_array[line] = c;
 804        write_unlock_bh(&mrt_lock);
 805
 806        /*
 807         *        Check to see if we resolved a queued list. If so we
 808         *        need to send on the frames and tidy up.
 809         */
 810        spin_lock_bh(&mfc_unres_lock);
 811        for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
 812             cp = &uc->next) {
 813                if (uc->mfc_origin == c->mfc_origin &&
 814                    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
 815                        *cp = uc->next;
 816                        if (atomic_dec_and_test(&cache_resolve_queue_len))
 817                                del_timer(&ipmr_expire_timer);
 818                        break;
 819                }
 820        }
 821        spin_unlock_bh(&mfc_unres_lock);
 822
 823        if (uc) {
 824                ipmr_cache_resolve(uc, c);
 825                kmem_cache_free(mrt_cachep, uc);
 826        }
 827        return 0;
 828}
 829
 830/*
 831 *        Close the multicast socket, and clear the vif tables etc
 832 */
 833
 834static void mroute_clean_tables(struct sock *sk)
 835{
 836        int i;
 837
 838        /*
 839         *        Shut down all active vif entries
 840         */
 841        for (i=0; i<maxvif; i++) {
 842                if (!(vif_table[i].flags&VIFF_STATIC))
 843                        vif_delete(i, 0);
 844        }
 845
 846        /*
 847         *        Wipe the cache
 848         */
 849        for (i=0;i<MFC_LINES;i++) {
 850                struct mfc_cache *c, **cp;
 851
 852                cp = &mfc_cache_array[i];
 853                while ((c = *cp) != NULL) {
 854                        if (c->mfc_flags&MFC_STATIC) {
 855                                cp = &c->next;
 856                                continue;
 857                        }
 858                        write_lock_bh(&mrt_lock);
 859                        *cp = c->next;
 860                        write_unlock_bh(&mrt_lock);
 861
 862                        kmem_cache_free(mrt_cachep, c);
 863                }
 864        }
 865
 866        if (atomic_read(&cache_resolve_queue_len) != 0) {
 867                struct mfc_cache *c;
 868
 869                spin_lock_bh(&mfc_unres_lock);
 870                while (mfc_unres_queue != NULL) {
 871                        c = mfc_unres_queue;
 872                        mfc_unres_queue = c->next;
 873                        spin_unlock_bh(&mfc_unres_lock);
 874
 875                        ipmr_destroy_unres(c);
 876
 877                        spin_lock_bh(&mfc_unres_lock);
 878                }
 879                spin_unlock_bh(&mfc_unres_lock);
 880        }
 881}
 882
 883static void mrtsock_destruct(struct sock *sk)
 884{
 885        rtnl_lock();
 886        if (sk == mroute_socket) {
 887                IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;
 888
 889                write_lock_bh(&mrt_lock);
 890                mroute_socket=NULL;
 891                write_unlock_bh(&mrt_lock);
 892
 893                mroute_clean_tables(sk);
 894        }
 895        rtnl_unlock();
 896}
 897
 898/*
 899 *        Socket options and virtual interface manipulation. The whole
 900 *        virtual interface system is a complete heap, but unfortunately
 901 *        that's how BSD mrouted happens to think. Maybe one day with a proper
 902 *        MOSPF/PIM router set up we can clean this up.
 903 */
 904
 905int ip_mroute_setsockopt(struct sock *sk,int optname,char __user *optval,int optlen)
 906{
 907        int ret;
 908        struct vifctl vif;
 909        struct mfcctl mfc;
 910
 911        if (optname != MRT_INIT) {
 912                if (sk != mroute_socket && !capable(CAP_NET_ADMIN))
 913                        return -EACCES;
 914        }
 915
 916        switch (optname) {
 917        case MRT_INIT:
 918                if (sk->sk_type != SOCK_RAW ||
 919                    inet_sk(sk)->num != IPPROTO_IGMP)
 920                        return -EOPNOTSUPP;
 921                if (optlen!=sizeof(int))
 922                        return -ENOPROTOOPT;
 923
 924                rtnl_lock();
 925                if (mroute_socket) {
 926                        rtnl_unlock();
 927                        return -EADDRINUSE;
 928                }
 929
 930                ret = ip_ra_control(sk, 1, mrtsock_destruct);
 931                if (ret == 0) {
 932                        write_lock_bh(&mrt_lock);
 933                        mroute_socket=sk;
 934                        write_unlock_bh(&mrt_lock);
 935
 936                        IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
 937                }
 938                rtnl_unlock();
 939                return ret;
 940        case MRT_DONE:
 941                if (sk!=mroute_socket)
 942                        return -EACCES;
 943                return ip_ra_control(sk, 0, NULL);
 944        case MRT_ADD_VIF:
 945        case MRT_DEL_VIF:
 946                if (optlen!=sizeof(vif))
 947                        return -EINVAL;
 948                if (copy_from_user(&vif,optval,sizeof(vif)))
 949                        return -EFAULT;
 950                if (vif.vifc_vifi >= MAXVIFS)
 951                        return -ENFILE;
 952                rtnl_lock();
 953                if (optname==MRT_ADD_VIF) {
 954                        ret = vif_add(&vif, sk==mroute_socket);
 955                } else {
 956                        ret = vif_delete(vif.vifc_vifi, 0);
 957                }
 958                rtnl_unlock();
 959                return ret;
 960
 961                /*
 962                 *        Manipulate the forwarding caches. These live
 963                 *        in a sort of kernel/user symbiosis.
 964                 */
 965        case MRT_ADD_MFC:
 966        case MRT_DEL_MFC:
 967                if (optlen!=sizeof(mfc))
 968                        return -EINVAL;
 969                if (copy_from_user(&mfc,optval, sizeof(mfc)))
 970                        return -EFAULT;
 971                rtnl_lock();
 972                if (optname==MRT_DEL_MFC)
 973                        ret = ipmr_mfc_delete(&mfc);
 974                else
 975                        ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
 976                rtnl_unlock();
 977                return ret;
 978                /*
 979                 *        Control PIM assert.
 980                 */
 981        case MRT_ASSERT:
 982        {
 983                int v;
 984                if (get_user(v,(int __user *)optval))
 985                        return -EFAULT;
 986                mroute_do_assert=(v)?1:0;
 987                return 0;
 988        }
 989#ifdef CONFIG_IP_PIMSM
 990        case MRT_PIM:
 991        {
 992                int v;
 993
 994                if (get_user(v,(int __user *)optval))
 995                        return -EFAULT;
 996                v = (v) ? 1 : 0;
 997
 998                rtnl_lock();
 999                ret = 0;
1000                if (v != mroute_do_pim) {
1001                        mroute_do_pim = v;
1002                        mroute_do_assert = v;
1003#ifdef CONFIG_IP_PIMSM_V2
1004                        if (mroute_do_pim)
1005                                ret = inet_add_protocol(&pim_protocol,
1006                                                        IPPROTO_PIM);
1007                        else
1008                                ret = inet_del_protocol(&pim_protocol,
1009                                                        IPPROTO_PIM);
1010                        if (ret < 0)
1011                                ret = -EAGAIN;
1012#endif
1013                }
1014                rtnl_unlock();
1015                return ret;
1016        }
1017#endif
1018        /*
1019         *        Spurious command, or MRT_VERSION which you cannot
1020         *        set.
1021         */
1022        default:
1023                return -ENOPROTOOPT;
1024        }
1025}
1026
1027/*
1028 *        Getsock opt support for the multicast routing system.
1029 */
1030
1031int ip_mroute_getsockopt(struct sock *sk,int optname,char __user *optval,int __user *optlen)
1032{
1033        int olr;
1034        int val;
1035
1036        if (optname!=MRT_VERSION &&
1037#ifdef CONFIG_IP_PIMSM
1038           optname!=MRT_PIM &&
1039#endif
1040           optname!=MRT_ASSERT)
1041                return -ENOPROTOOPT;
1042
1043        if (get_user(olr, optlen))
1044                return -EFAULT;
1045
1046        olr = min_t(unsigned int, olr, sizeof(int));
1047        if (olr < 0)
1048                return -EINVAL;
1049
1050        if (put_user(olr,optlen))
1051                return -EFAULT;
1052        if (optname==MRT_VERSION)
1053                val=0x0305;
1054#ifdef CONFIG_IP_PIMSM
1055        else if (optname==MRT_PIM)
1056                val=mroute_do_pim;
1057#endif
1058        else
1059                val=mroute_do_assert;
1060        if (copy_to_user(optval,&val,olr))
1061                return -EFAULT;
1062        return 0;
1063}
1064
1065/*
1066 *        The IP multicast ioctl support routines.
1067 */
1068
1069int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1070{
1071        struct sioc_sg_req sr;
1072        struct sioc_vif_req vr;
1073        struct vif_device *vif;
1074        struct mfc_cache *c;
1075
1076        switch (cmd) {
1077        case SIOCGETVIFCNT:
1078                if (copy_from_user(&vr,arg,sizeof(vr)))
1079                        return -EFAULT;
1080                if (vr.vifi>=maxvif)
1081                        return -EINVAL;
1082                read_lock(&mrt_lock);
1083                vif=&vif_table[vr.vifi];
1084                if (VIF_EXISTS(vr.vifi))        {
1085                        vr.icount=vif->pkt_in;
1086                        vr.ocount=vif->pkt_out;
1087                        vr.ibytes=vif->bytes_in;
1088                        vr.obytes=vif->bytes_out;
1089                        read_unlock(&mrt_lock);
1090
1091                        if (copy_to_user(arg,&vr,sizeof(vr)))
1092                                return -EFAULT;
1093                        return 0;
1094                }
1095                read_unlock(&mrt_lock);
1096                return -EADDRNOTAVAIL;
1097        case SIOCGETSGCNT:
1098                if (copy_from_user(&sr,arg,sizeof(sr)))
1099                        return -EFAULT;
1100
1101                read_lock(&mrt_lock);
1102                c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
1103                if (c) {
1104                        sr.pktcnt = c->mfc_un.res.pkt;
1105                        sr.bytecnt = c->mfc_un.res.bytes;
1106                        sr.wrong_if = c->mfc_un.res.wrong_if;
1107                        read_unlock(&mrt_lock);
1108
1109                        if (copy_to_user(arg,&sr,sizeof(sr)))
1110                                return -EFAULT;
1111                        return 0;
1112                }
1113                read_unlock(&mrt_lock);
1114                return -EADDRNOTAVAIL;
1115        default:
1116                return -ENOIOCTLCMD;
1117        }
1118}
1119
1120
1121static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1122{
1123        struct net_device *dev = ptr;
1124        struct vif_device *v;
1125        int ct;
1126
1127        if (!net_eq(dev_net(dev), &init_net))
1128                return NOTIFY_DONE;
1129
1130        if (event != NETDEV_UNREGISTER)
1131                return NOTIFY_DONE;
1132        v=&vif_table[0];
1133        for (ct=0;ct<maxvif;ct++,v++) {
1134                if (v->dev==dev)
1135                        vif_delete(ct, 1);
1136        }
1137        return NOTIFY_DONE;
1138}
1139
1140
1141static struct notifier_block ip_mr_notifier={
1142        .notifier_call = ipmr_device_event,
1143};
1144
1145/*
1146 *         Encapsulate a packet by attaching a valid IPIP header to it.
1147 *        This avoids tunnel drivers and other mess and gives us the speed so
1148 *        important for multicast video.
1149 */
1150
1151static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1152{
1153        struct iphdr *iph;
1154        struct iphdr *old_iph = ip_hdr(skb);
1155
1156        skb_push(skb, sizeof(struct iphdr));
1157        skb->transport_header = skb->network_header;
1158        skb_reset_network_header(skb);
1159        iph = ip_hdr(skb);
1160
1161        iph->version        =         4;
1162        iph->tos        =        old_iph->tos;
1163        iph->ttl        =        old_iph->ttl;
1164        iph->frag_off        =        0;
1165        iph->daddr        =        daddr;
1166        iph->saddr        =        saddr;
1167        iph->protocol        =        IPPROTO_IPIP;
1168        iph->ihl        =        5;
1169        iph->tot_len        =        htons(skb->len);
1170        ip_select_ident(iph, skb->dst, NULL);
1171        ip_send_check(iph);
1172
1173        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1174        nf_reset(skb);
1175}
1176
1177static inline int ipmr_forward_finish(struct sk_buff *skb)
1178{
1179        struct ip_options * opt        = &(IPCB(skb)->opt);
1180
1181        IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1182
1183        if (unlikely(opt->optlen))
1184                ip_forward_options(skb);
1185
1186        return dst_output(skb);
1187}
1188
1189/*
1190 *        Processing handlers for ipmr_forward
1191 */
1192
1193static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1194{
1195        const struct iphdr *iph = ip_hdr(skb);
1196        struct vif_device *vif = &vif_table[vifi];
1197        struct net_device *dev;
1198        struct rtable *rt;
1199        int    encap = 0;
1200
1201        if (vif->dev == NULL)
1202                goto out_free;
1203
1204#ifdef CONFIG_IP_PIMSM
1205        if (vif->flags & VIFF_REGISTER) {
1206                vif->pkt_out++;
1207                vif->bytes_out+=skb->len;
1208                vif->dev->stats.tx_bytes += skb->len;
1209                vif->dev->stats.tx_packets++;
1210                ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
1211                kfree_skb(skb);
1212                return;
1213        }
1214#endif
1215
1216        if (vif->flags&VIFF_TUNNEL) {
1217                struct flowi fl = { .oif = vif->link,
1218                                    .nl_u = { .ip4_u =
1219                                              { .daddr = vif->remote,
1220                                                .saddr = vif->local,
1221                                                .tos = RT_TOS(iph->tos) } },
1222                                    .proto = IPPROTO_IPIP };
1223                if (ip_route_output_key(&init_net, &rt, &fl))
1224                        goto out_free;
1225                encap = sizeof(struct iphdr);
1226        } else {
1227                struct flowi fl = { .oif = vif->link,
1228                                    .nl_u = { .ip4_u =
1229                                              { .daddr = iph->daddr,
1230                                                .tos = RT_TOS(iph->tos) } },
1231                                    .proto = IPPROTO_IPIP };
1232                if (ip_route_output_key(&init_net, &rt, &fl))
1233                        goto out_free;
1234        }
1235
1236        dev = rt->u.dst.dev;
1237
1238        if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1239                /* Do not fragment multicasts. Alas, IPv4 does not
1240                   allow to send ICMP, so that packets will disappear
1241                   to blackhole.
1242                 */
1243
1244                IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1245                ip_rt_put(rt);
1246                goto out_free;
1247        }
1248
1249        encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1250
1251        if (skb_cow(skb, encap)) {
1252                ip_rt_put(rt);
1253                goto out_free;
1254        }
1255
1256        vif->pkt_out++;
1257        vif->bytes_out+=skb->len;
1258
1259        dst_release(skb->dst);
1260        skb->dst = &rt->u.dst;
1261        ip_decrease_ttl(ip_hdr(skb));
1262
1263        /* FIXME: forward and output firewalls used to be called here.
1264         * What do we do with netfilter? -- RR */
1265        if (vif->flags & VIFF_TUNNEL) {
1266                ip_encap(skb, vif->local, vif->remote);
1267                /* FIXME: extra output firewall step used to be here. --RR */
1268                vif->dev->stats.tx_packets++;
1269                vif->dev->stats.tx_bytes += skb->len;
1270        }
1271
1272        IPCB(skb)->flags |= IPSKB_FORWARDED;
1273
1274        /*
1275         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1276         * not only before forwarding, but after forwarding on all output
1277         * interfaces. It is clear, if mrouter runs a multicasting
1278         * program, it should receive packets not depending to what interface
1279         * program is joined.
1280         * If we will not make it, the program will have to join on all
1281         * interfaces. On the other hand, multihoming host (or router, but
1282         * not mrouter) cannot join to more than one interface - it will
1283         * result in receiving multiple packets.
1284         */
1285        NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1286                ipmr_forward_finish);
1287        return;
1288
1289out_free:
1290        kfree_skb(skb);
1291        return;
1292}
1293
1294static int ipmr_find_vif(struct net_device *dev)
1295{
1296        int ct;
1297        for (ct=maxvif-1; ct>=0; ct--) {
1298                if (vif_table[ct].dev == dev)
1299                        break;
1300        }
1301        return ct;
1302}
1303
1304/* "local" means that we should preserve one skb (for local delivery) */
1305
1306static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1307{
1308        int psend = -1;
1309        int vif, ct;
1310
1311        vif = cache->mfc_parent;
1312        cache->mfc_un.res.pkt++;
1313        cache->mfc_un.res.bytes += skb->len;
1314
1315        /*
1316         * Wrong interface: drop packet and (maybe) send PIM assert.
1317         */
1318        if (vif_table[vif].dev != skb->dev) {
1319                int true_vifi;
1320
1321                if (skb->rtable->fl.iif == 0) {
1322                        /* It is our own packet, looped back.
1323                           Very complicated situation...
1324
1325                           The best workaround until routing daemons will be
1326                           fixed is not to redistribute packet, if it was
1327                           send through wrong interface. It means, that
1328                           multicast applications WILL NOT work for
1329                           (S,G), which have default multicast route pointing
1330                           to wrong oif. In any case, it is not a good
1331                           idea to use multicasting applications on router.
1332                         */
1333                        goto dont_forward;
1334                }
1335
1336                cache->mfc_un.res.wrong_if++;
1337                true_vifi = ipmr_find_vif(skb->dev);
1338
1339                if (true_vifi >= 0 && mroute_do_assert &&
1340                    /* pimsm uses asserts, when switching from RPT to SPT,
1341                       so that we cannot check that packet arrived on an oif.
1342                       It is bad, but otherwise we would need to move pretty
1343                       large chunk of pimd to kernel. Ough... --ANK
1344                     */
1345                    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
1346                    time_after(jiffies,
1347                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1348                        cache->mfc_un.res.last_assert = jiffies;
1349                        ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
1350                }
1351                goto dont_forward;
1352        }
1353
1354        vif_table[vif].pkt_in++;
1355        vif_table[vif].bytes_in+=skb->len;
1356
1357        /*
1358         *        Forward the frame
1359         */
1360        for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1361                if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1362                        if (psend != -1) {
1363                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1364                                if (skb2)
1365                                        ipmr_queue_xmit(skb2, cache, psend);
1366                        }
1367                        psend=ct;
1368                }
1369        }
1370        if (psend != -1) {
1371                if (local) {
1372                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1373                        if (skb2)
1374                                ipmr_queue_xmit(skb2, cache, psend);
1375                } else {
1376                        ipmr_queue_xmit(skb, cache, psend);
1377                        return 0;
1378                }
1379        }
1380
1381dont_forward:
1382        if (!local)
1383                kfree_skb(skb);
1384        return 0;
1385}
1386
1387
1388/*
1389 *        Multicast packets for forwarding arrive here
1390 */
1391
1392int ip_mr_input(struct sk_buff *skb)
1393{
1394        struct mfc_cache *cache;
1395        int local = skb->rtable->rt_flags&RTCF_LOCAL;
1396
1397        /* Packet is looped back after forward, it should not be
1398           forwarded second time, but still can be delivered locally.
1399         */
1400        if (IPCB(skb)->flags&IPSKB_FORWARDED)
1401                goto dont_forward;
1402
1403        if (!local) {
1404                    if (IPCB(skb)->opt.router_alert) {
1405                            if (ip_call_ra_chain(skb))
1406                                    return 0;
1407                    } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1408                            /* IGMPv1 (and broken IGMPv2 implementations sort of
1409                               Cisco IOS <= 11.2(8)) do not put router alert
1410                               option to IGMP packets destined to routable
1411                               groups. It is very bad, because it means
1412                               that we can forward NO IGMP messages.
1413                             */
1414                            read_lock(&mrt_lock);
1415                            if (mroute_socket) {
1416                                    nf_reset(skb);
1417                                    raw_rcv(mroute_socket, skb);
1418                                    read_unlock(&mrt_lock);
1419                                    return 0;
1420                            }
1421                            read_unlock(&mrt_lock);
1422                    }
1423        }
1424
1425        read_lock(&mrt_lock);
1426        cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1427
1428        /*
1429         *        No usable cache entry
1430         */
1431        if (cache==NULL) {
1432                int vif;
1433
1434                if (local) {
1435                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1436                        ip_local_deliver(skb);
1437                        if (skb2 == NULL) {
1438                                read_unlock(&mrt_lock);
1439                                return -ENOBUFS;
1440                        }
1441                        skb = skb2;
1442                }
1443
1444                vif = ipmr_find_vif(skb->dev);
1445                if (vif >= 0) {
1446                        int err = ipmr_cache_unresolved(vif, skb);
1447                        read_unlock(&mrt_lock);
1448
1449                        return err;
1450                }
1451                read_unlock(&mrt_lock);
1452                kfree_skb(skb);
1453                return -ENODEV;
1454        }
1455
1456        ip_mr_forward(skb, cache, local);
1457
1458        read_unlock(&mrt_lock);
1459
1460        if (local)
1461                return ip_local_deliver(skb);
1462
1463        return 0;
1464
1465dont_forward:
1466        if (local)
1467                return ip_local_deliver(skb);
1468        kfree_skb(skb);
1469        return 0;
1470}
1471
1472#ifdef CONFIG_IP_PIMSM_V1
1473/*
1474 * Handle IGMP messages of PIMv1
1475 */
1476
1477int pim_rcv_v1(struct sk_buff * skb)
1478{
1479        struct igmphdr *pim;
1480        struct iphdr   *encap;
1481        struct net_device  *reg_dev = NULL;
1482
1483        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
1484                goto drop;
1485
1486        pim = igmp_hdr(skb);
1487
1488        if (!mroute_do_pim ||
1489            skb->len < sizeof(*pim) + sizeof(*encap) ||
1490            pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1491                goto drop;
1492
1493        encap = (struct iphdr *)(skb_transport_header(skb) +
1494                                 sizeof(struct igmphdr));
1495        /*
1496           Check that:
1497           a. packet is really destinted to a multicast group
1498           b. packet is not a NULL-REGISTER
1499           c. packet is not truncated
1500         */
1501        if (!ipv4_is_multicast(encap->daddr) ||
1502            encap->tot_len == 0 ||
1503            ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
1504                goto drop;
1505
1506        read_lock(&mrt_lock);
1507        if (reg_vif_num >= 0)
1508                reg_dev = vif_table[reg_vif_num].dev;
1509        if (reg_dev)
1510                dev_hold(reg_dev);
1511        read_unlock(&mrt_lock);
1512
1513        if (reg_dev == NULL)
1514                goto drop;
1515
1516        skb->mac_header = skb->network_header;
1517        skb_pull(skb, (u8*)encap - skb->data);
1518        skb_reset_network_header(skb);
1519        skb->dev = reg_dev;
1520        skb->protocol = htons(ETH_P_IP);
1521        skb->ip_summed = 0;
1522        skb->pkt_type = PACKET_HOST;
1523        dst_release(skb->dst);
1524        skb->dst = NULL;
1525        reg_dev->stats.rx_bytes += skb->len;
1526        reg_dev->stats.rx_packets++;
1527        nf_reset(skb);
1528        netif_rx(skb);
1529        dev_put(reg_dev);
1530        return 0;
1531 drop:
1532        kfree_skb(skb);
1533        return 0;
1534}
1535#endif
1536
1537#ifdef CONFIG_IP_PIMSM_V2
1538static int pim_rcv(struct sk_buff * skb)
1539{
1540        struct pimreghdr *pim;
1541        struct iphdr   *encap;
1542        struct net_device  *reg_dev = NULL;
1543
1544        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
1545                goto drop;
1546
1547        pim = (struct pimreghdr *)skb_transport_header(skb);
1548        if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1549            (pim->flags&PIM_NULL_REGISTER) ||
1550            (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1551             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1552                goto drop;
1553
1554        /* check if the inner packet is destined to mcast group */
1555        encap = (struct iphdr *)(skb_transport_header(skb) +
1556                                 sizeof(struct pimreghdr));
1557        if (!ipv4_is_multicast(encap->daddr) ||
1558            encap->tot_len == 0 ||
1559            ntohs(encap->tot_len) + sizeof(*pim) > skb->len)
1560                goto drop;
1561
1562        read_lock(&mrt_lock);
1563        if (reg_vif_num >= 0)
1564                reg_dev = vif_table[reg_vif_num].dev;
1565        if (reg_dev)
1566                dev_hold(reg_dev);
1567        read_unlock(&mrt_lock);
1568
1569        if (reg_dev == NULL)
1570                goto drop;
1571
1572        skb->mac_header = skb->network_header;
1573        skb_pull(skb, (u8*)encap - skb->data);
1574        skb_reset_network_header(skb);
1575        skb->dev = reg_dev;
1576        skb->protocol = htons(ETH_P_IP);
1577        skb->ip_summed = 0;
1578        skb->pkt_type = PACKET_HOST;
1579        dst_release(skb->dst);
1580        reg_dev->stats.rx_bytes += skb->len;
1581        reg_dev->stats.rx_packets++;
1582        skb->dst = NULL;
1583        nf_reset(skb);
1584        netif_rx(skb);
1585        dev_put(reg_dev);
1586        return 0;
1587 drop:
1588        kfree_skb(skb);
1589        return 0;
1590}
1591#endif
1592
1593static int
1594ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1595{
1596        int ct;
1597        struct rtnexthop *nhp;
1598        struct net_device *dev = vif_table[c->mfc_parent].dev;
1599        u8 *b = skb_tail_pointer(skb);
1600        struct rtattr *mp_head;
1601
1602        if (dev)
1603                RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1604
1605        mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1606
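            /* One nexthop per output vif: a TTL below 255 in the ttls[]
             * vector marks the vif as forwarding for this cache entry. */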
1607        for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1608                if (c->mfc_un.res.ttls[ct] < 255) {
1609                        if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1610                                goto rtattr_failure;
1611                        nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1612                        nhp->rtnh_flags = 0;
1613                        nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1614                        nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
1615                        nhp->rtnh_len = sizeof(*nhp);
1616                }
1617        }
1618        mp_head->rta_type = RTA_MULTIPATH;
1619        mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1620        rtm->rtm_type = RTN_MULTICAST;
1621        return 1;
1622
1623rtattr_failure:
1624        nlmsg_trim(skb, b);
1625        return -EMSGSIZE;
1626}
1627
1628int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1629{
1630        int err;
1631        struct mfc_cache *cache;
1632        struct rtable *rt = skb->rtable;
1633
1634        read_lock(&mrt_lock);
1635        cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
1636
1637        if (cache == NULL) {
1638                struct sk_buff *skb2;
1639                struct iphdr *iph;
1640                struct net_device *dev;
1641                int vif;
1642
1643                if (nowait) {
1644                        read_unlock(&mrt_lock);
1645                        return -EAGAIN;
1646                }
1647
1648                dev = skb->dev;
1649                if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1650                        read_unlock(&mrt_lock);
1651                        return -ENODEV;
1652                }
1653                skb2 = skb_clone(skb, GFP_ATOMIC);
1654                if (!skb2) {
1655                        read_unlock(&mrt_lock);
1656                        return -ENOMEM;
1657                }
1658
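                    /* Build a skeleton IP header for the resolution
                     * request; version 0 marks it as locally generated
                     * rather than a packet received off the wire. */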
1659                skb_push(skb2, sizeof(struct iphdr));
1660                skb_reset_network_header(skb2);
1661                iph = ip_hdr(skb2);
1662                iph->ihl = sizeof(struct iphdr) >> 2;
1663                iph->saddr = rt->rt_src;
1664                iph->daddr = rt->rt_dst;
1665                iph->version = 0;
1666                err = ipmr_cache_unresolved(vif, skb2);
1667                read_unlock(&mrt_lock);
1668                return err;
1669        }
1670
1671        if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
1672                cache->mfc_flags |= MFC_NOTIFY;
1673        err = ipmr_fill_mroute(skb, cache, rtm);
1674        read_unlock(&mrt_lock);
1675        return err;
1676}
1677
1678#ifdef CONFIG_PROC_FS
1679/*
1680 *        The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1681 */
1682struct ipmr_vif_iter {
1683        int ct;
1684};
1685
1686static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1687                                           loff_t pos)
1688{
1689        for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
1690                if (!VIF_EXISTS(iter->ct))
1691                        continue;
1692                if (pos-- == 0)
1693                        return &vif_table[iter->ct];
1694        }
1695        return NULL;
1696}
1697
1698static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1699        __acquires(mrt_lock)
1700{
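            /* Intentionally returns with mrt_lock read-held: the seq_file
             * core guarantees ipmr_vif_seq_stop() runs on every path and
             * releases it.  Per-function lock checkers report this as a
             * leaked lock, hence the __acquires() annotation above. */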
1701        read_lock(&mrt_lock);
1702        return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
1703                : SEQ_START_TOKEN;
1704}
1705
1706static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1707{
1708        struct ipmr_vif_iter *iter = seq->private;
1709
1710        ++*pos;
1711        if (v == SEQ_START_TOKEN)
1712                return ipmr_vif_seq_idx(iter, 0);
1713
1714        while (++iter->ct < maxvif) {
1715                if (!VIF_EXISTS(iter->ct))
1716                        continue;
1717                return &vif_table[iter->ct];
1718        }
1719        return NULL;
1720}
1721
1722static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1723        __releases(mrt_lock)
1724{
1725        read_unlock(&mrt_lock);
1726}
1727
1728static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1729{
1730        if (v == SEQ_START_TOKEN) {
1731                seq_puts(seq,
1732                         "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1733        } else {
1734                const struct vif_device *vif = v;
1735                const char *name = vif->dev ? vif->dev->name : "none";
1736
1737                seq_printf(seq,
1738                           "%2zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1739                           vif - vif_table,
1740                           name, vif->bytes_in, vif->pkt_in,
1741                           vif->bytes_out, vif->pkt_out,
1742                           vif->flags, vif->local, vif->remote);
1743        }
1744        return 0;
1745}
1746
1747static const struct seq_operations ipmr_vif_seq_ops = {
1748        .start = ipmr_vif_seq_start,
1749        .next  = ipmr_vif_seq_next,
1750        .stop  = ipmr_vif_seq_stop,
1751        .show  = ipmr_vif_seq_show,
1752};
1753
1754static int ipmr_vif_open(struct inode *inode, struct file *file)
1755{
1756        return seq_open_private(file, &ipmr_vif_seq_ops,
1757                        sizeof(struct ipmr_vif_iter));
1758}
1759
1760static const struct file_operations ipmr_vif_fops = {
1761        .owner   = THIS_MODULE,
1762        .open    = ipmr_vif_open,
1763        .read    = seq_read,
1764        .llseek  = seq_lseek,
1765        .release = seq_release_private,
1766};
1767
1768struct ipmr_mfc_iter {
1769        struct mfc_cache **cache;
1770        int ct;
1771};
1772
1773
1774static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1775{
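            /* Returns with mrt_lock read-held for entries found in
             * mfc_cache_array, or with mfc_unres_lock held for entries
             * from mfc_unres_queue; it->cache records which list (and
             * therefore which lock) is active, and ipmr_mfc_seq_stop()
             * releases the matching one. */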
1776        struct mfc_cache *mfc;
1777
1778        it->cache = mfc_cache_array;
1779        read_lock(&mrt_lock);
1780        for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1781                for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
1782                        if (pos-- == 0)
1783                                return mfc;
1784        read_unlock(&mrt_lock);
1785
1786        it->cache = &mfc_unres_queue;
1787        spin_lock_bh(&mfc_unres_lock);
1788        for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1789                if (pos-- == 0)
1790                        return mfc;
1791        spin_unlock_bh(&mfc_unres_lock);
1792
1793        it->cache = NULL;
1794        return NULL;
1795}
1796
1797
1798static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1799{
1800        struct ipmr_mfc_iter *it = seq->private;
1801        it->cache = NULL;
1802        it->ct = 0;
1803        return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
1804                : SEQ_START_TOKEN;
1805}
1806
1807static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1808{
1809        struct mfc_cache *mfc = v;
1810        struct ipmr_mfc_iter *it = seq->private;
1811
1812        ++*pos;
1813
1814        if (v == SEQ_START_TOKEN)
1815                return ipmr_mfc_seq_idx(seq->private, 0);
1816
1817        if (mfc->next)
1818                return mfc->next;
1819
1820        if (it->cache == &mfc_unres_queue)
1821                goto end_of_list;
1822
1823        BUG_ON(it->cache != mfc_cache_array);
1824
1825        while (++it->ct < MFC_LINES) {
1826                mfc = mfc_cache_array[it->ct];
1827                if (mfc)
1828                        return mfc;
1829        }
1830
1831        /* exhausted cache_array, show unresolved */
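            /* Lock hand-off: drop mrt_lock before taking mfc_unres_lock so
             * that exactly one of the two locks is held whenever an entry
             * is handed back to the seq_file core. */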
1832        read_unlock(&mrt_lock);
1833        it->cache = &mfc_unres_queue;
1834        it->ct = 0;
1835
1836        spin_lock_bh(&mfc_unres_lock);
1837        mfc = mfc_unres_queue;
1838        if (mfc)
1839                return mfc;
1840
1841 end_of_list:
1842        spin_unlock_bh(&mfc_unres_lock);
1843        it->cache = NULL;
1844
1845        return NULL;
1846}
1847
1848static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1849{
1850        struct ipmr_mfc_iter *it = seq->private;
1851
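            /* Release whichever lock ->start()/->next() left held, as
             * recorded in it->cache; NULL means no lock is outstanding. */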
1852        if (it->cache == &mfc_unres_queue)
1853                spin_unlock_bh(&mfc_unres_lock);
1854        else if (it->cache == mfc_cache_array)
1855                read_unlock(&mrt_lock);
1856}
1857
1858static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1859{
1860        int n;
1861
1862        if (v == SEQ_START_TOKEN) {
1863                seq_puts(seq,
1864                 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1865        } else {
1866                const struct mfc_cache *mfc = v;
1867                const struct ipmr_mfc_iter *it = seq->private;
1868
1869                seq_printf(seq, "%08lX %08lX %-3d %8ld %8ld %8ld",
1870                           (unsigned long) mfc->mfc_mcastgrp,
1871                           (unsigned long) mfc->mfc_origin,
1872                           mfc->mfc_parent,
1873                           mfc->mfc_un.res.pkt,
1874                           mfc->mfc_un.res.bytes,
1875                           mfc->mfc_un.res.wrong_if);
1876
1877                if (it->cache != &mfc_unres_queue) {
1878                        for (n = mfc->mfc_un.res.minvif;
1879                             n < mfc->mfc_un.res.maxvif; n++) {
1880                                if (VIF_EXISTS(n) &&
1881                                    mfc->mfc_un.res.ttls[n] < 255)
1882                                        seq_printf(seq,
1883                                                   " %2d:%-3d",
1884                                                   n, mfc->mfc_un.res.ttls[n]);
1885                        }
1886                }
1887                seq_putc(seq, '\n');
1888        }
1889        return 0;
1890}
1891
1892static const struct seq_operations ipmr_mfc_seq_ops = {
1893        .start = ipmr_mfc_seq_start,
1894        .next  = ipmr_mfc_seq_next,
1895        .stop  = ipmr_mfc_seq_stop,
1896        .show  = ipmr_mfc_seq_show,
1897};
1898
1899static int ipmr_mfc_open(struct inode *inode, struct file *file)
1900{
1901        return seq_open_private(file, &ipmr_mfc_seq_ops,
1902                        sizeof(struct ipmr_mfc_iter));
1903}
1904
1905static const struct file_operations ipmr_mfc_fops = {
1906        .owner   = THIS_MODULE,
1907        .open    = ipmr_mfc_open,
1908        .read    = seq_read,
1909        .llseek  = seq_lseek,
1910        .release = seq_release_private,
1911};
1912#endif
1913
1914#ifdef CONFIG_IP_PIMSM_V2
1915static struct net_protocol pim_protocol = {
1916        .handler        =        pim_rcv,
1917};
1918#endif
1919
1920
1921/*
1922 *        Setup for IP multicast routing
1923 */
1924
1925int __init ip_mr_init(void)
1926{
1927        int err;
1928
1929        mrt_cachep = kmem_cache_create("ip_mrt_cache",
1930                                       sizeof(struct mfc_cache),
1931                                       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1932                                       NULL);
1933        if (!mrt_cachep)
1934                return -ENOMEM;
1935
1936        setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
1937        err = register_netdevice_notifier(&ip_mr_notifier);
1938        if (err)
1939                goto reg_notif_fail;
1940#ifdef CONFIG_PROC_FS
1941        err = -ENOMEM;
1942        if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
1943                goto proc_vif_fail;
1944        if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1945                goto proc_cache_fail;
1946#endif
1947        return 0;
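            /* Error unwind: undo the registrations in reverse order. */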
1948#ifdef CONFIG_PROC_FS
1949proc_cache_fail:
1950        proc_net_remove(&init_net, "ip_mr_vif");
1951proc_vif_fail:
1952        unregister_netdevice_notifier(&ip_mr_notifier);
1953#endif
1954reg_notif_fail:
1955        del_timer(&ipmr_expire_timer);
1956        kmem_cache_destroy(mrt_cachep);
1957        return err;
1958}