/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/skbuff.h>
#include <linux/scatterlist.h>
#include <linux/splice.h>
#include <linux/net.h>
#include <linux/socket.h>
#include <linux/random.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/cache.h>
#include <linux/err.h>
#include <linux/crypto.h>

#include <net/icmp.h>
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/netdma.h>
#include <net/sock.h>

#include <asm/uaccess.h>
#include <asm/ioctls.h>
int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;

atomic_t tcp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(tcp_orphan_count);

int sysctl_tcp_mem[3] __read_mostly;
int sysctl_tcp_wmem[3] __read_mostly;
int sysctl_tcp_rmem[3] __read_mostly;

EXPORT_SYMBOL(sysctl_tcp_mem);
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);

atomic_t tcp_memory_allocated;	/* Current allocated memory. */
atomic_t tcp_sockets_allocated;	/* Current number of TCP sockets. */

EXPORT_SYMBOL(tcp_memory_allocated);
EXPORT_SYMBOL(tcp_sockets_allocated);

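/*
 * State carried from tcp_splice_read() down to the splice actor via
 * the read_descriptor_t argument.
 */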
struct tcp_splice_state {
	struct pipe_inode_info *pipe;
	size_t len;
	unsigned int flags;
};

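/* Pressure flag: try to collapse.
 * Technical note: it is used by multiple contexts non atomically.
 * All the __sk_mem_schedule() is of this nature: accounting
 * is strict, actions are advisory and have some latency.
 */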
int tcp_memory_pressure __read_mostly;

EXPORT_SYMBOL(tcp_memory_pressure);

void tcp_enter_memory_pressure(struct sock *sk)
{
	if (!tcp_memory_pressure) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES);
		tcp_memory_pressure = 1;
	}
}

EXPORT_SYMBOL(tcp_enter_memory_pressure);

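/*
 *	Wait for a TCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */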
unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;
	struct tcp_sock *tp = tcp_sk(sk);

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == TCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	 * by poll logic and correct handling of state changes
	 * made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	/*
	 * POLLHUP is certainly not done right. But poll() doesn't
	 * have a notion of HUP in just one direction, and for a
	 * socket the read side is more interesting.
	 *
	 * POLLHUP is an unmaskable event (see UNIX98 and fs/select.c):
	 * once set, poll() always returns immediately, which would make
	 * poll() on write() impossible in CLOSE_WAIT.  So we set POLLHUP
	 * if and only if shutdown has been made in both directions.
	 * It tends to be safer to return too many bits than too few.
	 *
	 * NOTE: the check for TCP_CLOSE prevents blocking on a fresh
	 * not-connected or disconnected socket. --ANK
	 */
	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		int target = sock_rcvlowat(sk, 0, INT_MAX);

		if (tp->urg_seq == tp->copied_seq &&
		    !sock_flag(sk, SOCK_URGINLINE) &&
		    tp->urg_data)
			target--;

		/* Potential race condition. If read of tp below will
		 * escape above sk->sk_state, we can be illegally awaken
		 * in SYN_* states.
		 */
		if (tp->rcv_nxt - tp->copied_seq >= target)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}

		if (tp->urg_data & TCP_URG_VALID)
			mask |= POLLPRI;
	}
	return mask;
}

int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int answ;

	switch (cmd) {
	case SIOCINQ:
		if (sk->sk_state == TCP_LISTEN)
			return -EINVAL;

		lock_sock(sk);
		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
			answ = 0;
		else if (sock_flag(sk, SOCK_URGINLINE) ||
			 !tp->urg_data ||
			 before(tp->urg_seq, tp->copied_seq) ||
			 !before(tp->urg_seq, tp->rcv_nxt)) {
			answ = tp->rcv_nxt - tp->copied_seq;

			/* Subtract 1, if FIN is in queue. */
			if (answ && !skb_queue_empty(&sk->sk_receive_queue))
				answ -= tcp_hdr((struct sk_buff *)
						sk->sk_receive_queue.prev)->fin;
		} else
			answ = tp->urg_seq - tp->copied_seq;
		release_sock(sk);
		break;
	case SIOCATMARK:
		answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
		break;
	case SIOCOUTQ:
		if (sk->sk_state == TCP_LISTEN)
			return -EINVAL;

		if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
			answ = 0;
		else
			answ = tp->write_seq - tp->snd_una;
		break;
	default:
		return -ENOIOCTLCMD;
	}

	return put_user(answ, (int __user *)arg);
}

static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
{
	TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
	tp->pushed_seq = tp->write_seq;
}

static inline int forced_push(struct tcp_sock *tp)
{
	return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
}

static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);

	skb->csum = 0;
	tcb->seq = tcb->end_seq = tp->write_seq;
	tcb->flags = TCPCB_FLAG_ACK;
	tcb->sacked = 0;
	skb_header_release(skb);
	tcp_add_write_queue_tail(sk, skb);
	sk->sk_wmem_queued += skb->truesize;
	sk_mem_charge(sk, skb->truesize);
	if (tp->nonagle & TCP_NAGLE_PUSH)
		tp->nonagle &= ~TCP_NAGLE_PUSH;
}

static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
				struct sk_buff *skb)
{
	if (flags & MSG_OOB)
		tp->snd_up = tp->write_seq;
}

static inline void tcp_push(struct sock *sk, int flags, int mss_now,
			    int nonagle)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tcp_send_head(sk)) {
		struct sk_buff *skb = tcp_write_queue_tail(sk);
		if (!(flags & MSG_MORE) || forced_push(tp))
			tcp_mark_push(tp, skb);
		tcp_mark_urg(tp, flags, skb);
		__tcp_push_pending_frames(sk, mss_now,
					  (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
	}
}

static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
				unsigned int offset, size_t len)
{
	struct tcp_splice_state *tss = rd_desc->arg.data;

	return skb_splice_bits(skb, offset, tss->pipe, tss->len, tss->flags);
}

static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss)
{
	/* Store TCP splice context information in read_descriptor_t. */
	read_descriptor_t rd_desc = {
		.arg.data = tss,
	};

	return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv);
}
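/**
 *  tcp_splice_read - splice data from TCP socket to a pipe
 * @sock:	socket to splice from
 * @ppos:	position (not valid)
 * @pipe:	pipe to splice to
 * @len:	number of bytes to splice
 * @flags:	splice modifier flags
 *
 * Description:
 *    Will read pages from given socket and fill them into a pipe.
 *
 **/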
ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
			struct pipe_inode_info *pipe, size_t len,
			unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct tcp_splice_state tss = {
		.pipe = pipe,
		.len = len,
		.flags = flags,
	};
	long timeo;
	ssize_t spliced;
	int ret;

	/*
	 * We can't seek on a socket input
	 */
	if (unlikely(*ppos))
		return -ESPIPE;

	ret = spliced = 0;

	lock_sock(sk);

	timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK);
	while (tss.len) {
		ret = __tcp_splice_read(sk, &tss);
		if (ret < 0)
			break;
		else if (!ret) {
			if (spliced)
				break;
			if (flags & SPLICE_F_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
			if (sock_flag(sk, SOCK_DONE))
				break;
			if (sk->sk_err) {
				ret = sock_error(sk);
				break;
			}
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				break;
			if (sk->sk_state == TCP_CLOSE) {
				/*
				 * This occurs when user tries to read
				 * from never connected socket.
				 */
				if (!sock_flag(sk, SOCK_DONE))
					ret = -ENOTCONN;
				break;
			}
			if (!timeo) {
				ret = -EAGAIN;
				break;
			}
			sk_wait_data(sk, &timeo);
			if (signal_pending(current)) {
				ret = sock_intr_errno(timeo);
				break;
			}
			continue;
		}
		tss.len -= ret;
		spliced += ret;

		release_sock(sk);
		lock_sock(sk);

		if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo ||
		    signal_pending(current))
			break;
	}

	release_sock(sk);

	if (spliced)
		return spliced;

	return ret;
}

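/* Allocate a skb for the write queue with room reserved for the largest
 * possible protocol header, charging it against the socket's send-buffer
 * accounting.  On failure, signal memory pressure and return NULL.
 */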
struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
{
	struct sk_buff *skb;

	/* The TCP header must be at least 32-bit aligned. */
	size = ALIGN(size, 4);

	skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
	if (skb) {
		if (sk_wmem_schedule(sk, skb->truesize)) {
			/*
			 * Make sure that we have exactly size bytes
			 * available to the caller, no more, no less.
			 */
			skb_reserve(skb, skb_tailroom(skb) - size);
			return skb;
		}
		__kfree_skb(skb);
	} else {
		sk->sk_prot->enter_memory_pressure(sk);
		sk_stream_moderate_sndbuf(sk);
	}
	return NULL;
}

static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
				size_t psize, int flags)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int mss_now, size_goal;
	int err;
	ssize_t copied;
	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);

	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
		if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_err;

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
	size_goal = tp->xmit_size_goal;
	copied = 0;

	err = -EPIPE;
	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
		goto do_error;

	while (psize > 0) {
		struct sk_buff *skb = tcp_write_queue_tail(sk);
		struct page *page = pages[poffset / PAGE_SIZE];
		int copy, i, can_coalesce;
		int offset = poffset % PAGE_SIZE;
		int size = min_t(size_t, psize, PAGE_SIZE - offset);

		if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
new_segment:
			if (!sk_stream_memory_free(sk))
				goto wait_for_sndbuf;

			skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
			if (!skb)
				goto wait_for_memory;

			skb_entail(sk, skb);
			copy = size_goal;
		}

		if (copy > size)
			copy = size;

		i = skb_shinfo(skb)->nr_frags;
		can_coalesce = skb_can_coalesce(skb, i, page, offset);
		if (!can_coalesce && i >= MAX_SKB_FRAGS) {
			tcp_mark_push(tp, skb);
			goto new_segment;
		}
		if (!sk_wmem_schedule(sk, copy))
			goto wait_for_memory;

		if (can_coalesce) {
			skb_shinfo(skb)->frags[i - 1].size += copy;
		} else {
			get_page(page);
			skb_fill_page_desc(skb, i, page, offset, copy);
		}

		skb->len += copy;
		skb->data_len += copy;
		skb->truesize += copy;
		sk->sk_wmem_queued += copy;
		sk_mem_charge(sk, copy);
		skb->ip_summed = CHECKSUM_PARTIAL;
		tp->write_seq += copy;
		TCP_SKB_CB(skb)->end_seq += copy;
		skb_shinfo(skb)->gso_segs = 0;

		if (!copied)
			TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;

		copied += copy;
		poffset += copy;
		if (!(psize -= copy))
			goto out;

		if (skb->len < size_goal || (flags & MSG_OOB))
			continue;

		if (forced_push(tp)) {
			tcp_mark_push(tp, skb);
			__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
		} else if (skb == tcp_send_head(sk))
			tcp_push_one(sk, mss_now);
		continue;

wait_for_sndbuf:
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
		if (copied)
			tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);

		if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
			goto do_error;

		mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
		size_goal = tp->xmit_size_goal;
	}

out:
	if (copied)
		tcp_push(sk, flags, mss_now, tp->nonagle);
	return copied;

do_error:
	if (copied)
		goto out;
out_err:
	return sk_stream_error(sk, flags, err);
}

ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
		     size_t size, int flags)
{
	ssize_t res;
	struct sock *sk = sock->sk;

	if (!(sk->sk_route_caps & NETIF_F_SG) ||
	    !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
		return sock_no_sendpage(sock, page, offset, size, flags);

	lock_sock(sk);
	TCP_CHECK_TIMER(sk);
	res = do_tcp_sendpages(sk, &page, offset, size, flags);
	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return res;
}

#define TCP_PAGE(sk)	(sk->sk_sndmsg_page)
#define TCP_OFF(sk)	(sk->sk_sndmsg_off)

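/* Choose how much linear (non-paged) room to request for a fresh skb in
 * tcp_sendmsg(): none on GSO-capable routes (the data lands in page frags),
 * otherwise up to one cached MSS, clamped so the remainder still fits in
 * MAX_SKB_FRAGS page fragments.
 */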
static inline int select_size(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int tmp = tp->mss_cache;

	if (sk->sk_route_caps & NETIF_F_SG) {
		if (sk_can_gso(sk))
			tmp = 0;
		else {
			int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);

			if (tmp >= pgbreak &&
			    tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
				tmp = pgbreak;
		}
	}

	return tmp;
}

int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
		size_t size)
{
	struct sock *sk = sock->sk;
	struct iovec *iov;
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int iovlen, flags;
	int mss_now, size_goal;
	int err, copied;
	long timeo;

	lock_sock(sk);
	TCP_CHECK_TIMER(sk);

	flags = msg->msg_flags;
	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);

	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
		if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_err;

	/* This should be in poll */
	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
	size_goal = tp->xmit_size_goal;

	/* Ok commence sending. */
	iovlen = msg->msg_iovlen;
	iov = msg->msg_iov;
	copied = 0;

	err = -EPIPE;
	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
		goto do_error;

	while (--iovlen >= 0) {
		int seglen = iov->iov_len;
		unsigned char __user *from = iov->iov_base;

		iov++;

		while (seglen > 0) {
			int copy;

			skb = tcp_write_queue_tail(sk);

			if (!tcp_send_head(sk) ||
			    (copy = size_goal - skb->len) <= 0) {

new_segment:
				/* Allocate new segment. If the interface is SG,
				 * allocate skb fitting to single page.
				 */
				if (!sk_stream_memory_free(sk))
					goto wait_for_sndbuf;

				skb = sk_stream_alloc_skb(sk, select_size(sk),
						sk->sk_allocation);
				if (!skb)
					goto wait_for_memory;

				/*
				 * Check whether we can use HW checksum.
				 */
				if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
					skb->ip_summed = CHECKSUM_PARTIAL;

				skb_entail(sk, skb);
				copy = size_goal;
			}

			/* Try to append data to the end of skb. */
			if (copy > seglen)
				copy = seglen;

			/* Where to copy to? */
			if (skb_tailroom(skb) > 0) {
				/* We have some space in skb head. Superb! */
				if (copy > skb_tailroom(skb))
					copy = skb_tailroom(skb);
				if ((err = skb_add_data(skb, from, copy)) != 0)
					goto do_fault;
			} else {
				int merge = 0;
				int i = skb_shinfo(skb)->nr_frags;
				struct page *page = TCP_PAGE(sk);
				int off = TCP_OFF(sk);

				if (skb_can_coalesce(skb, i, page, off) &&
				    off != PAGE_SIZE) {
					/* We can extend the last page
					 * fragment. */
					merge = 1;
				} else if (i == MAX_SKB_FRAGS ||
					   (!i &&
					   !(sk->sk_route_caps & NETIF_F_SG))) {
					/* Need to add new fragment and cannot
					 * do this because interface is non-SG,
					 * or because all the page slots are
					 * busy. */
					tcp_mark_push(tp, skb);
					goto new_segment;
				} else if (page) {
					if (off == PAGE_SIZE) {
						put_page(page);
						TCP_PAGE(sk) = page = NULL;
						off = 0;
					}
				} else
					off = 0;

				if (copy > PAGE_SIZE - off)
					copy = PAGE_SIZE - off;

				if (!sk_wmem_schedule(sk, copy))
					goto wait_for_memory;

				if (!page) {
					/* Allocate new cache page. */
					if (!(page = sk_stream_alloc_page(sk)))
						goto wait_for_memory;
				}

				/* Time to copy data. We are close to
				 * the end! */
				err = skb_copy_to_page(sk, from, skb, page,
						       off, copy);
				if (err) {
					/* If this page was new, give it to the
					 * socket so it does not get leaked.
					 */
					if (!TCP_PAGE(sk)) {
						TCP_PAGE(sk) = page;
						TCP_OFF(sk) = 0;
					}
					goto do_error;
				}

				/* Update the skb. */
				if (merge) {
					skb_shinfo(skb)->frags[i - 1].size +=
									copy;
				} else {
					skb_fill_page_desc(skb, i, page, off, copy);
					if (TCP_PAGE(sk)) {
						get_page(page);
					} else if (off + copy < PAGE_SIZE) {
						get_page(page);
						TCP_PAGE(sk) = page;
					}
				}

				TCP_OFF(sk) = off + copy;
			}

			if (!copied)
				TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;

			tp->write_seq += copy;
			TCP_SKB_CB(skb)->end_seq += copy;
			skb_shinfo(skb)->gso_segs = 0;

			from += copy;
			copied += copy;
			if ((seglen -= copy) == 0 && iovlen == 0)
				goto out;

			if (skb->len < size_goal || (flags & MSG_OOB))
				continue;

			if (forced_push(tp)) {
				tcp_mark_push(tp, skb);
				__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
			} else if (skb == tcp_send_head(sk))
				tcp_push_one(sk, mss_now);
			continue;

wait_for_sndbuf:
			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
			if (copied)
				tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);

			if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
				goto do_error;

			mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
			size_goal = tp->xmit_size_goal;
		}
	}

out:
	if (copied)
		tcp_push(sk, flags, mss_now, tp->nonagle);
	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return copied;

do_fault:
	if (!skb->len) {
		tcp_unlink_write_queue(skb, sk);
		/* It is the one place in all of TCP, except connection
		 * reset, where we can be unlinking the send_head.
		 */
		tcp_check_send_head(sk, skb);
		sk_wmem_free_skb(sk, skb);
	}

do_error:
	if (copied)
		goto out;
out_err:
	err = sk_stream_error(sk, flags, err);
	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return err;
}
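/*
 *	Handle reading urgent data. BSD has very simple semantics for
 *	this: at most one byte of urgent data, returned either out of
 *	band (MSG_OOB) or, with SO_OOBINLINE, left in the normal stream.
 */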
static int tcp_recv_urg(struct sock *sk, long timeo,
			struct msghdr *msg, int len, int flags,
			int *addr_len)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* No URG data to read. */
	if (sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data ||
	    tp->urg_data == TCP_URG_READ)
		return -EINVAL;	/* Yes this is right ! */

	if (sk->sk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DONE))
		return -ENOTCONN;

	if (tp->urg_data & TCP_URG_VALID) {
		int err = 0;
		char c = tp->urg_data;

		if (!(flags & MSG_PEEK))
			tp->urg_data = TCP_URG_READ;

		/* Read urgent data. */
		msg->msg_flags |= MSG_OOB;

		if (len > 0) {
			if (!(flags & MSG_TRUNC))
				err = memcpy_toiovec(msg->msg_iov, &c, 1);
			len = 1;
		} else
			msg->msg_flags |= MSG_TRUNC;

		return err ? -EFAULT : len;
	}

	if (sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN))
		return 0;

	/* Fixed the recv(..., MSG_OOB) behaviour.  BSD docs and
	 * the available implementations agree in this case:
	 * this call should never block, independent of the
	 * blocking state of the socket.
	 * Mike <pall@rz.uni-karlsruhe.de>
	 */
	return -EAGAIN;
}
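/* Clean up the receive buffer for full frames taken by the user,
 * then send an ACK if necessary.  COPIED is the number of bytes
 * tcp_recvmsg has given to the user so far, it speeds up the
 * calculation of whether or not we must ACK for the sake of
 * a window update.
 */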
void tcp_cleanup_rbuf(struct sock *sk, int copied)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int time_to_ack = 0;

#if TCP_DEBUG
	struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

	WARN_ON(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq));
#endif

	if (inet_csk_ack_scheduled(sk)) {
		const struct inet_connection_sock *icsk = inet_csk(sk);

		/* Delayed ACKs frequently hit locked sockets during bulk
		 * receive. */
		if (icsk->icsk_ack.blocked ||
		    /* Once-per-two-segments ACK was not sent by tcp_input.c */
		    tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss ||
		    /*
		     * If this read emptied read buffer, we send ACK, if
		     * connection is not bidirectional, user drained
		     * receive buffer and there was a small segment
		     * in queue.
		     */
		    (copied > 0 &&
		     ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
		      ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
		       !icsk->icsk_ack.pingpong)) &&
		     !atomic_read(&sk->sk_rmem_alloc)))
			time_to_ack = 1;
	}

	/* We send an ACK if we can now advertise a non-zero window
	 * which has been raised "significantly".
	 *
	 * Even if window raised up to infinity, do not send window open ACK
	 * in states, where we will not receive more. It is trash anyway.
	 */
	if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
		__u32 rcv_window_now = tcp_receive_window(tp);

		/* Optimize, __tcp_select_window() is not cheap. */
		if (2*rcv_window_now <= tp->window_clamp) {
			__u32 new_window = __tcp_select_window(sk);

			/* Send ACK now, if this read freed lots of space
			 * in our buffer. Certainly, new_window is new window.
			 * We can advertise it now, if it is not less than
			 * the current one. "Lots" means "at least twice" here.
			 */
			if (new_window && new_window >= 2 * rcv_window_now)
				time_to_ack = 1;
		}
	}
	if (time_to_ack)
		tcp_send_ack(sk);
}

static void tcp_prequeue_process(struct sock *sk)
{
	struct sk_buff *skb;
	struct tcp_sock *tp = tcp_sk(sk);

	NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPPREQUEUED);

	/* RX process wants to run with disabled BHs, though it is not
	 * necessary */
	local_bh_disable();
	while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
		sk_backlog_rcv(sk, skb);
	local_bh_enable();

	/* Clear memory counter. */
	tp->ucopy.memory = 0;
}

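/* Find the skb in the receive queue that covers sequence number @seq and
 * report the offset of @seq within it via @off.  A FIN-bearing skb matches
 * even when @seq points one past its payload.
 */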
static inline struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
{
	struct sk_buff *skb;
	u32 offset;

	skb_queue_walk(&sk->sk_receive_queue, skb) {
		offset = seq - TCP_SKB_CB(skb)->seq;
		if (tcp_hdr(skb)->syn)
			offset--;
		if (offset < skb->len || tcp_hdr(skb)->fin) {
			*off = offset;
			return skb;
		}
	}
	return NULL;
}
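/*
 * This routine provides an alternative to tcp_recvmsg() for routines
 * that would like to handle copying from skbuffs directly in 'sendfile'
 * fashion.
 * Note:
 *	- It is assumed that the socket was locked by the caller.
 *	- The routine does not block.
 *	- At present, there is no support for reading OOB data
 *	  or for 'peeking' the socket using this routine
 *	  (although both would be easy to implement).
 */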
int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
		  sk_read_actor_t recv_actor)
{
	struct sk_buff *skb;
	struct tcp_sock *tp = tcp_sk(sk);
	u32 seq = tp->copied_seq;
	u32 offset;
	int copied = 0;

	if (sk->sk_state == TCP_LISTEN)
		return -ENOTCONN;
	while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
		if (offset < skb->len) {
			int used;
			size_t len;

			len = skb->len - offset;
			/* Stop reading if we hit a patch of urgent data */
			if (tp->urg_data) {
				u32 urg_offset = tp->urg_seq - seq;
				if (urg_offset < len)
					len = urg_offset;
				if (!len)
					break;
			}
			used = recv_actor(desc, skb, offset, len);
			if (used < 0) {
				if (!copied)
					copied = used;
				break;
			} else if (used <= len) {
				seq += used;
				copied += used;
				offset += used;
			}
			/*
			 * If recv_actor drops the lock (e.g. TCP splice
			 * receive) the skb pointer might be invalid when
			 * getting here: tcp_collapse might have deleted it
			 * while aggregating skbs from the socket queue.
			 */
			skb = tcp_recv_skb(sk, seq-1, &offset);
			if (!skb || (offset+1 != skb->len))
				break;
		}
		if (tcp_hdr(skb)->fin) {
			sk_eat_skb(sk, skb, 0);
			++seq;
			break;
		}
		sk_eat_skb(sk, skb, 0);
		if (!desc->count)
			break;
	}
	tp->copied_seq = seq;

	tcp_rcv_space_adjust(sk);

	/* Clean up data we have read: This will do ACK frames. */
	if (copied > 0)
		tcp_cleanup_rbuf(sk, copied);
	return copied;
}
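/*
 *	This routine copies from a sock struct into the user buffer.
 *
 *	Technical note: in 2.3 we work on _locked_ socket, so that
 *	tricks with *seq access order and skb->users are not required.
 *	Probably, code can be easily improved even more.
 */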
int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		size_t len, int nonblock, int flags, int *addr_len)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int copied = 0;
	u32 peek_seq;
	u32 *seq;
	unsigned long used;
	int err;
	int target;		/* Read at least this many bytes */
	long timeo;
	struct task_struct *user_recv = NULL;
	int copied_early = 0;
	struct sk_buff *skb;

	lock_sock(sk);

	TCP_CHECK_TIMER(sk);

	err = -ENOTCONN;
	if (sk->sk_state == TCP_LISTEN)
		goto out;

	timeo = sock_rcvtimeo(sk, nonblock);

	/* Urgent data needs to be handled specially. */
	if (flags & MSG_OOB)
		goto recv_urg;

	seq = &tp->copied_seq;
	if (flags & MSG_PEEK) {
		peek_seq = tp->copied_seq;
		seq = &peek_seq;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

#ifdef CONFIG_NET_DMA
	tp->ucopy.dma_chan = NULL;
	preempt_disable();
	skb = skb_peek_tail(&sk->sk_receive_queue);
	{
		int available = 0;

		if (skb)
			available = TCP_SKB_CB(skb)->seq + skb->len - (*seq);
		if ((available < target) &&
		    (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
		    !sysctl_tcp_low_latency &&
		    __get_cpu_var(softnet_data).net_dma) {
			preempt_enable_no_resched();
			tp->ucopy.pinned_list =
					dma_pin_iovec_pages(msg->msg_iov, len);
		} else {
			preempt_enable_no_resched();
		}
	}
#endif

	do {
		u32 offset;

		/* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
		if (tp->urg_data && tp->urg_seq == *seq) {
			if (copied)
				break;
			if (signal_pending(current)) {
				copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
				break;
			}
		}

		/* Next get a buffer. */

		skb = skb_peek(&sk->sk_receive_queue);
		do {
			if (!skb)
				break;

			/* Now that we have two receive queues this
			 * shouldn't happen.
			 */
			if (before(*seq, TCP_SKB_CB(skb)->seq)) {
				printk(KERN_INFO "recvmsg bug: copied %X "
				       "seq %X\n", *seq, TCP_SKB_CB(skb)->seq);
				break;
			}
			offset = *seq - TCP_SKB_CB(skb)->seq;
			if (tcp_hdr(skb)->syn)
				offset--;
			if (offset < skb->len)
				goto found_ok_skb;
			if (tcp_hdr(skb)->fin)
				goto found_fin_ok;
			WARN_ON(!(flags & MSG_PEEK));
			skb = skb->next;
		} while (skb != (struct sk_buff *)&sk->sk_receive_queue);

		/* Well, if we have backlog, try to process it now yet. */

		if (copied >= target && !sk->sk_backlog.tail)
			break;

		if (copied) {
			if (sk->sk_err ||
			    sk->sk_state == TCP_CLOSE ||
			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
			    !timeo ||
			    signal_pending(current))
				break;
		} else {
			if (sock_flag(sk, SOCK_DONE))
				break;

			if (sk->sk_err) {
				copied = sock_error(sk);
				break;
			}

			if (sk->sk_shutdown & RCV_SHUTDOWN)
				break;

			if (sk->sk_state == TCP_CLOSE) {
				if (!sock_flag(sk, SOCK_DONE)) {
					/* This occurs when user tries to read
					 * from never connected socket.
					 */
					copied = -ENOTCONN;
					break;
				}
				break;
			}

			if (!timeo) {
				copied = -EAGAIN;
				break;
			}

			if (signal_pending(current)) {
				copied = sock_intr_errno(timeo);
				break;
			}
		}

		tcp_cleanup_rbuf(sk, copied);

		if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
			/* Install new reader */
			if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) {
				user_recv = current;
				tp->ucopy.task = user_recv;
				tp->ucopy.iov = msg->msg_iov;
			}

			tp->ucopy.len = len;

			WARN_ON(tp->copied_seq != tp->rcv_nxt &&
				!(flags & (MSG_PEEK | MSG_TRUNC)));

			/* Ugly... If prequeue is not empty, we have to
			 * process it before releasing socket, otherwise
			 * order will be broken at second iteration.
			 * More elegant solution is required!!!
			 *
			 * Look: we have the following (pseudo)queues:
			 *
			 * 1. packets in flight
			 * 2. backlog
			 * 3. prequeue
			 * 4. receive_queue
			 *
			 * Each queue can be processed only if the next ones
			 * are empty. At this point we have empty receive_queue.
			 * But prequeue _can_ be not empty after 2nd iteration,
			 * when we jumped to start of loop because backlog
			 * processing added something to receive_queue.
			 * We cannot release_sock(), because backlog contains
			 * packets arrived _after_ prequeued ones.
			 *
			 * Shortly, algorithm is clear --- to process all prequeued
			 * data and backlog, before releasing socket, otherwise
			 * order would be broken at second iteration.
			 */
			if (!skb_queue_empty(&tp->ucopy.prequeue))
				goto do_prequeue;
		}

		if (copied >= target) {
			/* Do not sleep, just process backlog. */
			release_sock(sk);
			lock_sock(sk);
		} else
			sk_wait_data(sk, &timeo);

#ifdef CONFIG_NET_DMA
		tp->ucopy.wakeup = 0;
#endif

		if (user_recv) {
			int chunk;

			/* Account for any data directly copied to the
			 * iovec while the backlog was processed. */
			if ((chunk = len - tp->ucopy.len) != 0) {
				NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
				len -= chunk;
				copied += chunk;
			}

			if (tp->rcv_nxt == tp->copied_seq &&
			    !skb_queue_empty(&tp->ucopy.prequeue)) {
do_prequeue:
				tcp_prequeue_process(sk);

				if ((chunk = len - tp->ucopy.len) != 0) {
					NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
					len -= chunk;
					copied += chunk;
				}
			}
		}
		if ((flags & MSG_PEEK) && peek_seq != tp->copied_seq) {
			if (net_ratelimit())
				printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",
				       current->comm, task_pid_nr(current));
			peek_seq = tp->copied_seq;
		}
		continue;

	found_ok_skb:
		/* Ok so how much can we use? */
		used = skb->len - offset;
		if (len < used)
			used = len;

		/* Do we have urgent data here? */
		if (tp->urg_data) {
			u32 urg_offset = tp->urg_seq - *seq;
			if (urg_offset < used) {
				if (!urg_offset) {
					if (!sock_flag(sk, SOCK_URGINLINE)) {
						++*seq;
						offset++;
						used--;
						if (!used)
							goto skip_copy;
					}
				} else
					used = urg_offset;
			}
		}

		if (!(flags & MSG_TRUNC)) {
#ifdef CONFIG_NET_DMA
			if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
				tp->ucopy.dma_chan = get_softnet_dma();

			if (tp->ucopy.dma_chan) {
				tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec(
					tp->ucopy.dma_chan, skb, offset,
					msg->msg_iov, used,
					tp->ucopy.pinned_list);

				if (tp->ucopy.dma_cookie < 0) {

					printk(KERN_ALERT "dma_cookie < 0\n");

					/* Exception. Bailout! */
					if (!copied)
						copied = -EFAULT;
					break;
				}
				if ((offset + used) == skb->len)
					copied_early = 1;

			} else
#endif
			{
				err = skb_copy_datagram_iovec(skb, offset,
						msg->msg_iov, used);
				if (err) {
					/* Exception. Bailout! */
					if (!copied)
						copied = -EFAULT;
					break;
				}
			}
		}

		*seq += used;
		copied += used;
		len -= used;

		tcp_rcv_space_adjust(sk);

skip_copy:
		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {
			tp->urg_data = 0;
			tcp_fast_path_check(sk);
		}
		if (used + offset < skb->len)
			continue;

		if (tcp_hdr(skb)->fin)
			goto found_fin_ok;
		if (!(flags & MSG_PEEK)) {
			sk_eat_skb(sk, skb, copied_early);
			copied_early = 0;
		}
		continue;

	found_fin_ok:
		/* Process the FIN. */
		++*seq;
		if (!(flags & MSG_PEEK)) {
			sk_eat_skb(sk, skb, copied_early);
			copied_early = 0;
		}
		break;
	} while (len > 0);

	if (user_recv) {
		if (!skb_queue_empty(&tp->ucopy.prequeue)) {
			int chunk;

			tp->ucopy.len = copied > 0 ? len : 0;

			tcp_prequeue_process(sk);

			if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) {
				NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
				len -= chunk;
				copied += chunk;
			}
		}

		tp->ucopy.task = NULL;
		tp->ucopy.len = 0;
	}

#ifdef CONFIG_NET_DMA
	if (tp->ucopy.dma_chan) {
		dma_cookie_t done, used;

		dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);

		while (dma_async_memcpy_complete(tp->ucopy.dma_chan,
						 tp->ucopy.dma_cookie, &done,
						 &used) == DMA_IN_PROGRESS) {
			/* do partial cleanup of sk_async_wait_queue */
			while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
			       (dma_async_is_complete(skb->dma_cookie, done,
						      used) == DMA_SUCCESS)) {
				__skb_dequeue(&sk->sk_async_wait_queue);
				kfree_skb(skb);
			}
		}

		/* Safe to free early-copied skbs now */
		__skb_queue_purge(&sk->sk_async_wait_queue);
		dma_chan_put(tp->ucopy.dma_chan);
		tp->ucopy.dma_chan = NULL;
	}
	if (tp->ucopy.pinned_list) {
		dma_unpin_iovec_pages(tp->ucopy.pinned_list);
		tp->ucopy.pinned_list = NULL;
	}
#endif

	/* According to UNIX98, msg_name/msg_namelen are ignored
	 * on connected socket. I was just happy when found this 8) --ANK
	 */

	/* Clean up data we have read: This will do ACK frames. */
	tcp_cleanup_rbuf(sk, copied);

	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return copied;

out:
	TCP_CHECK_TIMER(sk);
	release_sock(sk);
	return err;

recv_urg:
	err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len);
	goto out;
}

void tcp_set_state(struct sock *sk, int state)
{
	int oldstate = sk->sk_state;

	switch (state) {
	case TCP_ESTABLISHED:
		if (oldstate != TCP_ESTABLISHED)
			TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
		break;

	case TCP_CLOSE:
		if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED)
			TCP_INC_STATS(sock_net(sk), TCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == TCP_ESTABLISHED)
			TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;

#ifdef STATE_TRACE
	SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
#endif
}
EXPORT_SYMBOL_GPL(tcp_set_state);
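/*
 *	State processing on a close. This implements the state shift for
 *	sending our FIN frame. Note that we only send a FIN for some
 *	states; the table below maps each state to its successor on close.
 */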
static const unsigned char new_state[16] = {
  /* current state:        new state:      action:	*/
  /* (Invalid)		*/ TCP_CLOSE,
  /* TCP_ESTABLISHED	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
  /* TCP_SYN_SENT	*/ TCP_CLOSE,
  /* TCP_SYN_RECV	*/ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
  /* TCP_FIN_WAIT1	*/ TCP_FIN_WAIT1,
  /* TCP_FIN_WAIT2	*/ TCP_FIN_WAIT2,
  /* TCP_TIME_WAIT	*/ TCP_CLOSE,
  /* TCP_CLOSE		*/ TCP_CLOSE,
  /* TCP_CLOSE_WAIT	*/ TCP_LAST_ACK | TCP_ACTION_FIN,
  /* TCP_LAST_ACK	*/ TCP_LAST_ACK,
  /* TCP_LISTEN		*/ TCP_CLOSE,
  /* TCP_CLOSING	*/ TCP_CLOSING,
};

static int tcp_close_state(struct sock *sk)
{
	int next = (int)new_state[sk->sk_state];
	int ns = next & TCP_STATE_MASK;

	tcp_set_state(sk, ns);

	return next & TCP_ACTION_FIN;
}
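/*
 *	Shutdown the sending side of a connection. Much like close except
 *	that we don't receive shut down or sock_set_flag(sk, SOCK_DEAD).
 */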
void tcp_shutdown(struct sock *sk, int how)
{
	/*	We need to grab some memory, and put together a FIN,
	 *	and then put it into the queue to be sent.
	 *		Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
	 */
	if (!(how & SEND_SHUTDOWN))
		return;

	/* If we've already sent a FIN, or it's a closed state, skip this. */
	if ((1 << sk->sk_state) &
	    (TCPF_ESTABLISHED | TCPF_SYN_SENT |
	     TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
		/* Clear out any half completed packets.  FIN if needed. */
		if (tcp_close_state(sk))
			tcp_send_fin(sk);
	}
}

void tcp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;
	int data_was_unread = 0;
	int state;

	lock_sock(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == TCP_LISTEN) {
		tcp_set_state(sk, TCP_CLOSE);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*  We need to flush the recv. buffs.  We do this only on the
	 *  descriptor close, not protocol-sourced closes, because the
	 *  reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq -
			  tcp_hdr(skb)->fin;
		data_was_unread += len;
		__kfree_skb(skb);
	}

	sk_mem_reclaim(sk);

	/* As outlined in RFC 2525, section 2.17, we send a RST here because
	 * data was lost. To witness the awful effects of the old behavior of
	 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
	 * GET in an FTP client, suspend the process, wait for the client to
	 * advertise a zero window, then kill -9 the FTP client, wheee...
	 * Note: timeout is always zero in such a case.
	 */
	if (data_was_unread) {
		/* Unread data was tossed, zap the connection. */
		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
		tcp_set_state(sk, TCP_CLOSE);
		tcp_send_active_reset(sk, GFP_KERNEL);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
		NET_INC_STATS_USER(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
	} else if (tcp_close_state(sk)) {
		/* We FIN if the application ate all the data before
		 * zapping the connection.
		 *
		 * RED-PEN. Formally speaking, we have broken the TCP state
		 * machine: the transitions ESTABLISHED -> FIN_WAIT1 and
		 * CLOSE_WAIT -> LAST_ACK are legal only when the FIN has
		 * actually been sent (i.e. is in window), rather than
		 * queued out of window.  The visible consequences are that
		 * we sometimes enter TIME_WAIT when it is not really
		 * required (harmless), and do not send active resets when
		 * the specs would require them.		--ANK
		 */
		tcp_send_fin(sk);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);
	atomic_inc(sk->sk_prot->orphan_count);

	/* It is the last release_sock in its life. It will remove backlog. */
	release_sock(sk);

	/* Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
		goto out;

	/*	This is a (useful) BSD violating of the RFC. There is a
	 *	problem with TCP as specified in that the other end could
	 *	keep a socket open forever with no application left this end.
	 *	We use a 3 minute timeout (about the same as BSD) then kill
	 *	our end. If they send after that then tough - BUT: long enough
	 *	that we won't make the old 4*rto = almost no time - whoops
	 *	reset mistake.
	 *
	 *	Nope, it was not mistake. It is really desired behaviour
	 *	f.e. on http servers, when such sockets are useless, but
	 *	consume significant resources. Let's do it with special
	 *	linger2	option.					--ANK
	 */
	if (sk->sk_state == TCP_FIN_WAIT2) {
		struct tcp_sock *tp = tcp_sk(sk);
		if (tp->linger2 < 0) {
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			NET_INC_STATS_BH(sock_net(sk),
					LINUX_MIB_TCPABORTONLINGER);
		} else {
			const int tmo = tcp_fin_time(sk);

			if (tmo > TCP_TIMEWAIT_LEN) {
				inet_csk_reset_keepalive_timer(sk,
						tmo - TCP_TIMEWAIT_LEN);
			} else {
				tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
				goto out;
			}
		}
	}
	if (sk->sk_state != TCP_CLOSE) {
		sk_mem_reclaim(sk);
		if (tcp_too_many_orphans(sk,
				atomic_read(sk->sk_prot->orphan_count))) {
			if (net_ratelimit())
				printk(KERN_INFO "TCP: too many orphaned "
				       "sockets\n");
			tcp_set_state(sk, TCP_CLOSE);
			tcp_send_active_reset(sk, GFP_ATOMIC);
			NET_INC_STATS_BH(sock_net(sk),
					LINUX_MIB_TCPABORTONMEMORY);
		}
	}

	if (sk->sk_state == TCP_CLOSE)
		inet_csk_destroy_sock(sk);
	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

/* These states need RST on ABORT according to RFC793 */

static inline int tcp_need_reset(int state)
{
	return (1 << state) &
	       (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_FIN_WAIT1 |
		TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
}

int tcp_disconnect(struct sock *sk, int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	int err = 0;
	int old_state = sk->sk_state;

	if (old_state != TCP_CLOSE)
		tcp_set_state(sk, TCP_CLOSE);

	/* ABORT function of RFC793 */
	if (old_state == TCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (tcp_need_reset(old_state) ||
		   (tp->snd_nxt != tp->write_seq &&
		    (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
		/* The last check adjusts for discrepancy of Linux wrt. RFC
		 * states
		 */
		tcp_send_active_reset(sk, gfp_any());
		sk->sk_err = ECONNRESET;
	} else if (old_state == TCP_SYN_SENT)
		sk->sk_err = ECONNRESET;

	tcp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	tcp_write_queue_purge(sk);
	__skb_queue_purge(&tp->out_of_order_queue);
#ifdef CONFIG_NET_DMA
	__skb_queue_purge(&sk->sk_async_wait_queue);
#endif

	inet->dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);
	tp->srtt = 0;
	if ((tp->write_seq += tp->max_window + 2) == 0)
		tp->write_seq = 1;
	icsk->icsk_backoff = 0;
	tp->snd_cwnd = 2;
	icsk->icsk_probes_out = 0;
	tp->packets_out = 0;
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_cnt = 0;
	tp->bytes_acked = 0;
	tcp_set_ca_state(sk, TCP_CA_Open);
	tcp_clear_retrans(tp);
	inet_csk_delack_init(sk);
	tcp_init_send_head(sk);
	memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
	__sk_dst_reset(sk);

	WARN_ON(inet->num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}
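/*
 *	Socket option code for TCP.
 */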
static int do_tcp_setsockopt(struct sock *sk, int level,
		int optname, char __user *optval, int optlen)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	int val;
	int err = 0;

	/* This is a string value all the others are int's */
	if (optname == TCP_CONGESTION) {
		char name[TCP_CA_NAME_MAX];

		if (optlen < 1)
			return -EINVAL;

		val = strncpy_from_user(name, optval,
					min(TCP_CA_NAME_MAX-1, optlen));
		if (val < 0)
			return -EFAULT;
		name[val] = 0;

		lock_sock(sk);
		err = tcp_set_congestion_control(sk, name);
		release_sock(sk);
		return err;
	}

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	lock_sock(sk);

	switch (optname) {
	case TCP_MAXSEG:
		/* Values greater than interface MTU won't take effect. However
		 * at the point when this call is done we typically don't yet
		 * know which interface is going to be used */
		if (val < 8 || val > MAX_TCP_WINDOW) {
			err = -EINVAL;
			break;
		}
		tp->rx_opt.user_mss = val;
		break;

	case TCP_NODELAY:
		if (val) {
			/* TCP_NODELAY is weaker than TCP_CORK, so that
			 * this option on corked socket is remembered, but
			 * it is not activated until cork is cleared.
			 *
			 * However, when TCP_NODELAY is set we make
			 * an explicit push, which overrides even TCP_CORK
			 * for currently queued segments.
			 */
			tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
			tcp_push_pending_frames(sk);
		} else {
			tp->nonagle &= ~TCP_NAGLE_OFF;
		}
		break;

	case TCP_CORK:
		/* When set indicates to always queue non-full frames.
		 * Later the user clears this option and we transmit
		 * any pending partial frames in the queue.  This is
		 * meant to be used alongside sendfile() to get properly
		 * filled frames when the user (for example) must write
		 * out headers with a write() call first and then use
		 * sendfile to send out the data parts.
		 *
		 * TCP_CORK can be set together with TCP_NODELAY and it is
		 * stronger than TCP_NODELAY.
		 */
		if (val) {
			tp->nonagle |= TCP_NAGLE_CORK;
		} else {
			tp->nonagle &= ~TCP_NAGLE_CORK;
			if (tp->nonagle&TCP_NAGLE_OFF)
				tp->nonagle |= TCP_NAGLE_PUSH;
			tcp_push_pending_frames(sk);
		}
		break;

	case TCP_KEEPIDLE:
		if (val < 1 || val > MAX_TCP_KEEPIDLE)
			err = -EINVAL;
		else {
			tp->keepalive_time = val * HZ;
			if (sock_flag(sk, SOCK_KEEPOPEN) &&
			    !((1 << sk->sk_state) &
			      (TCPF_CLOSE | TCPF_LISTEN))) {
				__u32 elapsed = tcp_time_stamp - tp->rcv_tstamp;
				if (tp->keepalive_time > elapsed)
					elapsed = tp->keepalive_time - elapsed;
				else
					elapsed = 0;
				inet_csk_reset_keepalive_timer(sk, elapsed);
			}
		}
		break;
	case TCP_KEEPINTVL:
		if (val < 1 || val > MAX_TCP_KEEPINTVL)
			err = -EINVAL;
		else
			tp->keepalive_intvl = val * HZ;
		break;
	case TCP_KEEPCNT:
		if (val < 1 || val > MAX_TCP_KEEPCNT)
			err = -EINVAL;
		else
			tp->keepalive_probes = val;
		break;
	case TCP_SYNCNT:
		if (val < 1 || val > MAX_TCP_SYNCNT)
			err = -EINVAL;
		else
			icsk->icsk_syn_retries = val;
		break;

	case TCP_LINGER2:
		if (val < 0)
			tp->linger2 = -1;
		else if (val > sysctl_tcp_fin_timeout / HZ)
			tp->linger2 = 0;
		else
			tp->linger2 = val * HZ;
		break;

	case TCP_DEFER_ACCEPT:
		icsk->icsk_accept_queue.rskq_defer_accept = 0;
		if (val > 0) {
			/* Translate value in seconds to number of
			 * retransmits */
			while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
			       val > ((TCP_TIMEOUT_INIT / HZ) <<
				      icsk->icsk_accept_queue.rskq_defer_accept))
				icsk->icsk_accept_queue.rskq_defer_accept++;
			icsk->icsk_accept_queue.rskq_defer_accept++;
		}
		break;

	case TCP_WINDOW_CLAMP:
		if (!val) {
			if (sk->sk_state != TCP_CLOSE) {
				err = -EINVAL;
				break;
			}
			tp->window_clamp = 0;
		} else
			tp->window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
						SOCK_MIN_RCVBUF / 2 : val;
		break;

	case TCP_QUICKACK:
		if (!val) {
			icsk->icsk_ack.pingpong = 1;
		} else {
			icsk->icsk_ack.pingpong = 0;
			if ((1 << sk->sk_state) &
			    (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
			    inet_csk_ack_scheduled(sk)) {
				icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
				tcp_cleanup_rbuf(sk, 1);
				if (!(val & 1))
					icsk->icsk_ack.pingpong = 1;
			}
		}
		break;

#ifdef CONFIG_TCP_MD5SIG
	case TCP_MD5SIG:
		/* Read the IP->Key mappings from userspace */
		err = tp->af_specific->md5_parse(sk, optval, optlen);
		break;
#endif

	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}

int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
		   int optlen)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (level != SOL_TCP)
		return icsk->icsk_af_ops->setsockopt(sk, level, optname,
						     optval, optlen);
	return do_tcp_setsockopt(sk, level, optname, optval, optlen);
}

#ifdef CONFIG_COMPAT
int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int optlen)
{
	if (level != SOL_TCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_tcp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(compat_tcp_setsockopt);
#endif
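/* Return information about state of tcp endpoint in API format. */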
void tcp_get_info(struct sock *sk, struct tcp_info *info)
{
	struct tcp_sock *tp = tcp_sk(sk);
	const struct inet_connection_sock *icsk = inet_csk(sk);
	u32 now = tcp_time_stamp;

	memset(info, 0, sizeof(*info));

	info->tcpi_state = sk->sk_state;
	info->tcpi_ca_state = icsk->icsk_ca_state;
	info->tcpi_retransmits = icsk->icsk_retransmits;
	info->tcpi_probes = icsk->icsk_probes_out;
	info->tcpi_backoff = icsk->icsk_backoff;

	if (tp->rx_opt.tstamp_ok)
		info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
	if (tcp_is_sack(tp))
		info->tcpi_options |= TCPI_OPT_SACK;
	if (tp->rx_opt.wscale_ok) {
		info->tcpi_options |= TCPI_OPT_WSCALE;
		info->tcpi_snd_wscale = tp->rx_opt.snd_wscale;
		info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
	}

	if (tp->ecn_flags&TCP_ECN_OK)
		info->tcpi_options |= TCPI_OPT_ECN;

	info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
	info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato);
	info->tcpi_snd_mss = tp->mss_cache;
	info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;

	if (sk->sk_state == TCP_LISTEN) {
		info->tcpi_unacked = sk->sk_ack_backlog;
		info->tcpi_sacked = sk->sk_max_ack_backlog;
	} else {
		info->tcpi_unacked = tp->packets_out;
		info->tcpi_sacked = tp->sacked_out;
	}
	info->tcpi_lost = tp->lost_out;
	info->tcpi_retrans = tp->retrans_out;
	info->tcpi_fackets = tp->fackets_out;

	info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
	info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
	info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);

	info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
	info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
	info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3;
	info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2;
	info->tcpi_snd_ssthresh = tp->snd_ssthresh;
	info->tcpi_snd_cwnd = tp->snd_cwnd;
	info->tcpi_advmss = tp->advmss;
	info->tcpi_reordering = tp->reordering;

	info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3;
	info->tcpi_rcv_space = tp->rcvq_space.space;

	info->tcpi_total_retrans = tp->total_retrans;
}

EXPORT_SYMBOL_GPL(tcp_get_info);

static int do_tcp_getsockopt(struct sock *sk, int level,
		int optname, char __user *optval, int __user *optlen)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	len = min_t(unsigned int, len, sizeof(int));

	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case TCP_MAXSEG:
		val = tp->mss_cache;
		if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
			val = tp->rx_opt.user_mss;
		break;
	case TCP_NODELAY:
		val = !!(tp->nonagle&TCP_NAGLE_OFF);
		break;
	case TCP_CORK:
		val = !!(tp->nonagle&TCP_NAGLE_CORK);
		break;
	case TCP_KEEPIDLE:
		val = (tp->keepalive_time ? : sysctl_tcp_keepalive_time) / HZ;
		break;
	case TCP_KEEPINTVL:
		val = (tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl) / HZ;
		break;
	case TCP_KEEPCNT:
		val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
		break;
	case TCP_SYNCNT:
		val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
		break;
	case TCP_LINGER2:
		val = tp->linger2;
		if (val >= 0)
			val = (val ? : sysctl_tcp_fin_timeout) / HZ;
		break;
	case TCP_DEFER_ACCEPT:
		val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
			((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
		break;
	case TCP_WINDOW_CLAMP:
		val = tp->window_clamp;
		break;
	case TCP_INFO: {
		struct tcp_info info;

		if (get_user(len, optlen))
			return -EFAULT;

		tcp_get_info(sk, &info);

		len = min_t(unsigned int, len, sizeof(info));
		if (put_user(len, optlen))
			return -EFAULT;
		if (copy_to_user(optval, &info, len))
			return -EFAULT;
		return 0;
	}
	case TCP_QUICKACK:
		val = !icsk->icsk_ack.pingpong;
		break;

	case TCP_CONGESTION:
		if (get_user(len, optlen))
			return -EFAULT;
		len = min_t(unsigned int, len, TCP_CA_NAME_MAX);
		if (put_user(len, optlen))
			return -EFAULT;
		if (copy_to_user(optval, icsk->icsk_ca_ops->name, len))
			return -EFAULT;
		return 0;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, len))
		return -EFAULT;
	return 0;
}

int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
		   int __user *optlen)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (level != SOL_TCP)
		return icsk->icsk_af_ops->getsockopt(sk, level, optname,
						     optval, optlen);
	return do_tcp_getsockopt(sk, level, optname, optval, optlen);
}

#ifdef CONFIG_COMPAT
int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	if (level != SOL_TCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_tcp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL(compat_tcp_getsockopt);
#endif
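/* Segment a TSO/GSO super-packet into MSS-sized segments, fixing up the
 * sequence numbers and checksums of the resulting chain.  Returns the
 * segment list, NULL when the lower layers can segment it themselves,
 * or an ERR_PTR on failure.
 */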
struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	struct tcphdr *th;
	unsigned thlen;
	unsigned int seq;
	__be32 delta;
	unsigned int oldlen;
	unsigned int len;

	if (!pskb_may_pull(skb, sizeof(*th)))
		goto out;

	th = tcp_hdr(skb);
	thlen = th->doff * 4;
	if (thlen < sizeof(*th))
		goto out;

	if (!pskb_may_pull(skb, thlen))
		goto out;

	oldlen = (u16)~skb->len;
	__skb_pull(skb, thlen);

	if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
		/* Packet is from an untrusted source, reset gso_segs. */
		int type = skb_shinfo(skb)->gso_type;
		int mss;

		if (unlikely(type &
			     ~(SKB_GSO_TCPV4 |
			       SKB_GSO_DODGY |
			       SKB_GSO_TCP_ECN |
			       SKB_GSO_TCPV6 |
			       0) ||
			     !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
			goto out;

		mss = skb_shinfo(skb)->gso_size;
		skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss);

		segs = NULL;
		goto out;
	}

	segs = skb_segment(skb, features);
	if (IS_ERR(segs))
		goto out;

	len = skb_shinfo(skb)->gso_size;
	delta = htonl(oldlen + (thlen + len));

	skb = segs;
	th = tcp_hdr(skb);
	seq = ntohl(th->seq);

	do {
		th->fin = th->psh = 0;

		th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
				       (__force u32)delta));
		if (skb->ip_summed != CHECKSUM_PARTIAL)
			th->check =
			     csum_fold(csum_partial(skb_transport_header(skb),
						    thlen, skb->csum));

		seq += len;
		skb = skb->next;
		th = tcp_hdr(skb);

		th->seq = htonl(seq);
		th->cwr = 0;
	} while (skb->next);

	delta = htonl(oldlen + (skb->tail - skb->transport_header) +
		      skb->data_len);
	th->check = ~csum_fold((__force __wsum)((__force u32)th->check +
				(__force u32)delta));
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		th->check = csum_fold(csum_partial(skb_transport_header(skb),
						   thlen, skb->csum));

out:
	return segs;
}
EXPORT_SYMBOL(tcp_tso_segment);

#ifdef CONFIG_TCP_MD5SIG
static unsigned long tcp_md5sig_users;
static struct tcp_md5sig_pool **tcp_md5sig_pool;
static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);

static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool **pool)
{
	int cpu;
	for_each_possible_cpu(cpu) {
		struct tcp_md5sig_pool *p = *per_cpu_ptr(pool, cpu);
		if (p) {
			if (p->md5_desc.tfm)
				crypto_free_hash(p->md5_desc.tfm);
			kfree(p);
			p = NULL;
		}
	}
	free_percpu(pool);
}

void tcp_free_md5sig_pool(void)
{
	struct tcp_md5sig_pool **pool = NULL;

	spin_lock_bh(&tcp_md5sig_pool_lock);
	if (--tcp_md5sig_users == 0) {
		pool = tcp_md5sig_pool;
		tcp_md5sig_pool = NULL;
	}
	spin_unlock_bh(&tcp_md5sig_pool_lock);
	if (pool)
		__tcp_free_md5sig_pool(pool);
}

EXPORT_SYMBOL(tcp_free_md5sig_pool);

static struct tcp_md5sig_pool **__tcp_alloc_md5sig_pool(void)
{
	int cpu;
	struct tcp_md5sig_pool **pool;

	pool = alloc_percpu(struct tcp_md5sig_pool *);
	if (!pool)
		return NULL;

	for_each_possible_cpu(cpu) {
		struct tcp_md5sig_pool *p;
		struct crypto_hash *hash;

		p = kzalloc(sizeof(*p), GFP_KERNEL);
		if (!p)
			goto out_free;
		*per_cpu_ptr(pool, cpu) = p;

		hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
		if (!hash || IS_ERR(hash))
			goto out_free;

		p->md5_desc.tfm = hash;
	}
	return pool;
out_free:
	__tcp_free_md5sig_pool(pool);
	return NULL;
}

struct tcp_md5sig_pool **tcp_alloc_md5sig_pool(void)
{
	struct tcp_md5sig_pool **pool;
	int alloc = 0;

retry:
	spin_lock_bh(&tcp_md5sig_pool_lock);
	pool = tcp_md5sig_pool;
	if (tcp_md5sig_users++ == 0) {
		alloc = 1;
		spin_unlock_bh(&tcp_md5sig_pool_lock);
	} else if (!pool) {
		tcp_md5sig_users--;
		spin_unlock_bh(&tcp_md5sig_pool_lock);
		cpu_relax();
		goto retry;
	} else
		spin_unlock_bh(&tcp_md5sig_pool_lock);

	if (alloc) {
		/* we cannot hold spinlock here because this may sleep. */
		struct tcp_md5sig_pool **p = __tcp_alloc_md5sig_pool();
		spin_lock_bh(&tcp_md5sig_pool_lock);
		if (!p) {
			tcp_md5sig_users--;
			spin_unlock_bh(&tcp_md5sig_pool_lock);
			return NULL;
		}
		pool = tcp_md5sig_pool;
		if (pool) {
			/* oops, it has already been assigned. */
			spin_unlock_bh(&tcp_md5sig_pool_lock);
			__tcp_free_md5sig_pool(p);
		} else {
			tcp_md5sig_pool = pool = p;
			spin_unlock_bh(&tcp_md5sig_pool_lock);
		}
	}
	return pool;
}

EXPORT_SYMBOL(tcp_alloc_md5sig_pool);

struct tcp_md5sig_pool *__tcp_get_md5sig_pool(int cpu)
{
	struct tcp_md5sig_pool **p;
	spin_lock_bh(&tcp_md5sig_pool_lock);
	p = tcp_md5sig_pool;
	if (p)
		tcp_md5sig_users++;
	spin_unlock_bh(&tcp_md5sig_pool_lock);
	return (p ? *per_cpu_ptr(p, cpu) : NULL);
}

EXPORT_SYMBOL(__tcp_get_md5sig_pool);

void __tcp_put_md5sig_pool(void)
{
	tcp_free_md5sig_pool();
}

EXPORT_SYMBOL(__tcp_put_md5sig_pool);

int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
			struct tcphdr *th)
{
	struct scatterlist sg;
	int err;

	/* Hash the header with a zeroed checksum field, then restore it. */
	__sum16 old_checksum = th->check;
	th->check = 0;

	sg_init_one(&sg, th, sizeof(struct tcphdr));
	err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(struct tcphdr));
	th->check = old_checksum;
	return err;
}

EXPORT_SYMBOL(tcp_md5_hash_header);

int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
			  struct sk_buff *skb, unsigned header_len)
{
	struct scatterlist sg;
	const struct tcphdr *tp = tcp_hdr(skb);
	struct hash_desc *desc = &hp->md5_desc;
	unsigned i;
	const unsigned head_data_len = skb_headlen(skb) > header_len ?
				       skb_headlen(skb) - header_len : 0;
	const struct skb_shared_info *shi = skb_shinfo(skb);

	sg_init_table(&sg, 1);

	sg_set_buf(&sg, ((u8 *) tp) + header_len, head_data_len);
	if (crypto_hash_update(desc, &sg, head_data_len))
		return 1;

	for (i = 0; i < shi->nr_frags; ++i) {
		const struct skb_frag_struct *f = &shi->frags[i];
		sg_set_page(&sg, f->page, f->size, f->page_offset);
		if (crypto_hash_update(desc, &sg, f->size))
			return 1;
	}

	return 0;
}

EXPORT_SYMBOL(tcp_md5_hash_skb_data);

int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
{
	struct scatterlist sg;

	sg_init_one(&sg, key->key, key->keylen);
	return crypto_hash_update(&hp->md5_desc, &sg, key->keylen);
}

EXPORT_SYMBOL(tcp_md5_hash_key);

#endif

void tcp_done(struct sock *sk)
{
	if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);

	tcp_set_state(sk, TCP_CLOSE);
	tcp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}
EXPORT_SYMBOL_GPL(tcp_done);

extern struct tcp_congestion_ops tcp_reno;

static __initdata unsigned long thash_entries;
static int __init set_thash_entries(char *str)
{
	if (!str)
		return 0;
	thash_entries = simple_strtoul(str, &str, 0);
	return 1;
}
__setup("thash_entries=", set_thash_entries);
void __init tcp_init(void)
{
	struct sk_buff *skb = NULL;
	unsigned long nr_pages, limit;
	int order, i, max_share;

	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));

	tcp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("tcp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	/* Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	tcp_hashinfo.ehash =
		alloc_large_system_hash("TCP established",
					sizeof(struct inet_ehash_bucket),
					thash_entries,
					(num_physpages >= 128 * 1024) ?
					13 : 15,
					0,
					&tcp_hashinfo.ehash_size,
					NULL,
					thash_entries ? 0 : 512 * 1024);
	tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size;
	for (i = 0; i < tcp_hashinfo.ehash_size; i++) {
		INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain);
		INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain);
	}
	if (inet_ehash_locks_alloc(&tcp_hashinfo))
		panic("TCP: failed to alloc ehash_locks");
	tcp_hashinfo.bhash =
		alloc_large_system_hash("TCP bind",
					sizeof(struct inet_bind_hashbucket),
					tcp_hashinfo.ehash_size,
					(num_physpages >= 128 * 1024) ?
					13 : 15,
					0,
					&tcp_hashinfo.bhash_size,
					NULL,
					64 * 1024);
	tcp_hashinfo.bhash_size = 1 << tcp_hashinfo.bhash_size;
	for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
		spin_lock_init(&tcp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
	}

	/* Try to be a bit smarter and adjust defaults depending
	 * on available memory.
	 */
	for (order = 0; ((1 << order) << PAGE_SHIFT) <
			(tcp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket));
			order++)
		;
	if (order >= 4) {
		tcp_death_row.sysctl_max_tw_buckets = 180000;
		sysctl_tcp_max_orphans = 4096 << (order - 4);
		sysctl_max_syn_backlog = 1024;
	} else if (order < 3) {
		tcp_death_row.sysctl_max_tw_buckets >>= (3 - order);
		sysctl_tcp_max_orphans >>= (3 - order);
		sysctl_max_syn_backlog = 128;
	}

	/* Set the global memory-pressure thresholds to a fraction of
	 * low memory, capped for small and large machines, with a
	 * floor of 128 pages.
	 */
	nr_pages = totalram_pages - totalhigh_pages;
	limit = min(nr_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
	limit = (limit * (nr_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
	limit = max(limit, 128UL);
	sysctl_tcp_mem[0] = limit / 4 * 3;
	sysctl_tcp_mem[1] = limit;
	sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;

	/* Set per-socket limits to no more than 1/128 the pressure threshold */
	limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
	max_share = min(4UL*1024*1024, limit);

	sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
	sysctl_tcp_wmem[1] = 16*1024;
	sysctl_tcp_wmem[2] = max(64*1024, max_share);

	sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
	sysctl_tcp_rmem[1] = 87380;
	sysctl_tcp_rmem[2] = max(87380, max_share);

	printk(KERN_INFO "TCP: Hash tables configured "
	       "(established %d bind %d)\n",
	       tcp_hashinfo.ehash_size, tcp_hashinfo.bhash_size);

	tcp_register_congestion_control(&tcp_reno);
}

EXPORT_SYMBOL(tcp_close);
EXPORT_SYMBOL(tcp_disconnect);
EXPORT_SYMBOL(tcp_getsockopt);
EXPORT_SYMBOL(tcp_ioctl);
EXPORT_SYMBOL(tcp_poll);
EXPORT_SYMBOL(tcp_read_sock);
EXPORT_SYMBOL(tcp_recvmsg);
EXPORT_SYMBOL(tcp_sendmsg);
EXPORT_SYMBOL(tcp_splice_read);
EXPORT_SYMBOL(tcp_sendpage);
EXPORT_SYMBOL(tcp_setsockopt);
EXPORT_SYMBOL(tcp_shutdown);