linux tcp三次握手-SYN发送

概述

sys_connect->inet_stream_connect->__inet_stream_connect->tcp_v4_connect->tcp_connect
对于tcp,
inet_stream_connect()调用tcp_v4_connect发送三次握手的第一次syn请求, 并根据socket是否阻塞来决定是否调用inet_wait_for_connect来等待

tcp_v4_connect

  • 调用ip_route_connect和ip_route_newports创建或者获取路由缓存,并决定发送地址/设备, 下一跳
  • 更新状态机TCP_CLOSE->TCP_SYN_SENT
  • inet_hash_connect(&tcp_death_row, sk); 如果socket没有bind到特定端口,这里选择端口进行bind, 如果是reuseport判断能否recycle tw
  • tp->write_seq = secure_tcp_sequence_number() 生成初始seq序号
  • tcp_connect()发送握手包
代码节选(有删减):
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
struct inet_sock *inet = inet_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
__be16 orig_sport, orig_dport;
__be32 daddr, nexthop;
struct flowi4 *fl4;
struct rtable *rt;
int err;
struct ip_options_rcu *inet_opt;
nexthop = daddr = usin->sin_addr.s_addr;
inet_opt = rcu_dereference_protected(inet->inet_opt,
lockdep_sock_is_held(sk));
orig_sport = inet->inet_sport;
orig_dport = usin->sin_port;
fl4 = &inet->cork.fl.u.ip4;
rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, //根据fl4,查找或创建路由缓存
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
IPPROTO_TCP,
orig_sport, orig_dport, sk);
if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { // tcp不支持多播和广播
ip_rt_put(rt);
return -ENETUNREACH;
}
if (!inet_opt || !inet_opt->opt.srr)
daddr = fl4->daddr;
if (!inet->inet_saddr)
inet->inet_saddr = fl4->saddr; // 在ip_route_connect中被更新
sk_rcv_saddr_set(sk, inet->inet_saddr);
if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
/* Reset inherited state */
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
if (likely(!tp->repair))
tp->write_seq = 0;
}
if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
tcp_fetch_timewait_stamp(sk, &rt->dst);
inet->inet_dport = usin->sin_port;
sk_daddr_set(sk, daddr);
inet_csk(sk)->icsk_ext_hdr_len = 0;
if (inet_opt)
inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
tcp_set_state(sk, TCP_SYN_SENT); // 更新状态机TCP_CLOSE->TCP_SYN_SENT
err = inet_hash_connect(&tcp_death_row, sk); //bind local port,tw_recycle
if (err)
goto failure;
sk_set_txhash(sk);
rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, //分配完port后,再查一次
inet->inet_sport, inet->inet_dport, sk);
/* OK, now commit destination to socket. */
sk->sk_gso_type = SKB_GSO_TCPV4;
sk_setup_caps(sk, &rt->dst); // 获得设备的能力
//生成初始seq序号
if (!tp->write_seq && likely(!tp->repair))
tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
inet->inet_daddr,
inet->inet_sport,
usin->sin_port);
inet->inet_id = tp->write_seq ^ jiffies;
err = tcp_connect(sk);
return 0;
}

inet_hash_connect

代码节选(有删减):
/*
 * inet_hash_connect - bind and hash a socket that is about to connect.
 * @death_row: TIME_WAIT bookkeeping passed through to the worker
 * @sk:        the connecting socket
 *
 * A socket with no local port yet (inet_num == 0) gets a starting offset
 * from inet_sk_port_offset() so that the port search does not always begin
 * at the same place; an already-bound socket uses offset 0.
 *
 * Returns whatever __inet_hash_connect() returns.
 */
int inet_hash_connect(struct inet_timewait_death_row *death_row,
		      struct sock *sk)
{
	u32 start_offset;

	start_offset = inet_sk(sk)->inet_num ? 0 : inet_sk_port_offset(sk);

	return __inet_hash_connect(death_row, sk, start_offset,
				   __inet_check_established);
}
/*
 * __inet_hash_connect - give a connecting socket a local port and insert it
 * into the established hash (ehash).
 * @death_row:         supplies the hash tables via death_row->hashinfo
 * @sk:                the connecting socket
 * @port_offset:       starting offset for the ephemeral port search
 * @check_established: callback deciding whether a port that already has
 *                     owners can still be used for this connection; it may
 *                     hand back a TIME_WAIT socket to be replaced
 *
 * Two paths:
 *  - the socket already has a port: if it is the sole owner of the bind
 *    bucket it is hashed directly, otherwise check_established() decides;
 *  - no port yet: walk the local port range, first the ports with the same
 *    parity as the range start, then the other parity, and take the first
 *    port that is either unused or accepted by check_established().
 *
 * Returns 0 on success, the result of check_established() on the bound
 * path, -ENOMEM if no bind bucket could be created, or -EADDRNOTAVAIL when
 * every candidate port was rejected.
 */
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk, u32 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
struct sock *, __u16, struct inet_timewait_sock **))
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
struct inet_timewait_sock *tw = NULL;
int port = inet_sk(sk)->inet_num;
/*
 * NOTE(review): head, tb, net, ret, i, low, high, remaining, offset and
 * hint are used below but their declarations are not part of this excerpt.
 */
if (port) { // the socket was already bound to a port with bind()
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
tb = inet_csk(sk)->icsk_bind_hash;
spin_lock_bh(&head->lock);
if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { // this socket is the only owner of the port, nobody shares it
inet_ehash_nolisten(sk, NULL); // insert into ehash
spin_unlock_bh(&head->lock);
return 0;
}
/* Only the lock is dropped here; bottom halves are re-enabled after the check below. */
spin_unlock(&head->lock);
/* No definite answer... Walk to established hash table */
// Otherwise consult ehash: if a socket on the same port is in TIME_WAIT, decide whether it can be recycled; if not, it is either still connected or not in ehash at all
ret = check_established(death_row, sk, port, NULL); //__inet_check_established
local_bh_enable();
return ret;
}
inet_get_local_port_range(net, &low, &high);
high++; /* [32768, 60999] -> [32768, 61000[ */
remaining = high - low;
if (likely(remaining > 1))
remaining &= ~1U;
offset = (hint + port_offset) % remaining;
/* In first pass we try ports of @low parity.
* inet_csk_get_port() does the opposite choice.
*/
offset &= ~1U;
other_parity_scan:
port = low + offset;
/* Step by 2 so that one pass only visits ports of a single parity. */
for (i = 0; i < remaining; i += 2, port += 2) {
if (unlikely(port >= high))
port -= remaining;
if (inet_is_local_reserved_port(net, port))
continue;
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
spin_lock_bh(&head->lock);
/* Does not bother with rcv_saddr checks, because
* the established check is already unique enough.
*/
inet_bind_bucket_for_each(tb, &head->chain) {
if (net_eq(ib_net(tb), net) && tb->port == port) {
if (tb->fastreuse >= 0 ||
tb->fastreuseport >= 0)
goto next_port;
WARN_ON(hlist_empty(&tb->owners));
if (!check_established(death_row, sk, //__inet_check_established
port, &tw)) // look in ehash for a TIME_WAIT socket with the same tuple and let tcp_twsk_unique decide whether it may be reused
goto ok;
goto next_port;
}
}
/* No bucket exists for this port yet: create one owned by this connect. */
tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
net, head, port);
if (!tb) {
spin_unlock_bh(&head->lock);
return -ENOMEM;
}
tb->fastreuse = -1;
tb->fastreuseport = -1;
goto ok;
next_port:
spin_unlock_bh(&head->lock);
cond_resched();
}
/* First pass exhausted: flip the parity once and scan again. */
offset++;
if ((offset & 1) && remaining > 1)
goto other_parity_scan;
return -EADDRNOTAVAIL;
ok:
/* Advance the search hint past the port that was just taken. */
hint += i + 2;
/* Head lock still held and bh's disabled */
inet_bind_hash(sk, tb, port); // record the port number and bind bucket on the socket
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
inet_ehash_nolisten(sk, (struct sock *)tw); // remove tw (if any) and insert sk
}
if (tw)
inet_twsk_bind_unhash(tw, hinfo); // drop tw's binding to the port
spin_unlock(&head->lock);
if (tw)
inet_twsk_deschedule_put(tw); // release the recycled tw
local_bh_enable();
return 0;
}
  1. 对于已经bind端口的socket
  • 判断是否有人reuseport, 如果只有自己bind到这个port, 则调用inet_ehash_nolisten(sk, NULL);插入ehash中
  • 如果有其他人bind到这个端口,则调用__inet_check_established,
    -确认其他人是否在ehash中,不在ehash中,则可以使用这个port
    -在ehash中,并存在满足五元组的timewait状态sk,则调用tcp_twsk_unique判断是否能被回收
    -在ehash中,但是满足五元组的sk不是timewait状态,则不能使用这个port来connect。 这个就说明了两个tcp connect(), 开启reuseport后bind到相同端口,bind()能成功,但是第二个connect会失败
  2. 对于没有bind端口的socket, 则需要尝试分配端口
  • inet_sk_port_offset随机生成一个port_offset, 通过port_offset来保证端口搜索区间的随机性, 遍历这个区间,尝试分配
  • 在bhash中查找是否有其他socket bind到这个端口上,没有则表示可以分配
  • 如果有其他socket在相同的bhash bucket上,调用__inet_check_established来确认是否能分配这个端口, 过程同上
  • 分配成功后inet_bind_hash,设置端口和bhash, 并调用inet_ehash_nolisten插入ehash中
  • 如果需要,还需要释放tw socket

tcp_connect

代码节选(有删减):
/*
 * tcp_connect - build the SYN segment for @sk, queue it and transmit it.
 *
 * Returns 0 once the SYN has been queued (or immediately for a socket in
 * repair mode), -ENOBUFS if no skb could be allocated, and -ECONNREFUSED
 * when the transmit path reports exactly that error. Any other transmit
 * error is not returned; the retransmit timer armed at the end repeats
 * the SYN.
 */
int tcp_connect(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *syn;
	int rc;

	/* Initialise TCP options, MSS, windows and sequence state. */
	tcp_connect_init(sk);

	if (unlikely(tp->repair)) {
		tcp_finish_connect(sk, NULL);
		return 0;
	}

	/* Allocate the skb; headroom for the headers is reserved by the allocator. */
	syn = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
	if (unlikely(!syn))
		return -ENOBUFS;

	/* The SYN carries seq = write_seq. */
	tcp_init_nondata_skb(syn, tp->write_seq++, TCPHDR_SYN);
	tp->retrans_stamp = tcp_time_stamp;
	tcp_connect_queue_skb(sk, syn);
	tcp_ecn_send_syn(sk, syn);

	/* Send off SYN; include data in Fast Open. */
	if (tp->fastopen_req)
		rc = tcp_send_syn_data(sk, syn);
	else
		rc = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
	if (rc == -ECONNREFUSED)
		return rc;

	/* We change tp->snd_nxt after the tcp_transmit_skb() call
	 * in order to make this packet get counted in tcpOutSegs.
	 */
	tp->snd_nxt = tp->write_seq;
	tp->pushed_seq = tp->write_seq;
	TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);

	/* Timer for repeating the SYN until an answer. */
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
	return 0;
}
/*
 * tcp_init_nondata_skb - fill in the control block of a segment that
 * carries no payload.
 * @skb:   segment to initialise
 * @seq:   sequence number of the segment
 * @flags: TCP header flags to send
 *
 * A SYN or FIN consumes one sequence number, so end_seq is seq + 1 for
 * those and seq otherwise.
 */
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
{
	/* Checksum is left to the device. */
	skb->ip_summed = CHECKSUM_PARTIAL;
	skb->csum = 0;

	TCP_SKB_CB(skb)->tcp_flags = flags;
	TCP_SKB_CB(skb)->sacked = 0;

	tcp_skb_pcount_set(skb, 1);

	TCP_SKB_CB(skb)->seq = seq;
	/* end_seq is one past the last sequence number the segment occupies. */
	TCP_SKB_CB(skb)->end_seq =
		(flags & (TCPHDR_SYN | TCPHDR_FIN)) ? seq + 1 : seq;
}
/*
 * tcp_connect_queue_skb - append the connect-time segment to the socket's
 * write queue and account for it.
 * @sk:  owning socket
 * @skb: segment to queue
 *
 * Extends end_seq by the payload length, charges the skb's truesize to the
 * socket, and advances write_seq and packets_out.
 */
static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_skb_cb *cb = TCP_SKB_CB(skb);
	struct tcp_sock *tp = tcp_sk(sk);

	/* Account for any payload in the segment's end sequence number. */
	cb->end_seq += skb->len;
	__skb_header_release(skb);

	/* Put the segment on sk_write_queue. */
	__tcp_add_write_queue_tail(sk, skb);

	sk->sk_wmem_queued += skb->truesize;
	sk_mem_charge(sk, skb->truesize);

	/* The next byte to be written starts right after this segment. */
	tp->write_seq = cb->end_seq;
	tp->packets_out += tcp_skb_pcount(skb);
}

tcp_connect_init

tcp_connect_init主要是设置tcp选项, 其中比较重要的是调用tcp_select_initial_window来确定初始窗口大小

代码节选(有删减):
/*
 * tcp_connect_init - prepare a socket's TCP state before the SYN is built.
 * @sk: the connecting socket
 *
 * Computes the TCP header length (timestamps, optional MD5), derives the
 * MSS values from the route, picks the initial receive window and window
 * scale through tcp_select_initial_window(), and resets the send/receive
 * sequence variables and the retransmission state.
 */
static void tcp_connect_init(struct sock *sk)
{
const struct dst_entry *dst = __sk_dst_get(sk);
struct tcp_sock *tp = tcp_sk(sk);
__u8 rcv_wscale;
tp->tcp_header_len = sizeof(struct tcphdr) +
(sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); // reserve room for the timestamp option when enabled
#ifdef CONFIG_TCP_MD5SIG
if (tp->af_specific->md5_lookup(sk, sk))
tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
/* If user gave his TCP_MAXSEG, record it to clamp */
if (tp->rx_opt.user_mss)
tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
tp->max_window = 0;
tcp_mtup_init(sk); // initialise MTU probing
tcp_sync_mss(sk, dst_mtu(dst));
tcp_ca_dst_init(sk, dst); // set up the congestion control algorithm
if (!tp->window_clamp)
tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
tp->advmss = dst_metric_advmss(dst); // MSS advertised to the peer, derived from the route (1460 in the article's example)
/* A user-supplied MSS may only lower the advertised value. */
if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
tp->advmss = tp->rx_opt.user_mss;
tcp_initialize_rcv_mss(sk);
/* limit the window selection if the user enforce a smaller rx buffer */
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
(tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
tp->window_clamp = tcp_full_space(sk);
tcp_select_initial_window(tcp_full_space(sk), // half of sk_rcvbuf with the default scale, 87380/2
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd,
&tp->window_clamp,
sysctl_tcp_window_scaling,
&rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
tp->rx_opt.rcv_wscale = rcv_wscale;
tp->rcv_ssthresh = tp->rcv_wnd;
sk->sk_err = 0;
sock_reset_flag(sk, SOCK_DONE);
tp->snd_wnd = 0;
tcp_init_wl(tp, 0);
/* Nothing has been sent yet, so every send pointer starts at write_seq. */
tp->snd_una = tp->write_seq;
tp->snd_sml = tp->write_seq;
tp->snd_up = tp->write_seq;
tp->snd_nxt = tp->write_seq;
/* In repair mode rcv_nxt is left untouched and only the receive timestamp is refreshed. */
if (likely(!tp->repair))
tp->rcv_nxt = 0;
else
tp->rcv_tstamp = tcp_time_stamp;
tp->rcv_wup = tp->rcv_nxt;
tp->copied_seq = tp->rcv_nxt;
inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
inet_csk(sk)->icsk_retransmits = 0;
tcp_clear_retrans(tp);
}

tcp_select_initial_window

代码节选(有删减):
/*
 * tcp_init_sock - excerpt only: the elided body is represented by "...".
 * The one line kept shows where the socket's default receive buffer size
 * comes from.
 */
void tcp_init_sock(struct sock* sk)
{
...
sk->sk_rcvbuf = sysctl_tcp_rmem[1]; // 87380 in the article's example
}
/*
 * tcp_win_from_space - portion of @space that may be offered as window.
 *
 * With sysctl_tcp_adv_win_scale <= 0 the result is space >> -scale;
 * otherwise it is space minus (space >> scale). The article assumes a
 * scale of 1, which makes the result half of @space.
 */
static inline int tcp_win_from_space(int space)
{
	if (sysctl_tcp_adv_win_scale <= 0)
		return space >> (-sysctl_tcp_adv_win_scale);

	return space - (space >> sysctl_tcp_adv_win_scale);
}
/*
 * tcp_full_space - window space corresponding to the socket's whole
 * receive buffer (87380 / 2 = 43690 in the article's example).
 */
static inline int tcp_full_space(const struct sock *sk)
{
	int rcvbuf = sk->sk_rcvbuf;

	return tcp_win_from_space(rcvbuf);
}
/*
 * tcp_select_initial_window - choose the initial receive window and the
 * window scale to announce.
 * @__space:      available receive space; negative values are treated as 0
 * @mss:          segment size used to round the window
 * @rcv_wnd:      out: initial receive window
 * @window_clamp: in/out: upper bound on the window; 0 means "not set"
 * @wscale_ok:    non-zero if window scaling may be used
 * @rcv_wscale:   out: receive window scale (0..14)
 * @init_rcv_wnd: initial window in segments; 0 selects the default
 *
 * Worked example from the article (space 43690, mss 1460, scaling on):
 * the space is rounded down to 42340, the scale comes out as 7, and the
 * window is finally limited to 20 segments, i.e. 29200.
 */
void tcp_select_initial_window(int __space, __u32 mss,
			       __u32 *rcv_wnd, __u32 *window_clamp,
			       int wscale_ok, __u8 *rcv_wscale,
			       __u32 init_rcv_wnd)
{
	unsigned int avail = 0;

	if (__space >= 0)
		avail = __space;

	/* If no clamp set the clamp to the max possible scaled window */
	if (!*window_clamp)
		*window_clamp = (65535 << 14);	/* 16-bit window field plus the scale */
	avail = min(*window_clamp, avail);

	/* Quantize space offering to a multiple of mss if possible. */
	if (avail > mss)
		avail -= avail % mss;

	/* With the signed-window workaround enabled the offer is capped at MAX_TCP_WINDOW. */
	*rcv_wnd = avail;
	if (sysctl_tcp_workaround_signed_windows)
		*rcv_wnd = min(avail, MAX_TCP_WINDOW);

	*rcv_wscale = 0;
	if (wscale_ok) {
		/* Set window scaling on max possible window
		 * See RFC1323 for an explanation of the limit to 14
		 */
		avail = max_t(u32, avail, sysctl_tcp_rmem[2]);
		avail = max_t(u32, avail, sysctl_rmem_max);
		avail = min_t(u32, avail, *window_clamp);
		/* Halve until the value fits the 16-bit field, at most 14 times. */
		for (; avail > 65535 && *rcv_wscale < 14; avail >>= 1)
			(*rcv_wscale)++;
	}

	/* Compared against 1 << scale (128 when the scale is 7), not the scale itself. */
	if (mss > (1 << *rcv_wscale)) {
		if (!init_rcv_wnd) /* Use default unless specified otherwise */
			init_rcv_wnd = tcp_default_init_rwnd(mss);
		*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
	}

	/* Set the clamp no higher than max representable value */
	*window_clamp = min(65535U << *rcv_wscale, *window_clamp);
}

最后算出tcp选项
rcv_wscale=7, 因此window_clamp=65535<<7
rcv_wnd=29200

tcp_transmit_skb

代码节选(有删减):
/*
 * tcp_transmit_skb - build the TCP header for @skb and pass it to the
 * network layer.
 * @sk:       owning socket
 * @skb:      segment to send
 * @clone_it: non-zero to transmit a clone (or copy) and leave @skb itself
 *            untouched, e.g. because it stays on the write queue
 * @gfp_mask: allocation flags for the clone/copy
 *
 * Returns 0 or a negative errno from the lower layer, -ENOBUFS when the
 * clone cannot be allocated, or net_xmit_eval() of a positive NET_XMIT_*
 * code after entering CWR.
 */
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
			    gfp_t gfp_mask)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet;
	struct tcp_sock *tp;
	struct tcp_skb_cb *tcb;
	struct tcp_out_options opts;
	unsigned int tcp_options_size, tcp_header_size;
	struct tcp_md5sig_key *md5;
	struct tcphdr *th;
	int err;

	BUG_ON(!skb || !tcp_skb_pcount(skb));
	tp = tcp_sk(sk);

	/* Send a clone downwards: the original may already sit on sk->sk_write_queue. */
	if (clone_it) {
		skb_mstamp_get(&skb->skb_mstamp);
		/* Bytes in flight once this segment has been sent. */
		TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
			- tp->snd_una;
		tcp_rate_skb_sent(sk, skb);

		if (unlikely(skb_cloned(skb)))
			skb = pskb_copy(skb, gfp_mask);
		else
			skb = skb_clone(skb, gfp_mask);
		if (unlikely(!skb))
			return -ENOBUFS;
	}

	inet = inet_sk(sk);
	tcb = TCP_SKB_CB(skb);
	memset(&opts, 0, sizeof(opts));

	/* Work out the TCP options; a SYN (the connect case) has its own set. */
	if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
		tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
	else
		tcp_options_size = tcp_established_options(sk, skb, &opts,
							   &md5);
	tcp_header_size = tcp_options_size + sizeof(struct tcphdr);

	/* if no packet is in qdisc/device queue, then allow XPS to select
	 * another queue. We can be called from tcp_tsq_handler()
	 * which holds one reference to sk_wmem_alloc.
	 *
	 * TODO: Ideally, in-flight pure ACK packets should not matter here.
	 * One way to get this would be to set skb->truesize = 2 on them.
	 */
	skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);

	/* Open up tcp_header_size bytes in front of the data for the header. */
	skb_push(skb, tcp_header_size);
	/*
	 * Record where the transport header starts. The article notes that
	 * headroom was reserved for the largest possible header, so this
	 * offset is most likely non-zero.
	 */
	skb_reset_transport_header(skb);

	/* Per the article, a fresh clone has neither sk nor destructor set. */
	skb_orphan(skb);
	skb->sk = sk;
	skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
	/* Per the article: skb->hash = sk->sk_txhash, a random per-socket value. */
	skb_set_hash_from_sk(skb, sk);
	atomic_add(skb->truesize, &sk->sk_wmem_alloc);

	/* Fill in the TCP header. */
	th = (struct tcphdr *)skb->data;
	th->source = inet->inet_sport;
	th->dest = inet->inet_dport;
	th->seq = htonl(tcb->seq);
	th->ack_seq = htonl(tp->rcv_nxt);
	/* 16-bit word 6 of the header: data offset in the top 4 bits, then the flags. */
	*(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
				      tcb->tcp_flags);

	th->check = 0;
	th->urg_ptr = 0;

	/* The urg_mode check is necessary during a below snd_una win probe */
	if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
		if (before(tp->snd_up, tcb->seq + 0x10000)) {
			th->urg_ptr = htons(tp->snd_up - tcb->seq);
			th->urg = 1;
		} else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
			th->urg_ptr = htons(0xFFFF);
			th->urg = 1;
		}
	}

	tcp_options_write((__be32 *)(th + 1), tp, &opts);
	/* sk_gso_type was set to SKB_GSO_TCPV4 in tcp_v4_connect(). */
	skb_shinfo(skb)->gso_type = sk->sk_gso_type;
	if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
		th->window = htons(tcp_select_window(sk));
		tcp_ecn_send(sk, skb, th, tcp_header_size);
	} else {
		/* RFC1323: The window in SYN & SYN/ACK segments
		 * is never scaled.
		 */
		th->window = htons(min(tp->rcv_wnd, 65535U));	/* 29200 in the article's example */
	}
#ifdef CONFIG_TCP_MD5SIG
	/* Calculate the MD5 hash, as we have all we need now */
	if (md5) {
		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		tp->af_specific->calc_md5_hash(opts.hash_location,
					       md5, sk, skb);
	}
#endif

	/* tcp_v4_send_check for IPv4: compute the TCP checksum. */
	icsk->icsk_af_ops->send_check(sk, skb);

	if (likely(tcb->tcp_flags & TCPHDR_ACK))
		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));

	/* Anything beyond the header is payload. */
	if (skb->len != tcp_header_size) {
		tcp_event_data_sent(tp, sk);
		tp->data_segs_out += tcp_skb_pcount(skb);
	}

	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
		TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
			      tcp_skb_pcount(skb));

	tp->segs_out += tcp_skb_pcount(skb);
	/* OK, its time to fill skb_shinfo(skb)->gso_{segs|size} */
	skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
	skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);

	/* Our usage of tstamp should remain private */
	skb->tstamp.tv64 = 0;

	/* Cleanup our debris for IP stacks */
	memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
			       sizeof(struct inet6_skb_parm)));

	/* ip_queue_xmit for IPv4. */
	err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);

	if (likely(err <= 0))
		return err;

	/*
	 * err > 0 is a NET_XMIT_* code and signals congestion:
	 *   NET_XMIT_DROP 0x01 - skb dropped
	 *   NET_XMIT_CN   0x02 - congestion notification
	 */
	tcp_enter_cwr(sk);

	return net_xmit_eval(err);
}