选择的方向能够让自己乐在其中
实验目的
基于 packetdrill TCP 三次握手脚本,测试 TCP options 中 SACK 字段的由来,此次构造模拟的是服务器端场景。
对于 TCP options,各类介绍资料已经数不胜数,本篇就不再赘述。
基础脚本
# cat tcp_3hs_000.pkt
// TCP 基础之三次握手
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 < S 0:0(0) win 10000 <mss 1460>
+0 > S. 0:0(0) ack 1 <...>
+0.01 < . 1:1(0) ack 1 win 10000
+0 accept(3, ..., ...) = 4
实验测试一
在 packetdrill 脚本中,以 < 开头的行表示由脚本构造并注入的数据包,因此 SYN 中是否携带 SACK 选项完全由脚本决定,理解起来较为简单。
+0 < S 0:0(0) win 10000 <mss 1460, nop, nop, sackOK>
// <> 表示 TCP options,sackOK 表示开启 sack 。
# cat tcp_3hs_options_sack_002.pkt
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0
+0 < S 0:0(0) win 10000 <mss 1460, nop, nop, sackOK>
+0 > S. 0:0(0) ack 1 <...>
+0.01 < . 1:1(0) ack 1 win 10000
+0 accept(3, ..., ...) = 4
#
# packetdrill tcp_3hs_options_sack_002.pkt
#
观察 tcpdump 抓包结果可以看到,SYN 的 TCP Options 中确实包含 SACK 字段,且服务器端响应的 SYN/ACK 同样包含 SACK 字段。三次握手的这一结果也表明,之后的 TCP 通讯将支持 SACK。
# tcpdump -i any port 8080 -nn
tcpdump: data link type LINUX_SLL2
tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
listening on any, link-type LINUX_SLL2 (Linux cooked v2), snapshot length 262144 bytes
14:49:27.249137 tun0 In IP 192.0.2.1.39705 > 192.168.176.242.8080: Flags [S], seq 0, win 10000, options [mss 1460,nop,nop,sackOK], length 0
14:49:27.249172 tun0 Out IP 192.168.176.242.8080 > 192.0.2.1.39705: Flags [S.], seq 2522559115, ack 1, win 64240, options [mss 1460,nop,nop,sackOK], length 0
14:49:27.259259 tun0 In IP 192.0.2.1.39705 > 192.168.176.242.8080: Flags [.], ack 1, win 10000, length 0
14:49:27.259362 tun0 Out IP 192.168.176.242.8080 > 192.0.2.1.39705: Flags [F.], seq 1, ack 1, win 64240, length 0
14:49:27.259375 tun0 In IP 192.0.2.1.39705 > 192.168.176.242.8080: Flags [R.], seq 1, ack 1, win 10000, length 0
#
之所以得到如上结果,是因为此时服务器端开启了 SACK 支持。
# sysctl -a | grep tcp_sack
net.ipv4.tcp_sack = 1
#
如果执行基础脚本,即注入 SYN 时不添加 SACK 字段,则可以看到服务器响应的 SYN/ACK 也不包含 SACK 字段,即便此时服务器端开启了 SACK 支持(net.ipv4.tcp_sack = 1)。这说明服务器端会解析收到的 SYN:如果 SYN 不包含 SACK 字段,则构造发送的 SYN/ACK 同样不包含 SACK 字段。三次握手的这一结果也表明,之后的 TCP 通讯将不支持 SACK。
# packetdrill tcp_3hs_000.pkt
#
#
# tcpdump -i any port 8080 -nn
tcpdump: data link type LINUX_SLL2
tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
listening on any, link-type LINUX_SLL2 (Linux cooked v2), snapshot length 262144 bytes
17:37:38.309119 tun0 In IP 192.0.2.1.33615 > 192.168.71.111.8080: Flags [S], seq 0, win 10000, options [mss 1460], length 0
17:37:38.309147 tun0 Out IP 192.168.71.111.8080 > 192.0.2.1.33615: Flags [S.], seq 1333881343, ack 1, win 64240, options [mss 1460], length 0
17:37:38.319251 ? In IP 192.0.2.1.33615 > 192.168.71.111.8080: Flags [.], ack 1, win 10000, length 0
17:37:38.319398 ? Out IP 192.168.71.111.8080 > 192.0.2.1.33615: Flags [F.], seq 1, ack 1, win 64240, length 0
17:37:38.319415 ? In IP 192.0.2.1.33615 > 192.168.71.111.8080: Flags [R.], seq 1, ack 1, win 10000, length 0
实验测试二
那么如果将服务器端 SACK 支持关闭,则:
# sysctl -q net.ipv4.tcp_sack=0
#
执行 SYN 包含 SACK 字段的脚本,在 tcpdump 抓包结果中可以看到服务器端响应的 SYN/ACK 不包含 SACK 字段,因为服务器本端已关闭 SACK 支持。三次握手的这一结果也表明,之后的 TCP 通讯将不支持 SACK。
# packetdrill tcp_3hs_options_sack_002.pkt
#
# tcpdump -i any port 8080 -nn
tcpdump: data link type LINUX_SLL2
tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
listening on any, link-type LINUX_SLL2 (Linux cooked v2), snapshot length 262144 bytes
17:42:16.401126 ? In IP 192.0.2.1.47307 > 192.168.18.218.8080: Flags [S], seq 0, win 10000, options [mss 1460,nop,nop,sackOK], length 0
17:42:16.401153 ? Out IP 192.168.18.218.8080 > 192.0.2.1.47307: Flags [S.], seq 979607804, ack 1, win 64240, options [mss 1460], length 0
17:42:16.411259 ? In IP 192.0.2.1.47307 > 192.168.18.218.8080: Flags [.], ack 1, win 10000, length 0
17:42:16.411398 ? Out IP 192.168.18.218.8080 > 192.0.2.1.47307: Flags [F.], seq 1, ack 1, win 64240, length 0
17:42:16.411412 ? In IP 192.0.2.1.47307 > 192.168.18.218.8080: Flags [R.], seq 1, ack 1, win 10000, length 0
SYN/ACK SACK
SYN/ACK 中 TCP options 的 SACK,代码中是通过 tcp_make_synack 函数确定的,相关的代码流程简要说明如下:
tcp_v4_rcv
  |--tcp_v4_do_rcv
     |--tcp_rcv_state_process
        |--tcp_v4_conn_request
           |--tcp_conn_request
              |--tcp_v4_send_synack
                 |--tcp_make_synack
tcp_v4_rcv 函数,根据 TCP_LISTEN 状态判断,调用 tcp_v4_do_rcv -> tcp_rcv_state_process 。
int tcp_v4_rcv(struct sk_buff *skb)
{
...
process:
...
if (sk->sk_state == TCP_LISTEN) {
ret = tcp_v4_do_rcv(sk, skb);
goto put_and_return;
}
...
}
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
...
if (tcp_rcv_state_process(sk, skb)) {
rsk = sk;
goto reset;
}
return 0;
...
}
tcp_rcv_state_process 函数同样根据 TCP_LISTEN 状态判断,如果收到的是 SYN,则通过 icsk->icsk_af_ops->conn_request 调用 tcp_v4_conn_request 函数,而该函数实际返回的是 tcp_conn_request 函数的调用结果。
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
{
...
case TCP_LISTEN:
...
if (th->syn) {
if (th->fin)
goto discard;
...
acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
...
}
...
}
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
/* Never answer to SYNs send to broadcast or multicast */
if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
goto drop;
return tcp_conn_request(&tcp_request_sock_ops,
&tcp_request_sock_ipv4_ops, sk, skb);
...
}
tcp_conn_request 有两部分相关,一是解析收到的 SYN 中的 options,tcp_parse_options;二是构建发送 SYN/ACK 相关,tcp_v4_send_synack。
int tcp_conn_request(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct sk_buff *skb)
{
...
tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
want_cookie ? NULL : &foc);
...
af_ops->send_synack(sk, dst, &fl, req, &foc,
!want_cookie ? TCP_SYNACK_NORMAL :
TCP_SYNACK_COOKIE,
skb);
...
}
在 tcp_parse_options 函数中处理 TCPOPT_SACK_PERM 选项时,如果同时满足选项长度正确、数据包为 SYN、连接尚未建立(!estab)以及 ipv4.sysctl_tcp_sack 为 1,则将 sack_ok 设置为 TCP_SACK_SEEN(即 1)。
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
* But, this can also be called on packets in the established flow when
* the fast version below fails.
*/
void tcp_parse_options(const struct net *net,
const struct sk_buff *skb,
struct tcp_options_received *opt_rx, int estab,
struct tcp_fastopen_cookie *foc)
{
const unsigned char *ptr;
const struct tcphdr *th = tcp_hdr(skb);
int length = (th->doff * 4) - sizeof(struct tcphdr);
...
case TCPOPT_SACK_PERM:
if (opsize == TCPOLEN_SACK_PERM && th->syn &&
!estab && net->ipv4.sysctl_tcp_sack) {
opt_rx->sack_ok = TCP_SACK_SEEN;
tcp_sack_reset(opt_rx);
}
break;
...
}
EXPORT_SYMBOL(tcp_parse_options);
/*These are used to set the sack_ok field in struct tcp_options_received */
#define TCP_SACK_SEEN     (1 << 0)   /*1 = peer is SACK capable, */
tcp_v4_send_synack -> tcp_make_synack -> tcp_synack_options ,如果 sack_ok 为真,则设置 SACK 允许选项标志,最终通过 tcp_options_write 写入 options。
static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
enum tcp_synack_type synack_type,
struct sk_buff *syn_skb)
{
...
skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
...
}
struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
enum tcp_synack_type synack_type,
struct sk_buff *syn_skb)
{
...
tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
foc, synack_type,
syn_skb) + sizeof(*th);
...
tcp_options_write((__be32 *)(th + 1), NULL, &opts);
...
}
/* Set up TCP options for SYN-ACKs. */
static unsigned int tcp_synack_options(const struct sock *sk,
struct request_sock *req,
unsigned int mss, struct sk_buff *skb,
struct tcp_out_options *opts,
const struct tcp_md5sig_key *md5,
struct tcp_fastopen_cookie *foc)
{
struct inet_request_sock *ireq = inet_rsk(req);
unsigned int remaining = MAX_TCP_OPTION_SPACE;
...
if (likely(ireq->sack_ok)) {
opts->options |= OPTION_SACK_ADVERTISE;
if (unlikely(!ireq->tstamp_ok))
remaining -= TCPOLEN_SACKPERM_ALIGNED;
}
...
}
往期推荐
原文始发于微信公众号(Echo Reply):Wireshark & Packetdrill | TCP 三次握手之 TCP Options 字段 SACK 续
- 左青龙
- 微信扫一扫
-
- 右白虎
- 微信扫一扫
-
评论