学习是一种漫长的修行
实验目的
基于 packetdrill TCP 三次握手脚本,继续测试 SYN 中 TCP options MSS 字段的由来。此次构造模拟的是客户端场景,而之前《TCP 三次握手之 TCP Options 字段 MSS》中构造模拟的是服务器端。
基础脚本
# cat tcp_3hs_007.pkt
// TCP 基础之三次握手
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+0 setsockopt(3, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0 > S 0:0(0) <...>
+0.01 < S. 0:0(0) ack 1 win 10000 <mss 1000>
+0 > . 1:1(0) ack 1
SYN MSS
之前有提及 SYN 中 TCP options 的 MSS 实际上就是本地通告的 MSS,也就是 advmss,代码中是通过 tcp_advertise_mss 函数确定的,相关的代码流程简要说明如下:
sys_connect
|--inet_stream_connect
   |--tcp_v4_connect
      |--tcp_connect
         |--tcp_connect_init
         |  |--tcp_mss_clamp
         |--tcp_transmit_skb
            |--tcp_syn_options
               |--tcp_advertise_mss
tcp_connect 函数中,先调用 tcp_connect_init 函数初始化连接相关参数,再调用 tcp_send_syn_data(Fast Open 场景)或者 tcp_transmit_skb 函数构造并发送 SYN。
/* Build a SYN and send it off. */
int tcp_connect(struct sock *sk)
{
...
tcp_connect_init(sk);
...
/* Send off SYN; include data in Fast Open. */
err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
if (err == -ECONNREFUSED)
return err;
...
}
EXPORT_SYMBOL(tcp_connect);
tcp_connect_init 函数调用 tcp_mss_clamp 函数,将通过目的路由取到的 MSS 值与通过 setsockopt 设置的值 user_mss 比较,采用较小的一个作为 advmss;如果没有设置 user_mss(即其值为 0),则直接采用目的路由取到的 MSS 值。
/* Do all connect socket setups that can be done AF independent. */
static void tcp_connect_init(struct sock *sk)
{
...
tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
...
}
static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss)
{
/* We use READ_ONCE() here because socket might not be locked.
* This happens for listeners.
*/
u16 user_mss = READ_ONCE(tp->rx_opt.user_mss);
return (user_mss && user_mss < mss) ? user_mss : mss;
}
static inline u32
dst_metric_advmss(const struct dst_entry *dst)
{
u32 advmss = dst_metric_raw(dst, RTAX_ADVMSS);
if (!advmss)
advmss = dst->ops->default_advmss(dst);
return advmss;
}
tcp_transmit_skb 函数是对 __tcp_transmit_skb 函数的一层封装。__tcp_transmit_skb 函数调用 tcp_syn_options 构造 SYN 的 Options 各字段,其中涉及的 MSS 最终由 tcp_advertise_mss 函数确定。
static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
int clone_it, gfp_t gfp_mask, u32 rcv_nxt)
{
...
if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) {
tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
}
...
}
static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
struct tcp_out_options *opts,
struct tcp_md5sig_key **md5)
{
...
opts->mss = tcp_advertise_mss(sk);
...
}
tcp_advertise_mss 函数中会再次调用 dst_metric_advmss,将通过目的路由取到的 MSS 值与之前的 advmss 值做比较,取较小者作为最终的 advmss 值。
/* Calculate mss to advertise in SYN segment.
* RFC1122, RFC1063, draft-ietf-tcpimpl-pmtud-01 state that:
*
* 1. It is independent of path mtu.
* 2. Ideally, it is maximal possible segment size i.e. 65535-40.
* 3. For IPv4 it is reasonable to calculate it from maximal MTU of
* attached devices, because some buggy hosts are confused by
* large MSS.
* 4. We do not make 3, we advertise MSS, calculated from first
* hop device mtu, but allow to raise it to ip_rt_min_advmss.
* This may be overridden via information stored in routing table.
* 5. Value 65535 for MSS is valid in IPv6 and means "as large as possible,
* probably even Jumbo".
*/
static __u16 tcp_advertise_mss(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
const struct dst_entry *dst = __sk_dst_get(sk);
int mss = tp->advmss;
if (dst) {
unsigned int metric = dst_metric_advmss(dst);
if (metric < mss) {
mss = metric;
tp->advmss = mss;
}
}
return (__u16)mss;
}
所以,如果既没有用 setsockopt 设置更小的 user_mss,也没有在目的路由上设置更小的 MSS,那么就会使用目的路由默认的 MSS 值 1460(即以太网 MTU 1500 减去 IP 头和 TCP 头共 40 字节),因此常见的 MSS 一般都是 1460。
实验测试一
首先通过 setsockopt 选项 TCP_MAXSEG 指定 MSS 值为 1000,则客户端 advmss 值为 1000,snd_mss 值也为 1000。
# cat tcp_3hs_options_mss_006.pkt
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+0 setsockopt(3, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+.1 getsockopt(3, SOL_TCP, TCP_MAXSEG, [536], [4]) = 0
+.1 setsockopt(3, SOL_TCP, TCP_MAXSEG, [1000], 4) = 0
+0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0 > S 0:0(0) <...>
+0.01 < S. 0:0(0) ack 1 win 10000 <mss 1460>
+0 > . 1:1(0) ack 1
+0 %{print (tcpi_advmss)}%
+0 %{print (tcpi_snd_mss)}%
#
# packetdrill tcp_3hs_options_mss_006.pkt
1000
1000
#
通过 tcpdump 抓包的结果显示,SYN options [mss 1000...] 。
# tcpdump -i any -nn port 8080
tcpdump: data link type LINUX_SLL2
tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
listening on any, link-type LINUX_SLL2 (Linux cooked v2), snapshot length 262144 bytes
23:25:02.548445 ? Out IP 192.168.105.151.44738 > 192.0.2.1.8080: Flags [S], seq 2533124468, win 65000, options [mss 1000,sackOK,TS val 2267576998 ecr 0,nop,wscale 7], length 0
23:25:02.558587 ? In IP 192.0.2.1.8080 > 192.168.105.151.44738: Flags [S.], seq 0, ack 2533124469, win 10000, options [mss 1460], length 0
23:25:02.558612 ? Out IP 192.168.105.151.44738 > 192.0.2.1.8080: Flags [.], ack 1, win 65000, length 0
23:25:02.581563 ? Out IP 192.168.105.151.44738 > 192.0.2.1.8080: Flags [F.], seq 1, ack 1, win 65000, length 0
23:25:02.581584 ? In IP 192.0.2.1.8080 > 192.168.105.151.44738: Flags [R.], seq 1, ack 1, win 10000, length 0
实验测试二
接着通过修改目的路由的 MSS 值为 1000,从而影响 advmss。
在 packetdrill 的 pkt 文件中通过 ip route 命令将 advmss 值修改为 1000,执行脚本后,tcpdump 捕获结果可以看到 SYN options [mss 1000...] 。
# cat tcp_3hs_options_mss_007.pkt
`ip route change 192.0.2.0/24 dev tun0 advmss 1000`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+0 setsockopt(3, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0 > S 0:0(0) <...>
+.1 < S. 0:0(0) ack 1 win 10000 <mss 1460>
+0 > . 1:1(0) ack 1
#
# packetdrill tcp_3hs_options_mss_007.pkt
#
# tcpdump -i any -nn port 8080
tcpdump: data link type LINUX_SLL2
tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
listening on any, link-type LINUX_SLL2 (Linux cooked v2), snapshot length 262144 bytes
23:36:44.288239 tun0 Out IP 192.168.29.8.52722 > 192.0.2.1.8080: Flags [S], seq 3120031645, win 65000, options [mss 1000,sackOK,TS val 4143511250 ecr 0,nop,wscale 7], length 0
23:36:44.388369 ? In IP 192.0.2.1.8080 > 192.168.29.8.52722: Flags [S.], seq 0, ack 3120031646, win 10000, options [mss 1460], length 0
23:36:44.388395 ? Out IP 192.168.29.8.52722 > 192.0.2.1.8080: Flags [.], ack 1, win 65000, length 0
23:36:44.388494 ? Out IP 192.168.29.8.52722 > 192.0.2.1.8080: Flags [F.], seq 1, ack 1, win 65000, length 0
23:36:44.388506 ? In IP 192.0.2.1.8080 > 192.168.29.8.52722: Flags [R.], seq 1, ack 1, win 10000, length 0
实验测试三
扩展测试一下通过 setsockopt 设置 user_mss 时的上下限值,测试结果显示 MSS 可设置的取值范围为 [88, 32767]。
修改值为 87,运行脚本后报错,无效参数。
# cat tcp_3hs_options_mss_006.pkt
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+0 setsockopt(3, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+.1 getsockopt(3, SOL_TCP, TCP_MAXSEG, [536], [4]) = 0
+.1 setsockopt(3, SOL_TCP, TCP_MAXSEG, [87], 4) = 0
+0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0 > S 0:0(0) <...>
+0.01 < S. 0:0(0) ack 1 win 10000 <mss 1460>
+0 > . 1:1(0) ack 1
+0 %{print (tcpi_advmss)}%
+0 %{print (tcpi_snd_mss)}%
#
# packetdrill tcp_3hs_options_mss_006.pkt
tcp_3hs_options_mss_006.pkt:6: runtime error in setsockopt call: Expected result 0 but got -1 with errno 22 (Invalid argument)
#
修改值为 88,运行脚本成功,TCP_MAXSEG 可设置的最小值为 88 。
# cat tcp_3hs_options_mss_006.pkt
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+0 setsockopt(3, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+.1 getsockopt(3, SOL_TCP, TCP_MAXSEG, [536], [4]) = 0
+.1 setsockopt(3, SOL_TCP, TCP_MAXSEG, [88], 4) = 0
+0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0 > S 0:0(0) <...>
+0.01 < S. 0:0(0) ack 1 win 10000 <mss 1460>
+0 > . 1:1(0) ack 1
+0 %{print (tcpi_advmss)}%
+0 %{print (tcpi_snd_mss)}%
#
# packetdrill tcp_3hs_options_mss_006.pkt
88
88
#
/include/net/tcp.h 中的定义:
/* Minimal accepted MSS. It is (60+60+8) - (20+20). */
#define TCP_MIN_MSS 88U
修改值为 32768,运行脚本后报错,无效参数(超过了内核对 TCP_MAXSEG 校验的上限 MAX_TCP_WINDOW,即 32767)。
# cat tcp_3hs_options_mss_006.pkt
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+0 setsockopt(3, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+.1 getsockopt(3, SOL_TCP, TCP_MAXSEG, [536], [4]) = 0
+.1 setsockopt(3, SOL_TCP, TCP_MAXSEG, [32768], 4) = 0
+0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0 > S 0:0(0) <...>
+0.01 < S. 0:0(0) ack 1 win 10000 <mss 1460>
+0 > . 1:1(0) ack 1
+0 %{print (tcpi_advmss)}%
+0 %{print (tcpi_snd_mss)}%
# packetdrill tcp_3hs_options_mss_006.pkt
tcp_3hs_options_mss_006.pkt:6: runtime error in setsockopt call: Expected result 0 but got -1 with errno 22 (Invalid argument)
#
修改值为 32767,运行脚本成功,TCP_MAXSEG 可设置的最大值为 32767。当然这只是设置值,实际和其他值比较取小后,advmss 和 snd_mss 的值仍为 1460。
# cat tcp_3hs_options_mss_006.pkt
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+0 setsockopt(3, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+.1 getsockopt(3, SOL_TCP, TCP_MAXSEG, [536], [4]) = 0
+.1 setsockopt(3, SOL_TCP, TCP_MAXSEG, [32767], 4) = 0
+0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0 > S 0:0(0) <...>
+0.01 < S. 0:0(0) ack 1 win 10000 <mss 1460>
+0 > . 1:1(0) ack 1
+0 %{print (tcpi_advmss)}%
+0 %{print (tcpi_snd_mss)}%
# packetdrill tcp_3hs_options_mss_006.pkt
1460
1460
#
实验测试四
扩展测试一下通过目的路由设置 MSS 时的上下限值,测试结果显示 ip route 命令可接受的 advmss 取值范围为 [0, 4294967295],但实际生效的值并不一定等于设置值。
修改值为 0,运行脚本成功,实际上相当于没有通过目的路由设置 MSS,因此输出的 MSS 仍为默认的 1460。
# cat tcp_3hs_options_mss_007.pkt
`ip route change 192.0.2.0/24 dev tun0 advmss 0`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+0 setsockopt(3, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0 > S 0:0(0) <...>
+.1 < S. 0:0(0) ack 1 win 10000 <mss 1460>
+0 > . 1:1(0) ack 1
# packetdrill tcp_3hs_options_mss_007.pkt
#
抓包结果 SYN MSS 仍为 1460。
# tcpdump -i any -nn port 8080
tcpdump: data link type LINUX_SLL2
tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
listening on any, link-type LINUX_SLL2 (Linux cooked v2), snapshot length 262144 bytes
09:33:14.776233 tun0 Out IP 192.168.176.190.36014 > 192.0.2.1.8080: Flags [S], seq 225558972, win 64240, options [mss 1460,sackOK,TS val 2713709784 ecr 0,nop,wscale 7], length 0
09:33:14.876393 ? In IP 192.0.2.1.8080 > 192.168.176.190.36014: Flags [S.], seq 0, ack 225558973, win 10000, options [mss 1460], length 0
09:33:14.876419 ? Out IP 192.168.176.190.36014 > 192.0.2.1.8080: Flags [.], ack 1, win 64240, length 0
09:33:14.876523 ? Out IP 192.168.176.190.36014 > 192.0.2.1.8080: Flags [F.], seq 1, ack 1, win 64240, length 0
09:33:14.876536 ? In IP 192.0.2.1.8080 > 192.168.176.190.36014: Flags [R.], seq 1, ack 1, win 10000, length 0
修改值为 1,运行脚本成功。
# cat tcp_3hs_options_mss_007.pkt
`ip route change 192.0.2.0/24 dev tun0 advmss 1`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+0 setsockopt(3, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0 > S 0:0(0) <...>
+.1 < S. 0:0(0) ack 1 win 10000 <mss 1460>
+0 > . 1:1(0) ack 1
# packetdrill tcp_3hs_options_mss_007.pkt
#
抓包结果 SYN MSS 确实为 1,比较特殊。
# tcpdump -i any -nn port 8080
tcpdump: data link type LINUX_SLL2
tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
listening on any, link-type LINUX_SLL2 (Linux cooked v2), snapshot length 262144 bytes
09:43:10.028232 tun0 Out IP 192.168.126.99.38514 > 192.0.2.1.8080: Flags [S], seq 3761061913, win 65535, options [mss 1,sackOK,TS val 3486118798 ecr 0,nop,wscale 7], length 0
09:43:10.128374 ? In IP 192.0.2.1.8080 > 192.168.126.99.38514: Flags [S.], seq 0, ack 3761061914, win 10000, options [mss 1460], length 0
09:43:10.128402 ? Out IP 192.168.126.99.38514 > 192.0.2.1.8080: Flags [.], ack 1, win 65535, length 0
09:43:10.128504 ? Out IP 192.168.126.99.38514 > 192.0.2.1.8080: Flags [F.], seq 1, ack 1, win 65535, length 0
09:43:10.128517 ? In IP 192.0.2.1.8080 > 192.168.126.99.38514: Flags [R.], seq 1, ack 1, win 10000, length 0
修改值为 4294967296,运行脚本报错,mss 值无效。
# cat tcp_3hs_options_mss_007.pkt
`ip route change 192.0.2.0/24 dev tun0 advmss 4294967296`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+0 setsockopt(3, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0 > S 0:0(0) <...>
+.1 < S. 0:0(0) ack 1 win 10000 <mss 1460>
+0 > . 1:1(0) ack 1
# packetdrill tcp_3hs_options_mss_007.pkt
Error: argument "4294967296" is wrong: "mss" value is invalid
tcp_3hs_options_mss_007.pkt: error executing init command: non-zero status 255
#
修改值为 4294967295,运行脚本成功。
# cat tcp_3hs_options_mss_007.pkt
`ip route change 192.0.2.0/24 dev tun0 advmss 4294967295`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+0 setsockopt(3, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0 > S 0:0(0) <...>
+.1 < S. 0:0(0) ack 1 win 10000 <mss 1460>
+0 > . 1:1(0) ack 1
# packetdrill tcp_3hs_options_mss_007.pkt
#
抓包结果 SYN MSS 为 65495,即 IP 报文最大长度 65535 减去 IP 头和 TCP 头共 40 字节后得到的值,说明虽然命令接受了 4294967295,但实际生效的 advmss 被限制在 65495。
# tcpdump -i any -nn port 8080
tcpdump: data link type LINUX_SLL2
tcpdump: verbose output suppressed, use -v[v]... for full protocol decode
listening on any, link-type LINUX_SLL2 (Linux cooked v2), snapshot length 262144 bytes
10:13:05.819001 tun0 Out IP 192.168.137.156.42214 > 192.0.2.1.8080: Flags [S], seq 2661209176, win 65495, options [mss 65495,sackOK,TS val 329728433 ecr 0,nop,wscale 7], length 0
10:13:05.919141 ? In IP 192.0.2.1.8080 > 192.168.137.156.42214: Flags [S.], seq 0, ack 2661209177, win 10000, options [mss 1460], length 0
10:13:05.919170 ? Out IP 192.168.137.156.42214 > 192.0.2.1.8080: Flags [.], ack 1, win 65495, length 0
10:13:05.919272 ? Out IP 192.168.137.156.42214 > 192.0.2.1.8080: Flags [F.], seq 1, ack 1, win 65495, length 0
10:13:05.919285 ? In IP 192.0.2.1.8080 > 192.168.137.156.42214: Flags [R.], seq 1, ack 1, win 10000, length 0
往期推荐
原文始发于微信公众号(Echo Reply):Wireshark & Packetdrill | TCP 三次握手之 SYN MSS
- 左青龙
- 微信扫一扫
-
- 右白虎
- 微信扫一扫
-
评论