[e16e8f2] | 1 | #include <string.h> |
---|
| 2 | #include <stdlib.h> |
---|
| 3 | #include <stdio.h> |
---|
| 4 | #include <assert.h> |
---|
| 5 | #include <errno.h> |
---|
| 6 | #include <byteswap.h> |
---|
| 7 | #include <gpxe/timer.h> |
---|
| 8 | #include <gpxe/iobuf.h> |
---|
| 9 | #include <gpxe/malloc.h> |
---|
| 10 | #include <gpxe/retry.h> |
---|
| 11 | #include <gpxe/refcnt.h> |
---|
| 12 | #include <gpxe/xfer.h> |
---|
| 13 | #include <gpxe/open.h> |
---|
| 14 | #include <gpxe/uri.h> |
---|
| 15 | #include <gpxe/tcpip.h> |
---|
| 16 | #include <gpxe/tcp.h> |
---|
| 17 | |
---|
| 18 | /** @file |
---|
| 19 | * |
---|
| 20 | * TCP protocol |
---|
| 21 | * |
---|
| 22 | */ |
---|
| 23 | |
---|
| 24 | FILE_LICENCE ( GPL2_OR_LATER ); |
---|
| 25 | |
---|
| 26 | /** A TCP connection */ |
---|
| 27 | struct tcp_connection { |
---|
| 28 | /** Reference counter */ |
---|
| 29 | struct refcnt refcnt; |
---|
| 30 | /** List of TCP connections */ |
---|
| 31 | struct list_head list; |
---|
| 32 | |
---|
| 33 | /** Data transfer interface */ |
---|
| 34 | struct xfer_interface xfer; |
---|
| 35 | /** Data transfer interface closed flag */ |
---|
| 36 | int xfer_closed; |
---|
| 37 | |
---|
| 38 | /** Remote socket address */ |
---|
| 39 | struct sockaddr_tcpip peer; |
---|
| 40 | /** Local port, in network byte order */ |
---|
| 41 | unsigned int local_port; |
---|
| 42 | |
---|
| 43 | /** Current TCP state */ |
---|
| 44 | unsigned int tcp_state; |
---|
| 45 | /** Previous TCP state |
---|
| 46 | * |
---|
| 47 | * Maintained only for debug messages |
---|
| 48 | */ |
---|
| 49 | unsigned int prev_tcp_state; |
---|
| 50 | /** Current sequence number |
---|
| 51 | * |
---|
| 52 | * Equivalent to SND.UNA in RFC 793 terminology. |
---|
| 53 | */ |
---|
| 54 | uint32_t snd_seq; |
---|
| 55 | /** Unacknowledged sequence count |
---|
| 56 | * |
---|
| 57 | * Equivalent to (SND.NXT-SND.UNA) in RFC 793 terminology. |
---|
| 58 | */ |
---|
| 59 | uint32_t snd_sent; |
---|
| 60 | /** Send window |
---|
| 61 | * |
---|
| 62 | * Equivalent to SND.WND in RFC 793 terminology |
---|
| 63 | */ |
---|
| 64 | uint32_t snd_win; |
---|
| 65 | /** Current acknowledgement number |
---|
| 66 | * |
---|
| 67 | * Equivalent to RCV.NXT in RFC 793 terminology. |
---|
| 68 | */ |
---|
| 69 | uint32_t rcv_ack; |
---|
| 70 | /** Receive window |
---|
| 71 | * |
---|
| 72 | * Equivalent to RCV.WND in RFC 793 terminology. |
---|
| 73 | */ |
---|
| 74 | uint32_t rcv_win; |
---|
| 75 | /** Most recent received timestamp |
---|
| 76 | * |
---|
| 77 | * Equivalent to TS.Recent in RFC 1323 terminology. |
---|
| 78 | */ |
---|
| 79 | uint32_t ts_recent; |
---|
| 80 | /** Timestamps enabled */ |
---|
| 81 | int timestamps; |
---|
| 82 | |
---|
| 83 | /** Transmit queue */ |
---|
| 84 | struct list_head queue; |
---|
| 85 | /** Retransmission timer */ |
---|
| 86 | struct retry_timer timer; |
---|
| 87 | }; |
---|
| 88 | |
---|
| 89 | /** |
---|
| 90 | * List of registered TCP connections |
---|
| 91 | */ |
---|
| 92 | static LIST_HEAD ( tcp_conns ); |
---|
| 93 | |
---|
| 94 | /* Forward declarations */ |
---|
| 95 | static struct xfer_interface_operations tcp_xfer_operations; |
---|
| 96 | static void tcp_expired ( struct retry_timer *timer, int over ); |
---|
| 97 | static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack, |
---|
| 98 | uint32_t win ); |
---|
| 99 | |
---|
| 100 | /** |
---|
| 101 | * Name TCP state |
---|
| 102 | * |
---|
| 103 | * @v state TCP state |
---|
| 104 | * @ret name Name of TCP state |
---|
| 105 | */ |
---|
| 106 | static inline __attribute__ (( always_inline )) const char * |
---|
| 107 | tcp_state ( int state ) { |
---|
| 108 | switch ( state ) { |
---|
| 109 | case TCP_CLOSED: return "CLOSED"; |
---|
| 110 | case TCP_LISTEN: return "LISTEN"; |
---|
| 111 | case TCP_SYN_SENT: return "SYN_SENT"; |
---|
| 112 | case TCP_SYN_RCVD: return "SYN_RCVD"; |
---|
| 113 | case TCP_ESTABLISHED: return "ESTABLISHED"; |
---|
| 114 | case TCP_FIN_WAIT_1: return "FIN_WAIT_1"; |
---|
| 115 | case TCP_FIN_WAIT_2: return "FIN_WAIT_2"; |
---|
| 116 | case TCP_CLOSING_OR_LAST_ACK: return "CLOSING/LAST_ACK"; |
---|
| 117 | case TCP_TIME_WAIT: return "TIME_WAIT"; |
---|
| 118 | case TCP_CLOSE_WAIT: return "CLOSE_WAIT"; |
---|
| 119 | default: return "INVALID"; |
---|
| 120 | } |
---|
| 121 | } |
---|
| 122 | |
---|
| 123 | /** |
---|
| 124 | * Dump TCP state transition |
---|
| 125 | * |
---|
| 126 | * @v tcp TCP connection |
---|
| 127 | */ |
---|
| 128 | static inline __attribute__ (( always_inline )) void |
---|
| 129 | tcp_dump_state ( struct tcp_connection *tcp ) { |
---|
| 130 | |
---|
| 131 | if ( tcp->tcp_state != tcp->prev_tcp_state ) { |
---|
| 132 | DBGC ( tcp, "TCP %p transitioned from %s to %s\n", tcp, |
---|
| 133 | tcp_state ( tcp->prev_tcp_state ), |
---|
| 134 | tcp_state ( tcp->tcp_state ) ); |
---|
| 135 | } |
---|
| 136 | tcp->prev_tcp_state = tcp->tcp_state; |
---|
| 137 | } |
---|
| 138 | |
---|
| 139 | /** |
---|
| 140 | * Dump TCP flags |
---|
| 141 | * |
---|
| 142 | * @v flags TCP flags |
---|
| 143 | */ |
---|
| 144 | static inline __attribute__ (( always_inline )) void |
---|
| 145 | tcp_dump_flags ( struct tcp_connection *tcp, unsigned int flags ) { |
---|
| 146 | if ( flags & TCP_RST ) |
---|
| 147 | DBGC2 ( tcp, " RST" ); |
---|
| 148 | if ( flags & TCP_SYN ) |
---|
| 149 | DBGC2 ( tcp, " SYN" ); |
---|
| 150 | if ( flags & TCP_PSH ) |
---|
| 151 | DBGC2 ( tcp, " PSH" ); |
---|
| 152 | if ( flags & TCP_FIN ) |
---|
| 153 | DBGC2 ( tcp, " FIN" ); |
---|
| 154 | if ( flags & TCP_ACK ) |
---|
| 155 | DBGC2 ( tcp, " ACK" ); |
---|
| 156 | } |
---|
| 157 | |
---|
| 158 | /*************************************************************************** |
---|
| 159 | * |
---|
| 160 | * Open and close |
---|
| 161 | * |
---|
| 162 | *************************************************************************** |
---|
| 163 | */ |
---|
| 164 | |
---|
| 165 | /** |
---|
| 166 | * Bind TCP connection to local port |
---|
| 167 | * |
---|
| 168 | * @v tcp TCP connection |
---|
| 169 | * @v port Local port number, in network-endian order |
---|
| 170 | * @ret rc Return status code |
---|
| 171 | * |
---|
| 172 | * If the port is 0, the connection is assigned an available port |
---|
| 173 | * between 1024 and 65535. |
---|
| 174 | */ |
---|
| 175 | static int tcp_bind ( struct tcp_connection *tcp, unsigned int port ) { |
---|
| 176 | struct tcp_connection *existing; |
---|
| 177 | static uint16_t try_port = 1023; |
---|
| 178 | |
---|
| 179 | /* If no port specified, find the first available port */ |
---|
| 180 | if ( ! port ) { |
---|
| 181 | while ( try_port ) { |
---|
| 182 | try_port++; |
---|
| 183 | if ( try_port < 1024 ) |
---|
| 184 | continue; |
---|
| 185 | if ( tcp_bind ( tcp, htons ( try_port ) ) == 0 ) |
---|
| 186 | return 0; |
---|
| 187 | } |
---|
| 188 | DBGC ( tcp, "TCP %p could not bind: no free ports\n", tcp ); |
---|
| 189 | return -EADDRINUSE; |
---|
| 190 | } |
---|
| 191 | |
---|
| 192 | /* Attempt bind to local port */ |
---|
| 193 | list_for_each_entry ( existing, &tcp_conns, list ) { |
---|
| 194 | if ( existing->local_port == port ) { |
---|
| 195 | DBGC ( tcp, "TCP %p could not bind: port %d in use\n", |
---|
| 196 | tcp, ntohs ( port ) ); |
---|
| 197 | return -EADDRINUSE; |
---|
| 198 | } |
---|
| 199 | } |
---|
| 200 | tcp->local_port = port; |
---|
| 201 | |
---|
| 202 | DBGC ( tcp, "TCP %p bound to port %d\n", tcp, ntohs ( port ) ); |
---|
| 203 | return 0; |
---|
| 204 | } |
---|
| 205 | |
---|
| 206 | /** |
---|
| 207 | * Open a TCP connection |
---|
| 208 | * |
---|
| 209 | * @v xfer Data transfer interface |
---|
| 210 | * @v peer Peer socket address |
---|
| 211 | * @v local Local socket address, or NULL |
---|
| 212 | * @ret rc Return status code |
---|
| 213 | */ |
---|
| 214 | static int tcp_open ( struct xfer_interface *xfer, struct sockaddr *peer, |
---|
| 215 | struct sockaddr *local ) { |
---|
| 216 | struct sockaddr_tcpip *st_peer = ( struct sockaddr_tcpip * ) peer; |
---|
| 217 | struct sockaddr_tcpip *st_local = ( struct sockaddr_tcpip * ) local; |
---|
| 218 | struct tcp_connection *tcp; |
---|
| 219 | unsigned int bind_port; |
---|
| 220 | int rc; |
---|
| 221 | |
---|
| 222 | /* Allocate and initialise structure */ |
---|
| 223 | tcp = zalloc ( sizeof ( *tcp ) ); |
---|
| 224 | if ( ! tcp ) |
---|
| 225 | return -ENOMEM; |
---|
| 226 | DBGC ( tcp, "TCP %p allocated\n", tcp ); |
---|
| 227 | xfer_init ( &tcp->xfer, &tcp_xfer_operations, &tcp->refcnt ); |
---|
| 228 | tcp->prev_tcp_state = TCP_CLOSED; |
---|
| 229 | tcp->tcp_state = TCP_STATE_SENT ( TCP_SYN ); |
---|
| 230 | tcp_dump_state ( tcp ); |
---|
| 231 | tcp->snd_seq = random(); |
---|
| 232 | INIT_LIST_HEAD ( &tcp->queue ); |
---|
| 233 | tcp->timer.expired = tcp_expired; |
---|
| 234 | memcpy ( &tcp->peer, st_peer, sizeof ( tcp->peer ) ); |
---|
| 235 | |
---|
| 236 | /* Bind to local port */ |
---|
| 237 | bind_port = ( st_local ? st_local->st_port : 0 ); |
---|
| 238 | if ( ( rc = tcp_bind ( tcp, bind_port ) ) != 0 ) |
---|
| 239 | goto err; |
---|
| 240 | |
---|
| 241 | /* Start timer to initiate SYN */ |
---|
| 242 | start_timer_nodelay ( &tcp->timer ); |
---|
| 243 | |
---|
| 244 | /* Attach parent interface, transfer reference to connection |
---|
| 245 | * list and return |
---|
| 246 | */ |
---|
| 247 | xfer_plug_plug ( &tcp->xfer, xfer ); |
---|
| 248 | list_add ( &tcp->list, &tcp_conns ); |
---|
| 249 | return 0; |
---|
| 250 | |
---|
| 251 | err: |
---|
| 252 | ref_put ( &tcp->refcnt ); |
---|
| 253 | return rc; |
---|
| 254 | } |
---|
| 255 | |
---|
| 256 | /** |
---|
| 257 | * Close TCP connection |
---|
| 258 | * |
---|
| 259 | * @v tcp TCP connection |
---|
| 260 | * @v rc Reason for close |
---|
| 261 | * |
---|
| 262 | * Closes the data transfer interface. If the TCP state machine is in |
---|
| 263 | * a suitable state, the connection will be deleted. |
---|
| 264 | */ |
---|
| 265 | static void tcp_close ( struct tcp_connection *tcp, int rc ) { |
---|
| 266 | struct io_buffer *iobuf; |
---|
| 267 | struct io_buffer *tmp; |
---|
| 268 | |
---|
| 269 | /* Close data transfer interface */ |
---|
| 270 | xfer_nullify ( &tcp->xfer ); |
---|
| 271 | xfer_close ( &tcp->xfer, rc ); |
---|
| 272 | tcp->xfer_closed = 1; |
---|
| 273 | |
---|
| 274 | /* If we are in CLOSED, or have otherwise not yet received a |
---|
| 275 | * SYN (i.e. we are in LISTEN or SYN_SENT), just delete the |
---|
| 276 | * connection. |
---|
| 277 | */ |
---|
| 278 | if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) { |
---|
| 279 | |
---|
| 280 | /* Transition to CLOSED for the sake of debugging messages */ |
---|
| 281 | tcp->tcp_state = TCP_CLOSED; |
---|
| 282 | tcp_dump_state ( tcp ); |
---|
| 283 | |
---|
| 284 | /* Free any unsent I/O buffers */ |
---|
| 285 | list_for_each_entry_safe ( iobuf, tmp, &tcp->queue, list ) { |
---|
| 286 | list_del ( &iobuf->list ); |
---|
| 287 | free_iob ( iobuf ); |
---|
| 288 | } |
---|
| 289 | |
---|
| 290 | /* Remove from list and drop reference */ |
---|
| 291 | stop_timer ( &tcp->timer ); |
---|
| 292 | list_del ( &tcp->list ); |
---|
| 293 | ref_put ( &tcp->refcnt ); |
---|
| 294 | DBGC ( tcp, "TCP %p connection deleted\n", tcp ); |
---|
| 295 | return; |
---|
| 296 | } |
---|
| 297 | |
---|
| 298 | /* If we have not had our SYN acknowledged (i.e. we are in |
---|
| 299 | * SYN_RCVD), pretend that it has been acknowledged so that we |
---|
| 300 | * can send a FIN without breaking things. |
---|
| 301 | */ |
---|
| 302 | if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) ) |
---|
| 303 | tcp_rx_ack ( tcp, ( tcp->snd_seq + 1 ), 0 ); |
---|
| 304 | |
---|
| 305 | /* If we have no data remaining to send, start sending FIN */ |
---|
| 306 | if ( list_empty ( &tcp->queue ) ) { |
---|
| 307 | tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN ); |
---|
| 308 | tcp_dump_state ( tcp ); |
---|
| 309 | } |
---|
| 310 | } |
---|
| 311 | |
---|
| 312 | /*************************************************************************** |
---|
| 313 | * |
---|
| 314 | * Transmit data path |
---|
| 315 | * |
---|
| 316 | *************************************************************************** |
---|
| 317 | */ |
---|
| 318 | |
---|
| 319 | /** |
---|
| 320 | * Calculate transmission window |
---|
| 321 | * |
---|
| 322 | * @v tcp TCP connection |
---|
| 323 | * @ret len Maximum length that can be sent in a single packet |
---|
| 324 | */ |
---|
| 325 | static size_t tcp_xmit_win ( struct tcp_connection *tcp ) { |
---|
| 326 | size_t len; |
---|
| 327 | |
---|
| 328 | /* Not ready if we're not in a suitable connection state */ |
---|
| 329 | if ( ! TCP_CAN_SEND_DATA ( tcp->tcp_state ) ) |
---|
| 330 | return 0; |
---|
| 331 | |
---|
| 332 | /* Length is the minimum of the receiver's window and the path MTU */ |
---|
| 333 | len = tcp->snd_win; |
---|
| 334 | if ( len > TCP_PATH_MTU ) |
---|
| 335 | len = TCP_PATH_MTU; |
---|
| 336 | |
---|
| 337 | return len; |
---|
| 338 | } |
---|
| 339 | |
---|
| 340 | /** |
---|
| 341 | * Process TCP transmit queue |
---|
| 342 | * |
---|
| 343 | * @v tcp TCP connection |
---|
| 344 | * @v max_len Maximum length to process |
---|
| 345 | * @v dest I/O buffer to fill with data, or NULL |
---|
| 346 | * @v remove Remove data from queue |
---|
| 347 | * @ret len Length of data processed |
---|
| 348 | * |
---|
| 349 | * This processes at most @c max_len bytes from the TCP connection's |
---|
| 350 | * transmit queue. Data will be copied into the @c dest I/O buffer |
---|
| 351 | * (if provided) and, if @c remove is true, removed from the transmit |
---|
| 352 | * queue. |
---|
| 353 | */ |
---|
| 354 | static size_t tcp_process_queue ( struct tcp_connection *tcp, size_t max_len, |
---|
| 355 | struct io_buffer *dest, int remove ) { |
---|
| 356 | struct io_buffer *iobuf; |
---|
| 357 | struct io_buffer *tmp; |
---|
| 358 | size_t frag_len; |
---|
| 359 | size_t len = 0; |
---|
| 360 | |
---|
| 361 | list_for_each_entry_safe ( iobuf, tmp, &tcp->queue, list ) { |
---|
| 362 | frag_len = iob_len ( iobuf ); |
---|
| 363 | if ( frag_len > max_len ) |
---|
| 364 | frag_len = max_len; |
---|
| 365 | if ( dest ) { |
---|
| 366 | memcpy ( iob_put ( dest, frag_len ), iobuf->data, |
---|
| 367 | frag_len ); |
---|
| 368 | } |
---|
| 369 | if ( remove ) { |
---|
| 370 | iob_pull ( iobuf, frag_len ); |
---|
| 371 | if ( ! iob_len ( iobuf ) ) { |
---|
| 372 | list_del ( &iobuf->list ); |
---|
| 373 | free_iob ( iobuf ); |
---|
| 374 | } |
---|
| 375 | } |
---|
| 376 | len += frag_len; |
---|
| 377 | max_len -= frag_len; |
---|
| 378 | } |
---|
| 379 | return len; |
---|
| 380 | } |
---|
| 381 | |
---|
| 382 | /** |
---|
| 383 | * Transmit any outstanding data |
---|
| 384 | * |
---|
| 385 | * @v tcp TCP connection |
---|
| 386 | * @v force_send Force sending of packet |
---|
| 387 | * |
---|
| 388 | * Transmits any outstanding data on the connection. |
---|
| 389 | * |
---|
| 390 | * Note that even if an error is returned, the retransmission timer |
---|
| 391 | * will have been started if necessary, and so the stack will |
---|
| 392 | * eventually attempt to retransmit the failed packet. |
---|
| 393 | */ |
---|
| 394 | static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) { |
---|
| 395 | struct io_buffer *iobuf; |
---|
| 396 | struct tcp_header *tcphdr; |
---|
| 397 | struct tcp_mss_option *mssopt; |
---|
| 398 | struct tcp_timestamp_padded_option *tsopt; |
---|
| 399 | void *payload; |
---|
| 400 | unsigned int flags; |
---|
| 401 | size_t len = 0; |
---|
| 402 | uint32_t seq_len; |
---|
| 403 | uint32_t app_win; |
---|
| 404 | uint32_t max_rcv_win; |
---|
| 405 | int rc; |
---|
| 406 | |
---|
| 407 | /* If retransmission timer is already running, do nothing */ |
---|
| 408 | if ( timer_running ( &tcp->timer ) ) |
---|
| 409 | return 0; |
---|
| 410 | |
---|
| 411 | /* Calculate both the actual (payload) and sequence space |
---|
| 412 | * lengths that we wish to transmit. |
---|
| 413 | */ |
---|
| 414 | if ( TCP_CAN_SEND_DATA ( tcp->tcp_state ) ) { |
---|
| 415 | len = tcp_process_queue ( tcp, tcp_xmit_win ( tcp ), |
---|
| 416 | NULL, 0 ); |
---|
| 417 | } |
---|
| 418 | seq_len = len; |
---|
| 419 | flags = TCP_FLAGS_SENDING ( tcp->tcp_state ); |
---|
| 420 | if ( flags & ( TCP_SYN | TCP_FIN ) ) { |
---|
| 421 | /* SYN or FIN consume one byte, and we can never send both */ |
---|
| 422 | assert ( ! ( ( flags & TCP_SYN ) && ( flags & TCP_FIN ) ) ); |
---|
| 423 | seq_len++; |
---|
| 424 | } |
---|
| 425 | tcp->snd_sent = seq_len; |
---|
| 426 | |
---|
| 427 | /* If we have nothing to transmit, stop now */ |
---|
| 428 | if ( ( seq_len == 0 ) && ! force_send ) |
---|
| 429 | return 0; |
---|
| 430 | |
---|
| 431 | /* If we are transmitting anything that requires |
---|
| 432 | * acknowledgement (i.e. consumes sequence space), start the |
---|
| 433 | * retransmission timer. Do this before attempting to |
---|
| 434 | * allocate the I/O buffer, in case allocation itself fails. |
---|
| 435 | */ |
---|
| 436 | if ( seq_len ) |
---|
| 437 | start_timer ( &tcp->timer ); |
---|
| 438 | |
---|
| 439 | /* Allocate I/O buffer */ |
---|
| 440 | iobuf = alloc_iob ( len + MAX_HDR_LEN ); |
---|
| 441 | if ( ! iobuf ) { |
---|
| 442 | DBGC ( tcp, "TCP %p could not allocate iobuf for %08x..%08x " |
---|
| 443 | "%08x\n", tcp, tcp->snd_seq, ( tcp->snd_seq + seq_len ), |
---|
| 444 | tcp->rcv_ack ); |
---|
| 445 | return -ENOMEM; |
---|
| 446 | } |
---|
| 447 | iob_reserve ( iobuf, MAX_HDR_LEN ); |
---|
| 448 | |
---|
| 449 | /* Fill data payload from transmit queue */ |
---|
| 450 | tcp_process_queue ( tcp, len, iobuf, 0 ); |
---|
| 451 | |
---|
| 452 | /* Expand receive window if possible */ |
---|
| 453 | max_rcv_win = ( ( freemem * 3 ) / 4 ); |
---|
| 454 | if ( max_rcv_win > TCP_MAX_WINDOW_SIZE ) |
---|
| 455 | max_rcv_win = TCP_MAX_WINDOW_SIZE; |
---|
| 456 | app_win = xfer_window ( &tcp->xfer ); |
---|
| 457 | if ( max_rcv_win > app_win ) |
---|
| 458 | max_rcv_win = app_win; |
---|
| 459 | max_rcv_win &= ~0x03; /* Keep everything dword-aligned */ |
---|
| 460 | if ( tcp->rcv_win < max_rcv_win ) |
---|
| 461 | tcp->rcv_win = max_rcv_win; |
---|
| 462 | |
---|
| 463 | /* Fill up the TCP header */ |
---|
| 464 | payload = iobuf->data; |
---|
| 465 | if ( flags & TCP_SYN ) { |
---|
| 466 | mssopt = iob_push ( iobuf, sizeof ( *mssopt ) ); |
---|
| 467 | mssopt->kind = TCP_OPTION_MSS; |
---|
| 468 | mssopt->length = sizeof ( *mssopt ); |
---|
| 469 | mssopt->mss = htons ( TCP_MSS ); |
---|
| 470 | } |
---|
| 471 | if ( ( flags & TCP_SYN ) || tcp->timestamps ) { |
---|
| 472 | tsopt = iob_push ( iobuf, sizeof ( *tsopt ) ); |
---|
| 473 | memset ( tsopt->nop, TCP_OPTION_NOP, sizeof ( tsopt->nop ) ); |
---|
| 474 | tsopt->tsopt.kind = TCP_OPTION_TS; |
---|
| 475 | tsopt->tsopt.length = sizeof ( tsopt->tsopt ); |
---|
| 476 | tsopt->tsopt.tsval = ntohl ( currticks() ); |
---|
| 477 | tsopt->tsopt.tsecr = ntohl ( tcp->ts_recent ); |
---|
| 478 | } |
---|
| 479 | if ( ! ( flags & TCP_SYN ) ) |
---|
| 480 | flags |= TCP_PSH; |
---|
| 481 | tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) ); |
---|
| 482 | memset ( tcphdr, 0, sizeof ( *tcphdr ) ); |
---|
| 483 | tcphdr->src = tcp->local_port; |
---|
| 484 | tcphdr->dest = tcp->peer.st_port; |
---|
| 485 | tcphdr->seq = htonl ( tcp->snd_seq ); |
---|
| 486 | tcphdr->ack = htonl ( tcp->rcv_ack ); |
---|
| 487 | tcphdr->hlen = ( ( payload - iobuf->data ) << 2 ); |
---|
| 488 | tcphdr->flags = flags; |
---|
| 489 | tcphdr->win = htons ( tcp->rcv_win ); |
---|
| 490 | tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) ); |
---|
| 491 | |
---|
| 492 | /* Dump header */ |
---|
| 493 | DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x %08x %4zd", |
---|
| 494 | tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ), |
---|
| 495 | ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) + seq_len ), |
---|
| 496 | ntohl ( tcphdr->ack ), len ); |
---|
| 497 | tcp_dump_flags ( tcp, tcphdr->flags ); |
---|
| 498 | DBGC2 ( tcp, "\n" ); |
---|
| 499 | |
---|
| 500 | /* Transmit packet */ |
---|
| 501 | if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, &tcp->peer, NULL, |
---|
| 502 | &tcphdr->csum ) ) != 0 ) { |
---|
| 503 | DBGC ( tcp, "TCP %p could not transmit %08x..%08x %08x: %s\n", |
---|
| 504 | tcp, tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ), |
---|
| 505 | tcp->rcv_ack, strerror ( rc ) ); |
---|
| 506 | return rc; |
---|
| 507 | } |
---|
| 508 | |
---|
| 509 | return 0; |
---|
| 510 | } |
---|
| 511 | |
---|
| 512 | /** |
---|
| 513 | * Retransmission timer expired |
---|
| 514 | * |
---|
| 515 | * @v timer Retry timer |
---|
| 516 | * @v over Failure indicator |
---|
| 517 | */ |
---|
| 518 | static void tcp_expired ( struct retry_timer *timer, int over ) { |
---|
| 519 | struct tcp_connection *tcp = |
---|
| 520 | container_of ( timer, struct tcp_connection, timer ); |
---|
| 521 | int graceful_close = TCP_CLOSED_GRACEFULLY ( tcp->tcp_state ); |
---|
| 522 | |
---|
| 523 | DBGC ( tcp, "TCP %p timer %s in %s for %08x..%08x %08x\n", tcp, |
---|
| 524 | ( over ? "expired" : "fired" ), tcp_state ( tcp->tcp_state ), |
---|
| 525 | tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ), tcp->rcv_ack ); |
---|
| 526 | |
---|
| 527 | assert ( ( tcp->tcp_state == TCP_SYN_SENT ) || |
---|
| 528 | ( tcp->tcp_state == TCP_SYN_RCVD ) || |
---|
| 529 | ( tcp->tcp_state == TCP_ESTABLISHED ) || |
---|
| 530 | ( tcp->tcp_state == TCP_FIN_WAIT_1 ) || |
---|
| 531 | ( tcp->tcp_state == TCP_TIME_WAIT ) || |
---|
| 532 | ( tcp->tcp_state == TCP_CLOSE_WAIT ) || |
---|
| 533 | ( tcp->tcp_state == TCP_CLOSING_OR_LAST_ACK ) ); |
---|
| 534 | |
---|
| 535 | if ( over || graceful_close ) { |
---|
| 536 | /* If we have finally timed out and given up, or if |
---|
| 537 | * this is the result of a graceful close, terminate |
---|
| 538 | * the connection |
---|
| 539 | */ |
---|
| 540 | tcp->tcp_state = TCP_CLOSED; |
---|
| 541 | tcp_dump_state ( tcp ); |
---|
| 542 | tcp_close ( tcp, -ETIMEDOUT ); |
---|
| 543 | } else { |
---|
| 544 | /* Otherwise, retransmit the packet */ |
---|
| 545 | tcp_xmit ( tcp, 0 ); |
---|
| 546 | } |
---|
| 547 | } |
---|
| 548 | |
---|
| 549 | /** |
---|
| 550 | * Send RST response to incoming packet |
---|
| 551 | * |
---|
| 552 | * @v in_tcphdr TCP header of incoming packet |
---|
| 553 | * @ret rc Return status code |
---|
| 554 | */ |
---|
| 555 | static int tcp_xmit_reset ( struct tcp_connection *tcp, |
---|
| 556 | struct sockaddr_tcpip *st_dest, |
---|
| 557 | struct tcp_header *in_tcphdr ) { |
---|
| 558 | struct io_buffer *iobuf; |
---|
| 559 | struct tcp_header *tcphdr; |
---|
| 560 | int rc; |
---|
| 561 | |
---|
| 562 | /* Allocate space for dataless TX buffer */ |
---|
| 563 | iobuf = alloc_iob ( MAX_HDR_LEN ); |
---|
| 564 | if ( ! iobuf ) { |
---|
| 565 | DBGC ( tcp, "TCP %p could not allocate iobuf for RST " |
---|
| 566 | "%08x..%08x %08x\n", tcp, ntohl ( in_tcphdr->ack ), |
---|
| 567 | ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ) ); |
---|
| 568 | return -ENOMEM; |
---|
| 569 | } |
---|
| 570 | iob_reserve ( iobuf, MAX_HDR_LEN ); |
---|
| 571 | |
---|
| 572 | /* Construct RST response */ |
---|
| 573 | tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) ); |
---|
| 574 | memset ( tcphdr, 0, sizeof ( *tcphdr ) ); |
---|
| 575 | tcphdr->src = in_tcphdr->dest; |
---|
| 576 | tcphdr->dest = in_tcphdr->src; |
---|
| 577 | tcphdr->seq = in_tcphdr->ack; |
---|
| 578 | tcphdr->ack = in_tcphdr->seq; |
---|
| 579 | tcphdr->hlen = ( ( sizeof ( *tcphdr ) / 4 ) << 4 ); |
---|
| 580 | tcphdr->flags = ( TCP_RST | TCP_ACK ); |
---|
| 581 | tcphdr->win = htons ( TCP_MAX_WINDOW_SIZE ); |
---|
| 582 | tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) ); |
---|
| 583 | |
---|
| 584 | /* Dump header */ |
---|
| 585 | DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x %08x %4d", |
---|
| 586 | tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ), |
---|
| 587 | ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) ), |
---|
| 588 | ntohl ( tcphdr->ack ), 0 ); |
---|
| 589 | tcp_dump_flags ( tcp, tcphdr->flags ); |
---|
| 590 | DBGC2 ( tcp, "\n" ); |
---|
| 591 | |
---|
| 592 | /* Transmit packet */ |
---|
| 593 | if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, st_dest, |
---|
| 594 | NULL, &tcphdr->csum ) ) != 0 ) { |
---|
| 595 | DBGC ( tcp, "TCP %p could not transmit RST %08x..%08x %08x: " |
---|
| 596 | "%s\n", tcp, ntohl ( in_tcphdr->ack ), |
---|
| 597 | ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ), |
---|
| 598 | strerror ( rc ) ); |
---|
| 599 | return rc; |
---|
| 600 | } |
---|
| 601 | |
---|
| 602 | return 0; |
---|
| 603 | } |
---|
| 604 | |
---|
| 605 | /*************************************************************************** |
---|
| 606 | * |
---|
| 607 | * Receive data path |
---|
| 608 | * |
---|
| 609 | *************************************************************************** |
---|
| 610 | */ |
---|
| 611 | |
---|
| 612 | /** |
---|
| 613 | * Identify TCP connection by local port number |
---|
| 614 | * |
---|
| 615 | * @v local_port Local port (in network-endian order) |
---|
| 616 | * @ret tcp TCP connection, or NULL |
---|
| 617 | */ |
---|
| 618 | static struct tcp_connection * tcp_demux ( unsigned int local_port ) { |
---|
| 619 | struct tcp_connection *tcp; |
---|
| 620 | |
---|
| 621 | list_for_each_entry ( tcp, &tcp_conns, list ) { |
---|
| 622 | if ( tcp->local_port == local_port ) |
---|
| 623 | return tcp; |
---|
| 624 | } |
---|
| 625 | return NULL; |
---|
| 626 | } |
---|
| 627 | |
---|
| 628 | /** |
---|
| 629 | * Parse TCP received options |
---|
| 630 | * |
---|
| 631 | * @v tcp TCP connection |
---|
| 632 | * @v data Raw options data |
---|
| 633 | * @v len Raw options length |
---|
| 634 | * @v options Options structure to fill in |
---|
| 635 | */ |
---|
| 636 | static void tcp_rx_opts ( struct tcp_connection *tcp, const void *data, |
---|
| 637 | size_t len, struct tcp_options *options ) { |
---|
| 638 | const void *end = ( data + len ); |
---|
| 639 | const struct tcp_option *option; |
---|
| 640 | unsigned int kind; |
---|
| 641 | |
---|
| 642 | memset ( options, 0, sizeof ( *options ) ); |
---|
| 643 | while ( data < end ) { |
---|
| 644 | option = data; |
---|
| 645 | kind = option->kind; |
---|
| 646 | if ( kind == TCP_OPTION_END ) |
---|
| 647 | return; |
---|
| 648 | if ( kind == TCP_OPTION_NOP ) { |
---|
| 649 | data++; |
---|
| 650 | continue; |
---|
| 651 | } |
---|
| 652 | switch ( kind ) { |
---|
| 653 | case TCP_OPTION_MSS: |
---|
| 654 | options->mssopt = data; |
---|
| 655 | break; |
---|
| 656 | case TCP_OPTION_TS: |
---|
| 657 | options->tsopt = data; |
---|
| 658 | break; |
---|
| 659 | default: |
---|
| 660 | DBGC ( tcp, "TCP %p received unknown option %d\n", |
---|
| 661 | tcp, kind ); |
---|
| 662 | break; |
---|
| 663 | } |
---|
| 664 | data += option->length; |
---|
| 665 | } |
---|
| 666 | } |
---|
| 667 | |
---|
| 668 | /** |
---|
| 669 | * Consume received sequence space |
---|
| 670 | * |
---|
| 671 | * @v tcp TCP connection |
---|
| 672 | * @v seq_len Sequence space length to consume |
---|
| 673 | */ |
---|
| 674 | static void tcp_rx_seq ( struct tcp_connection *tcp, uint32_t seq_len ) { |
---|
| 675 | tcp->rcv_ack += seq_len; |
---|
| 676 | if ( tcp->rcv_win > seq_len ) { |
---|
| 677 | tcp->rcv_win -= seq_len; |
---|
| 678 | } else { |
---|
| 679 | tcp->rcv_win = 0; |
---|
| 680 | } |
---|
| 681 | } |
---|
| 682 | |
---|
| 683 | /** |
---|
| 684 | * Handle TCP received SYN |
---|
| 685 | * |
---|
| 686 | * @v tcp TCP connection |
---|
| 687 | * @v seq SEQ value (in host-endian order) |
---|
| 688 | * @v options TCP options |
---|
| 689 | * @ret rc Return status code |
---|
| 690 | */ |
---|
| 691 | static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq, |
---|
| 692 | struct tcp_options *options ) { |
---|
| 693 | |
---|
| 694 | /* Synchronise sequence numbers on first SYN */ |
---|
| 695 | if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) { |
---|
| 696 | tcp->rcv_ack = seq; |
---|
| 697 | if ( options->tsopt ) |
---|
| 698 | tcp->timestamps = 1; |
---|
| 699 | } |
---|
| 700 | |
---|
| 701 | /* Ignore duplicate SYN */ |
---|
| 702 | if ( ( tcp->rcv_ack - seq ) > 0 ) |
---|
| 703 | return 0; |
---|
| 704 | |
---|
| 705 | /* Mark SYN as received and start sending ACKs with each packet */ |
---|
| 706 | tcp->tcp_state |= ( TCP_STATE_SENT ( TCP_ACK ) | |
---|
| 707 | TCP_STATE_RCVD ( TCP_SYN ) ); |
---|
| 708 | |
---|
| 709 | /* Acknowledge SYN */ |
---|
| 710 | tcp_rx_seq ( tcp, 1 ); |
---|
| 711 | |
---|
| 712 | return 0; |
---|
| 713 | } |
---|
| 714 | |
---|
| 715 | /** |
---|
| 716 | * Handle TCP received ACK |
---|
| 717 | * |
---|
| 718 | * @v tcp TCP connection |
---|
| 719 | * @v ack ACK value (in host-endian order) |
---|
| 720 | * @v win WIN value (in host-endian order) |
---|
| 721 | * @ret rc Return status code |
---|
| 722 | */ |
---|
| 723 | static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack, |
---|
| 724 | uint32_t win ) { |
---|
| 725 | uint32_t ack_len = ( ack - tcp->snd_seq ); |
---|
| 726 | size_t len; |
---|
| 727 | unsigned int acked_flags; |
---|
| 728 | |
---|
| 729 | /* Check for out-of-range or old duplicate ACKs */ |
---|
| 730 | if ( ack_len > tcp->snd_sent ) { |
---|
| 731 | DBGC ( tcp, "TCP %p received ACK for %08x..%08x, " |
---|
| 732 | "sent only %08x..%08x\n", tcp, tcp->snd_seq, |
---|
| 733 | ( tcp->snd_seq + ack_len ), tcp->snd_seq, |
---|
| 734 | ( tcp->snd_seq + tcp->snd_sent ) ); |
---|
| 735 | |
---|
| 736 | if ( TCP_HAS_BEEN_ESTABLISHED ( tcp->tcp_state ) ) { |
---|
| 737 | /* Just ignore what might be old duplicate ACKs */ |
---|
| 738 | return 0; |
---|
| 739 | } else { |
---|
| 740 | /* Send RST if an out-of-range ACK is received |
---|
| 741 | * on a not-yet-established connection, as per |
---|
| 742 | * RFC 793. |
---|
| 743 | */ |
---|
| 744 | return -EINVAL; |
---|
| 745 | } |
---|
| 746 | } |
---|
| 747 | |
---|
| 748 | /* Ignore ACKs that don't actually acknowledge any new data. |
---|
| 749 | * (In particular, do not stop the retransmission timer; this |
---|
| 750 | * avoids creating a sorceror's apprentice syndrome when a |
---|
| 751 | * duplicate ACK is received and we still have data in our |
---|
| 752 | * transmit queue.) |
---|
| 753 | */ |
---|
| 754 | if ( ack_len == 0 ) |
---|
| 755 | return 0; |
---|
| 756 | |
---|
| 757 | /* Stop the retransmission timer */ |
---|
| 758 | stop_timer ( &tcp->timer ); |
---|
| 759 | |
---|
| 760 | /* Determine acknowledged flags and data length */ |
---|
| 761 | len = ack_len; |
---|
| 762 | acked_flags = ( TCP_FLAGS_SENDING ( tcp->tcp_state ) & |
---|
| 763 | ( TCP_SYN | TCP_FIN ) ); |
---|
| 764 | if ( acked_flags ) |
---|
| 765 | len--; |
---|
| 766 | |
---|
| 767 | /* Update SEQ and sent counters, and window size */ |
---|
| 768 | tcp->snd_seq = ack; |
---|
| 769 | tcp->snd_sent = 0; |
---|
| 770 | tcp->snd_win = win; |
---|
| 771 | |
---|
| 772 | /* Remove any acknowledged data from transmit queue */ |
---|
| 773 | tcp_process_queue ( tcp, len, NULL, 1 ); |
---|
| 774 | |
---|
| 775 | /* Mark SYN/FIN as acknowledged if applicable. */ |
---|
| 776 | if ( acked_flags ) |
---|
| 777 | tcp->tcp_state |= TCP_STATE_ACKED ( acked_flags ); |
---|
| 778 | |
---|
| 779 | /* Start sending FIN if we've had all possible data ACKed */ |
---|
| 780 | if ( list_empty ( &tcp->queue ) && tcp->xfer_closed ) |
---|
| 781 | tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN ); |
---|
| 782 | |
---|
| 783 | return 0; |
---|
| 784 | } |
---|
| 785 | |
---|
| 786 | /** |
---|
| 787 | * Handle TCP received data |
---|
| 788 | * |
---|
| 789 | * @v tcp TCP connection |
---|
| 790 | * @v seq SEQ value (in host-endian order) |
---|
| 791 | * @v iobuf I/O buffer |
---|
| 792 | * @ret rc Return status code |
---|
| 793 | * |
---|
| 794 | * This function takes ownership of the I/O buffer. |
---|
| 795 | */ |
---|
| 796 | static int tcp_rx_data ( struct tcp_connection *tcp, uint32_t seq, |
---|
| 797 | struct io_buffer *iobuf ) { |
---|
| 798 | uint32_t already_rcvd; |
---|
| 799 | uint32_t len; |
---|
| 800 | int rc; |
---|
| 801 | |
---|
| 802 | /* Ignore duplicate or out-of-order data */ |
---|
| 803 | already_rcvd = ( tcp->rcv_ack - seq ); |
---|
| 804 | len = iob_len ( iobuf ); |
---|
| 805 | if ( already_rcvd >= len ) { |
---|
| 806 | free_iob ( iobuf ); |
---|
| 807 | return 0; |
---|
| 808 | } |
---|
| 809 | iob_pull ( iobuf, already_rcvd ); |
---|
| 810 | len -= already_rcvd; |
---|
| 811 | |
---|
| 812 | /* Deliver data to application */ |
---|
| 813 | if ( ( rc = xfer_deliver_iob ( &tcp->xfer, iobuf ) ) != 0 ) { |
---|
| 814 | DBGC ( tcp, "TCP %p could not deliver %08x..%08x: %s\n", |
---|
| 815 | tcp, seq, ( seq + len ), strerror ( rc ) ); |
---|
| 816 | return rc; |
---|
| 817 | } |
---|
| 818 | |
---|
| 819 | /* Acknowledge new data */ |
---|
| 820 | tcp_rx_seq ( tcp, len ); |
---|
| 821 | |
---|
| 822 | return 0; |
---|
| 823 | } |
---|
| 824 | |
---|
| 825 | /** |
---|
| 826 | * Handle TCP received FIN |
---|
| 827 | * |
---|
| 828 | * @v tcp TCP connection |
---|
| 829 | * @v seq SEQ value (in host-endian order) |
---|
| 830 | * @ret rc Return status code |
---|
| 831 | */ |
---|
| 832 | static int tcp_rx_fin ( struct tcp_connection *tcp, uint32_t seq ) { |
---|
| 833 | |
---|
| 834 | /* Ignore duplicate or out-of-order FIN */ |
---|
| 835 | if ( ( tcp->rcv_ack - seq ) > 0 ) |
---|
| 836 | return 0; |
---|
| 837 | |
---|
| 838 | /* Mark FIN as received and acknowledge it */ |
---|
| 839 | tcp->tcp_state |= TCP_STATE_RCVD ( TCP_FIN ); |
---|
| 840 | tcp_rx_seq ( tcp, 1 ); |
---|
| 841 | |
---|
| 842 | /* Close connection */ |
---|
| 843 | tcp_close ( tcp, 0 ); |
---|
| 844 | |
---|
| 845 | return 0; |
---|
| 846 | } |
---|
| 847 | |
---|
| 848 | /** |
---|
| 849 | * Handle TCP received RST |
---|
| 850 | * |
---|
| 851 | * @v tcp TCP connection |
---|
| 852 | * @v seq SEQ value (in host-endian order) |
---|
| 853 | * @ret rc Return status code |
---|
| 854 | */ |
---|
| 855 | static int tcp_rx_rst ( struct tcp_connection *tcp, uint32_t seq ) { |
---|
| 856 | |
---|
| 857 | /* Accept RST only if it falls within the window. If we have |
---|
| 858 | * not yet received a SYN, then we have no window to test |
---|
| 859 | * against, so fall back to checking that our SYN has been |
---|
| 860 | * ACKed. |
---|
| 861 | */ |
---|
| 862 | if ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) { |
---|
| 863 | if ( ( seq - tcp->rcv_ack ) >= tcp->rcv_win ) |
---|
| 864 | return 0; |
---|
| 865 | } else { |
---|
| 866 | if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) ) |
---|
| 867 | return 0; |
---|
| 868 | } |
---|
| 869 | |
---|
| 870 | /* Abort connection */ |
---|
| 871 | tcp->tcp_state = TCP_CLOSED; |
---|
| 872 | tcp_dump_state ( tcp ); |
---|
| 873 | tcp_close ( tcp, -ECONNRESET ); |
---|
| 874 | |
---|
| 875 | DBGC ( tcp, "TCP %p connection reset by peer\n", tcp ); |
---|
| 876 | return -ECONNRESET; |
---|
| 877 | } |
---|
| 878 | |
---|
| 879 | /** |
---|
| 880 | * Process received packet |
---|
| 881 | * |
---|
| 882 | * @v iobuf I/O buffer |
---|
| 883 | * @v st_src Partially-filled source address |
---|
| 884 | * @v st_dest Partially-filled destination address |
---|
| 885 | * @v pshdr_csum Pseudo-header checksum |
---|
| 886 | * @ret rc Return status code |
---|
| 887 | */ |
---|
| 888 | static int tcp_rx ( struct io_buffer *iobuf, |
---|
| 889 | struct sockaddr_tcpip *st_src, |
---|
| 890 | struct sockaddr_tcpip *st_dest __unused, |
---|
| 891 | uint16_t pshdr_csum ) { |
---|
| 892 | struct tcp_header *tcphdr = iobuf->data; |
---|
| 893 | struct tcp_connection *tcp; |
---|
| 894 | struct tcp_options options; |
---|
| 895 | size_t hlen; |
---|
| 896 | uint16_t csum; |
---|
| 897 | uint32_t start_seq; |
---|
| 898 | uint32_t seq; |
---|
| 899 | uint32_t ack; |
---|
| 900 | uint32_t win; |
---|
| 901 | unsigned int flags; |
---|
| 902 | size_t len; |
---|
| 903 | int rc; |
---|
| 904 | |
---|
| 905 | /* Sanity check packet */ |
---|
| 906 | if ( iob_len ( iobuf ) < sizeof ( *tcphdr ) ) { |
---|
| 907 | DBG ( "TCP packet too short at %zd bytes (min %zd bytes)\n", |
---|
| 908 | iob_len ( iobuf ), sizeof ( *tcphdr ) ); |
---|
| 909 | rc = -EINVAL; |
---|
| 910 | goto discard; |
---|
| 911 | } |
---|
| 912 | hlen = ( ( tcphdr->hlen & TCP_MASK_HLEN ) / 16 ) * 4; |
---|
| 913 | if ( hlen < sizeof ( *tcphdr ) ) { |
---|
| 914 | DBG ( "TCP header too short at %zd bytes (min %zd bytes)\n", |
---|
| 915 | hlen, sizeof ( *tcphdr ) ); |
---|
| 916 | rc = -EINVAL; |
---|
| 917 | goto discard; |
---|
| 918 | } |
---|
| 919 | if ( hlen > iob_len ( iobuf ) ) { |
---|
| 920 | DBG ( "TCP header too long at %zd bytes (max %zd bytes)\n", |
---|
| 921 | hlen, iob_len ( iobuf ) ); |
---|
| 922 | rc = -EINVAL; |
---|
| 923 | goto discard; |
---|
| 924 | } |
---|
| 925 | csum = tcpip_continue_chksum ( pshdr_csum, iobuf->data, |
---|
| 926 | iob_len ( iobuf ) ); |
---|
| 927 | if ( csum != 0 ) { |
---|
| 928 | DBG ( "TCP checksum incorrect (is %04x including checksum " |
---|
| 929 | "field, should be 0000)\n", csum ); |
---|
| 930 | rc = -EINVAL; |
---|
| 931 | goto discard; |
---|
| 932 | } |
---|
| 933 | |
---|
| 934 | /* Parse parameters from header and strip header */ |
---|
| 935 | tcp = tcp_demux ( tcphdr->dest ); |
---|
| 936 | start_seq = seq = ntohl ( tcphdr->seq ); |
---|
| 937 | ack = ntohl ( tcphdr->ack ); |
---|
| 938 | win = ntohs ( tcphdr->win ); |
---|
| 939 | flags = tcphdr->flags; |
---|
| 940 | tcp_rx_opts ( tcp, ( ( ( void * ) tcphdr ) + sizeof ( *tcphdr ) ), |
---|
| 941 | ( hlen - sizeof ( *tcphdr ) ), &options ); |
---|
| 942 | iob_pull ( iobuf, hlen ); |
---|
| 943 | len = iob_len ( iobuf ); |
---|
| 944 | |
---|
| 945 | /* Dump header */ |
---|
| 946 | DBGC2 ( tcp, "TCP %p RX %d<-%d %08x %08x..%08zx %4zd", |
---|
| 947 | tcp, ntohs ( tcphdr->dest ), ntohs ( tcphdr->src ), |
---|
| 948 | ntohl ( tcphdr->ack ), ntohl ( tcphdr->seq ), |
---|
| 949 | ( ntohl ( tcphdr->seq ) + len + |
---|
| 950 | ( ( tcphdr->flags & ( TCP_SYN | TCP_FIN ) ) ? 1 : 0 )), len); |
---|
| 951 | tcp_dump_flags ( tcp, tcphdr->flags ); |
---|
| 952 | DBGC2 ( tcp, "\n" ); |
---|
| 953 | |
---|
| 954 | /* If no connection was found, send RST */ |
---|
| 955 | if ( ! tcp ) { |
---|
| 956 | tcp_xmit_reset ( tcp, st_src, tcphdr ); |
---|
| 957 | rc = -ENOTCONN; |
---|
| 958 | goto discard; |
---|
| 959 | } |
---|
| 960 | |
---|
| 961 | /* Handle ACK, if present */ |
---|
| 962 | if ( flags & TCP_ACK ) { |
---|
| 963 | if ( ( rc = tcp_rx_ack ( tcp, ack, win ) ) != 0 ) { |
---|
| 964 | tcp_xmit_reset ( tcp, st_src, tcphdr ); |
---|
| 965 | goto discard; |
---|
| 966 | } |
---|
| 967 | } |
---|
| 968 | |
---|
| 969 | /* Handle SYN, if present */ |
---|
| 970 | if ( flags & TCP_SYN ) { |
---|
| 971 | tcp_rx_syn ( tcp, seq, &options ); |
---|
| 972 | seq++; |
---|
| 973 | } |
---|
| 974 | |
---|
| 975 | /* Handle RST, if present */ |
---|
| 976 | if ( flags & TCP_RST ) { |
---|
| 977 | if ( ( rc = tcp_rx_rst ( tcp, seq ) ) != 0 ) |
---|
| 978 | goto discard; |
---|
| 979 | } |
---|
| 980 | |
---|
| 981 | /* Handle new data, if any */ |
---|
| 982 | tcp_rx_data ( tcp, seq, iobuf ); |
---|
| 983 | seq += len; |
---|
| 984 | |
---|
| 985 | /* Handle FIN, if present */ |
---|
| 986 | if ( flags & TCP_FIN ) { |
---|
| 987 | tcp_rx_fin ( tcp, seq ); |
---|
| 988 | seq++; |
---|
| 989 | } |
---|
| 990 | |
---|
| 991 | /* Update timestamp, if present and applicable */ |
---|
| 992 | if ( ( seq == tcp->rcv_ack ) && options.tsopt ) |
---|
| 993 | tcp->ts_recent = ntohl ( options.tsopt->tsval ); |
---|
| 994 | |
---|
| 995 | /* Dump out any state change as a result of the received packet */ |
---|
| 996 | tcp_dump_state ( tcp ); |
---|
| 997 | |
---|
| 998 | /* Send out any pending data. We force sending a reply if either |
---|
| 999 | * |
---|
| 1000 | * a) the peer is expecting an ACK (i.e. consumed sequence space), or |
---|
| 1001 | * b) either end of the packet was outside the receive window |
---|
| 1002 | * |
---|
| 1003 | * Case (b) enables us to support TCP keepalives using |
---|
| 1004 | * zero-length packets, which we would otherwise ignore. Note |
---|
| 1005 | * that for case (b), we need *only* consider zero-length |
---|
| 1006 | * packets, since non-zero-length packets will already be |
---|
| 1007 | * caught by case (a). |
---|
| 1008 | */ |
---|
| 1009 | tcp_xmit ( tcp, ( ( start_seq != seq ) || |
---|
| 1010 | ( ( seq - tcp->rcv_ack ) > tcp->rcv_win ) ) ); |
---|
| 1011 | |
---|
| 1012 | /* If this packet was the last we expect to receive, set up |
---|
| 1013 | * timer to expire and cause the connection to be freed. |
---|
| 1014 | */ |
---|
| 1015 | if ( TCP_CLOSED_GRACEFULLY ( tcp->tcp_state ) ) { |
---|
| 1016 | tcp->timer.timeout = ( 2 * TCP_MSL ); |
---|
| 1017 | start_timer ( &tcp->timer ); |
---|
| 1018 | } |
---|
| 1019 | |
---|
| 1020 | return 0; |
---|
| 1021 | |
---|
| 1022 | discard: |
---|
| 1023 | /* Free received packet */ |
---|
| 1024 | free_iob ( iobuf ); |
---|
| 1025 | return rc; |
---|
| 1026 | } |
---|
| 1027 | |
---|
| 1028 | /** TCP protocol */ |
---|
| 1029 | struct tcpip_protocol tcp_protocol __tcpip_protocol = { |
---|
| 1030 | .name = "TCP", |
---|
| 1031 | .rx = tcp_rx, |
---|
| 1032 | .tcpip_proto = IP_TCP, |
---|
| 1033 | }; |
---|
| 1034 | |
---|
| 1035 | /*************************************************************************** |
---|
| 1036 | * |
---|
| 1037 | * Data transfer interface |
---|
| 1038 | * |
---|
| 1039 | *************************************************************************** |
---|
| 1040 | */ |
---|
| 1041 | |
---|
| 1042 | /** |
---|
| 1043 | * Close interface |
---|
| 1044 | * |
---|
| 1045 | * @v xfer Data transfer interface |
---|
| 1046 | * @v rc Reason for close |
---|
| 1047 | */ |
---|
| 1048 | static void tcp_xfer_close ( struct xfer_interface *xfer, int rc ) { |
---|
| 1049 | struct tcp_connection *tcp = |
---|
| 1050 | container_of ( xfer, struct tcp_connection, xfer ); |
---|
| 1051 | |
---|
| 1052 | /* Close data transfer interface */ |
---|
| 1053 | tcp_close ( tcp, rc ); |
---|
| 1054 | |
---|
| 1055 | /* Transmit FIN, if possible */ |
---|
| 1056 | tcp_xmit ( tcp, 0 ); |
---|
| 1057 | } |
---|
| 1058 | |
---|
| 1059 | /** |
---|
| 1060 | * Check flow control window |
---|
| 1061 | * |
---|
| 1062 | * @v xfer Data transfer interface |
---|
| 1063 | * @ret len Length of window |
---|
| 1064 | */ |
---|
| 1065 | static size_t tcp_xfer_window ( struct xfer_interface *xfer ) { |
---|
| 1066 | struct tcp_connection *tcp = |
---|
| 1067 | container_of ( xfer, struct tcp_connection, xfer ); |
---|
| 1068 | |
---|
| 1069 | /* Not ready if data queue is non-empty. This imposes a limit |
---|
| 1070 | * of only one unACKed packet in the TX queue at any time; we |
---|
| 1071 | * do this to conserve memory usage. |
---|
| 1072 | */ |
---|
| 1073 | if ( ! list_empty ( &tcp->queue ) ) |
---|
| 1074 | return 0; |
---|
| 1075 | |
---|
| 1076 | /* Return TCP window length */ |
---|
| 1077 | return tcp_xmit_win ( tcp ); |
---|
| 1078 | } |
---|
| 1079 | |
---|
| 1080 | /** |
---|
| 1081 | * Deliver datagram as I/O buffer |
---|
| 1082 | * |
---|
| 1083 | * @v xfer Data transfer interface |
---|
| 1084 | * @v iobuf Datagram I/O buffer |
---|
| 1085 | * @v meta Data transfer metadata |
---|
| 1086 | * @ret rc Return status code |
---|
| 1087 | */ |
---|
| 1088 | static int tcp_xfer_deliver_iob ( struct xfer_interface *xfer, |
---|
| 1089 | struct io_buffer *iobuf, |
---|
| 1090 | struct xfer_metadata *meta __unused ) { |
---|
| 1091 | struct tcp_connection *tcp = |
---|
| 1092 | container_of ( xfer, struct tcp_connection, xfer ); |
---|
| 1093 | |
---|
| 1094 | /* Enqueue packet */ |
---|
| 1095 | list_add_tail ( &iobuf->list, &tcp->queue ); |
---|
| 1096 | |
---|
| 1097 | /* Transmit data, if possible */ |
---|
| 1098 | tcp_xmit ( tcp, 0 ); |
---|
| 1099 | |
---|
| 1100 | return 0; |
---|
| 1101 | } |
---|
| 1102 | |
---|
| 1103 | /** TCP data transfer interface operations */ |
---|
| 1104 | static struct xfer_interface_operations tcp_xfer_operations = { |
---|
| 1105 | .close = tcp_xfer_close, |
---|
| 1106 | .vredirect = ignore_xfer_vredirect, |
---|
| 1107 | .window = tcp_xfer_window, |
---|
| 1108 | .alloc_iob = default_xfer_alloc_iob, |
---|
| 1109 | .deliver_iob = tcp_xfer_deliver_iob, |
---|
| 1110 | .deliver_raw = xfer_deliver_as_iob, |
---|
| 1111 | }; |
---|
| 1112 | |
---|
| 1113 | /*************************************************************************** |
---|
| 1114 | * |
---|
| 1115 | * Openers |
---|
| 1116 | * |
---|
| 1117 | *************************************************************************** |
---|
| 1118 | */ |
---|
| 1119 | |
---|
| 1120 | /** TCP socket opener */ |
---|
| 1121 | struct socket_opener tcp_socket_opener __socket_opener = { |
---|
| 1122 | .semantics = TCP_SOCK_STREAM, |
---|
| 1123 | .family = AF_INET, |
---|
| 1124 | .open = tcp_open, |
---|
| 1125 | }; |
---|
| 1126 | |
---|
| 1127 | /** Linkage hack */ |
---|
| 1128 | int tcp_sock_stream = TCP_SOCK_STREAM; |
---|
| 1129 | |
---|
| 1130 | /** |
---|
| 1131 | * Open TCP URI |
---|
| 1132 | * |
---|
| 1133 | * @v xfer Data transfer interface |
---|
| 1134 | * @v uri URI |
---|
| 1135 | * @ret rc Return status code |
---|
| 1136 | */ |
---|
| 1137 | static int tcp_open_uri ( struct xfer_interface *xfer, struct uri *uri ) { |
---|
| 1138 | struct sockaddr_tcpip peer; |
---|
| 1139 | |
---|
| 1140 | /* Sanity check */ |
---|
| 1141 | if ( ! uri->host ) |
---|
| 1142 | return -EINVAL; |
---|
| 1143 | |
---|
| 1144 | memset ( &peer, 0, sizeof ( peer ) ); |
---|
| 1145 | peer.st_port = htons ( uri_port ( uri, 0 ) ); |
---|
| 1146 | return xfer_open_named_socket ( xfer, SOCK_STREAM, |
---|
| 1147 | ( struct sockaddr * ) &peer, |
---|
| 1148 | uri->host, NULL ); |
---|
| 1149 | } |
---|
| 1150 | |
---|
| 1151 | /** TCP URI opener */ |
---|
| 1152 | struct uri_opener tcp_uri_opener __uri_opener = { |
---|
| 1153 | .scheme = "tcp", |
---|
| 1154 | .open = tcp_open_uri, |
---|
| 1155 | }; |
---|
| 1156 | |
---|