1 /*
2 * Copyright (C) 2016-2025 Red Hat, Inc. All rights reserved.
3 *
4 * Author: Christine Caulfield <ccaulfie@redhat.com>
5 *
6 * This software licensed under LGPL-2.0+
7 */
8
9 #include "config.h"
10
11 #include <string.h>
12 #include <unistd.h>
13 #include <errno.h>
14 #include <sys/types.h>
15 #include <sys/socket.h>
16 #include <stdlib.h>
17 #include <netinet/in.h>
18 #include <netinet/ip.h>
19 #include <netinet/in_systm.h>
20 #include <netinet/ip_icmp.h>
21 #include <net/if.h>
22 #if defined (IP_RECVERR) || defined (IPV6_RECVERR)
23 #include <linux/errqueue.h>
24 #endif
25
26 #include "libknet.h"
27 #include "compat.h"
28 #include "host.h"
29 #include "link.h"
30 #include "logging.h"
31 #include "common.h"
32 #include "netutils.h"
33 #include "transport_common.h"
34 #include "transport_udp.h"
35 #include "transports.h"
36 #include "threads_common.h"
37
38 typedef struct udp_handle_info {
39 struct qb_list_head links_list;
40 } udp_handle_info_t;
41
42 typedef struct udp_link_info {
43 struct qb_list_head list;
44 struct sockaddr_storage local_address;
45 int socket_fd;
46 int on_epoll;
47 } udp_link_info_t;
48
49 int udp_transport_link_set_config(knet_handle_t knet_h, struct knet_link *kn_link)
50 {
51 int err = 0, savederrno = 0;
52 int sock = -1;
53 struct epoll_event ev;
54 udp_link_info_t *info;
55 udp_handle_info_t *handle_info = knet_h->transports[KNET_TRANSPORT_UDP];
56
57 /*
58 * Only allocate a new link if the local address is different
59 */
60 qb_list_for_each_entry(info, &handle_info->links_list, list) {
61 if (memcmp(&info->local_address, &kn_link->src_addr, sizeof(struct sockaddr_storage)) == 0) {
62 log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Re-using existing UDP socket for new link");
63 kn_link->outsock = info->socket_fd;
64 kn_link->transport_link = info;
65 kn_link->transport_connected = 1;
66 return 0;
67 }
68 }
69
70 info = malloc(sizeof(udp_link_info_t));
71 if (!info) {
72 err = -1;
73 goto exit_error;
74 }
75 memset(info, 0, sizeof(udp_link_info_t));
76
77 sock = socket(kn_link->src_addr.ss_family, SOCK_DGRAM, 0);
78 if (sock < 0) {
79 savederrno = errno;
80 err = -1;
81 log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to create listener socket: %s",
82 strerror(savederrno));
83 goto exit_error;
84 }
85
86 if (_configure_transport_socket(knet_h, sock, &kn_link->src_addr, kn_link->flags, "UDP") < 0) {
87 savederrno = errno;
88 err = -1;
89 goto exit_error;
90 }
91
92 #ifdef IP_RECVERR
93 if (kn_link->src_addr.ss_family == AF_INET) {
94 int value = 1;
95 if (setsockopt(sock, SOL_IP, IP_RECVERR, &value, sizeof(value)) <0) {
96 savederrno = errno;
97 err = -1;
98 log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to set RECVERR on socket: %s",
99 strerror(savederrno));
100 goto exit_error;
101 }
102 log_debug(knet_h, KNET_SUB_TRANSP_UDP, "IP_RECVERR enabled on socket: %i", sock);
103 }
104 #else
105 log_debug(knet_h, KNET_SUB_TRANSP_UDP, "IP_RECVERR not available in this build/platform");
106 #endif
107 #ifdef IP_PKTINFO
108 if (kn_link->src_addr.ss_family == AF_INET) {
109 int value = 1;
110 if (setsockopt(sock, SOL_IP, IP_PKTINFO, &value, sizeof(value)) <0) {
111 savederrno = errno;
112 err = -1;
113 log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to set PKTINFO on socket: %s",
114 strerror(savederrno));
115 goto exit_error;
116 }
117 log_debug(knet_h, KNET_SUB_TRANSP_UDP, "IP_PKTINFO enabled on socket: %i", sock);
118 }
119 #endif
120 #ifdef IPV6_RECVPKTINFO
121 if (kn_link->src_addr.ss_family == AF_INET6) {
122 int value = 1;
123 if (setsockopt(sock, IPPROTO_IPV6, IPV6_RECVPKTINFO, &value, sizeof(value)) <0) {
124 savederrno = errno;
125 err = -1;
126 log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to set RECVPKTINFO on socket: %s",
127 strerror(savederrno));
128 goto exit_error;
129 }
130 log_debug(knet_h, KNET_SUB_TRANSP_UDP, "IPV6_RECVPKTINFO enabled on socket: %i", sock);
131 }
132 #endif
133 #ifdef IPV6_RECVERR
134 if (kn_link->src_addr.ss_family == AF_INET6) {
135 int value = 1;
136 if (setsockopt(sock, SOL_IPV6, IPV6_RECVERR, &value, sizeof(value)) <0) {
137 savederrno = errno;
138 err = -1;
139 log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to set RECVERR on socket: %s",
140 strerror(savederrno));
141 goto exit_error;
142 }
143 log_debug(knet_h, KNET_SUB_TRANSP_UDP, "IPV6_RECVERR enabled on socket: %i", sock);
144 }
145 #else
146 log_debug(knet_h, KNET_SUB_TRANSP_UDP, "IPV6_RECVERR not available in this build/platform");
147 #endif
148
149 if (bind(sock, (struct sockaddr *)&kn_link->src_addr, sockaddr_len(&kn_link->src_addr))) {
150 savederrno = errno;
151 err = -1;
152 log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to bind listener socket: %s",
153 strerror(savederrno));
154 goto exit_error;
155 }
156 memset(&ev, 0, sizeof(struct epoll_event));
157 ev.events = EPOLLIN;
158 ev.data.fd = sock;
159
160 if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_ADD, sock, &ev)) {
161 savederrno = errno;
162 err = -1;
163 log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to add listener to epoll pool: %s",
164 strerror(savederrno));
165 goto exit_error;
166 }
167
168 info->on_epoll = 1;
169
170 if (_set_fd_tracker(knet_h, sock, KNET_TRANSPORT_UDP, 0, sockaddr_len(&kn_link->src_addr), info, -1) < 0) {
171 savederrno = errno;
172 err = -1;
173 log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to set fd tracker: %s",
174 strerror(savederrno));
175 goto exit_error;
176 }
177
178 memmove(&info->local_address, &kn_link->src_addr, sizeof(struct sockaddr_storage));
179 info->socket_fd = sock;
180 qb_list_add(&info->list, &handle_info->links_list);
181
182 kn_link->outsock = sock;
183 kn_link->transport_link = info;
184 kn_link->transport_connected = 1;
185
186 exit_error:
187 if (err) {
188 if (info) {
189 if (info->on_epoll) {
190 epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_DEL, sock, &ev);
191 }
192 free(info);
193 }
194 if (sock >= 0) {
195 close(sock);
196 }
197 }
198 errno = savederrno;
199 return err;
200 }
201
202 int udp_transport_link_clear_config(knet_handle_t knet_h, struct knet_link *kn_link)
203 {
204 int err = 0, savederrno = 0;
205 int found = 0;
206 struct knet_host *host;
207 int link_idx;
208 udp_link_info_t *info = kn_link->transport_link;
209 struct epoll_event ev;
210
211 for (host = knet_h->host_head; host != NULL; host = host->next) {
212 for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) {
213 if (&host->link[link_idx] == kn_link)
214 continue;
215
216 if (host->link[link_idx].transport_link == info) {
217 found = 1;
218 break;
219 }
220 }
221 }
222
223 if (found) {
224 log_debug(knet_h, KNET_SUB_TRANSP_UDP, "UDP socket %d still in use", info->socket_fd);
225 savederrno = EBUSY;
226 err = -1;
227 goto exit_error;
228 }
229
230 if (info->on_epoll) {
231 memset(&ev, 0, sizeof(struct epoll_event));
232 ev.events = EPOLLIN;
233 ev.data.fd = info->socket_fd;
234
235 if (epoll_ctl(knet_h->recv_from_links_epollfd, EPOLL_CTL_DEL, info->socket_fd, &ev) < 0) {
236 savederrno = errno;
237 err = -1;
238 log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to remove UDP socket from epoll poll: %s",
239 strerror(errno));
240 goto exit_error;
241 }
242 info->on_epoll = 0;
243 }
244
245 if (_set_fd_tracker(knet_h, info->socket_fd, KNET_MAX_TRANSPORTS, 0, sockaddr_len(&kn_link->src_addr), NULL, -1) < 0) {
246 savederrno = errno;
247 err = -1;
248 log_err(knet_h, KNET_SUB_TRANSP_UDP, "Unable to set fd tracker: %s",
249 strerror(savederrno));
250 goto exit_error;
251 }
252
253 close(info->socket_fd);
254 qb_list_del(&info->list);
255 free(kn_link->transport_link);
256
257 exit_error:
258 errno = savederrno;
259 return err;
260 }
261
262 int udp_transport_free(knet_handle_t knet_h)
263 {
264 udp_handle_info_t *handle_info;
265
266 if (!knet_h->transports[KNET_TRANSPORT_UDP]) {
267 errno = EINVAL;
268 return -1;
269 }
270
271 handle_info = knet_h->transports[KNET_TRANSPORT_UDP];
272
273 /*
274 * keep it here while we debug list usage and such
275 */
276 if (!qb_list_empty(&handle_info->links_list)) {
277 log_err(knet_h, KNET_SUB_TRANSP_UDP, "Internal error. handle list is not empty");
278 return -1;
279 }
280
281 free(handle_info);
282
283 knet_h->transports[KNET_TRANSPORT_UDP] = NULL;
284
285 return 0;
286 }
287
288 int udp_transport_init(knet_handle_t knet_h)
289 {
290 udp_handle_info_t *handle_info;
291
292 if (knet_h->transports[KNET_TRANSPORT_UDP]) {
293 errno = EEXIST;
294 return -1;
295 }
296
297 handle_info = malloc(sizeof(udp_handle_info_t));
298 if (!handle_info) {
299 return -1;
300 }
301
302 memset(handle_info, 0, sizeof(udp_handle_info_t));
303
304 knet_h->transports[KNET_TRANSPORT_UDP] = handle_info;
305
306 qb_list_init(&handle_info->links_list);
307
308 return 0;
309 }
310
311 #if defined (IP_RECVERR) || defined (IPV6_RECVERR)
312 static int read_errs_from_sock(knet_handle_t knet_h, int sockfd)
313 {
314 int err = 0, savederrno = 0;
315 int got_err = 0;
316 char buffer[1024];
317 struct iovec iov;
318 struct msghdr msg;
319 struct cmsghdr *cmsg;
320 struct sock_extended_err *sock_err;
321 struct icmphdr icmph;
322 struct sockaddr_storage remote;
323 struct sockaddr_storage *origin;
324 char addr_str[KNET_MAX_HOST_LEN];
325 char port_str[KNET_MAX_PORT_LEN];
326 char addr_remote_str[KNET_MAX_HOST_LEN];
327 char port_remote_str[KNET_MAX_PORT_LEN];
328
329 iov.iov_base = &icmph;
330 iov.iov_len = sizeof(icmph);
331 msg.msg_name = (void*)&remote;
332 msg.msg_namelen = sizeof(remote);
333 msg.msg_iov = &iov;
334 msg.msg_iovlen = 1;
335 msg.msg_flags = 0;
336 msg.msg_control = buffer;
337 msg.msg_controllen = sizeof(buffer);
338
339 for (;;) {
340 err = recvmsg(sockfd, &msg, MSG_ERRQUEUE);
341 savederrno = errno;
342 if (err < 0) {
343 if (!got_err) {
344 errno = savederrno;
345 return -1;
346 } else {
347 return 0;
348 }
349 }
350 got_err = 1;
351 for (cmsg = CMSG_FIRSTHDR(&msg);cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
352 if (((cmsg->cmsg_level == SOL_IP) && (cmsg->cmsg_type == IP_RECVERR)) ||
353 ((cmsg->cmsg_level == SOL_IPV6 && (cmsg->cmsg_type == IPV6_RECVERR)))) {
354 sock_err = (struct sock_extended_err*)(void *)CMSG_DATA(cmsg);
355 if (sock_err) {
356 switch (sock_err->ee_origin) {
357 case SO_EE_ORIGIN_NONE: /* no origin */
358 case SO_EE_ORIGIN_LOCAL: /* local source (EMSGSIZE) */
359 if (sock_err->ee_errno == EMSGSIZE || sock_err->ee_errno == EPERM) {
360 if (pthread_mutex_lock(&knet_h->kmtu_mutex) != 0) {
361 log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Unable to get mutex lock");
362 knet_h->kernel_mtu = 0;
363 break;
364 } else {
365 knet_h->kernel_mtu = sock_err->ee_info;
366 log_debug(knet_h, KNET_SUB_TRANSP_UDP, "detected kernel MTU: %u", knet_h->kernel_mtu);
367 pthread_mutex_unlock(&knet_h->kmtu_mutex);
368 }
369
370 force_pmtud_run(knet_h, KNET_SUB_TRANSP_UDP, 0, 0);
371 }
372 /*
373 * those errors are way too noisy
374 */
375 break;
376 case SO_EE_ORIGIN_ICMP: /* ICMP */
377 case SO_EE_ORIGIN_ICMP6: /* ICMP6 */
378 origin = (struct sockaddr_storage *)(void *)SO_EE_OFFENDER(sock_err);
379 if (knet_addrtostr(origin, sizeof(*origin),
380 addr_str, KNET_MAX_HOST_LEN,
381 port_str, KNET_MAX_PORT_LEN) < 0) {
382 log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Received ICMP error from unknown source: %s", strerror(sock_err->ee_errno));
383
384 } else {
385 if (knet_addrtostr(&remote, sizeof(remote),
386 addr_remote_str, KNET_MAX_HOST_LEN,
387 port_remote_str, KNET_MAX_PORT_LEN) < 0) {
388 log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Received ICMP error from %s: %s destination unknown", addr_str, strerror(sock_err->ee_errno));
389 } else {
390 log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Received ICMP error from %s: %s %s", addr_str, strerror(sock_err->ee_errno), addr_remote_str);
391 if ((sock_err->ee_errno == ECONNREFUSED) || /* knet is not running on the other node */
392 (sock_err->ee_errno == ECONNABORTED) || /* local kernel closed the socket */
393 (sock_err->ee_errno == ENONET) || /* network does not exist */
394 (sock_err->ee_errno == ENETUNREACH) || /* network unreachable */
395 (sock_err->ee_errno == EHOSTUNREACH) || /* host unreachable */
396 (sock_err->ee_errno == EHOSTDOWN) || /* host down (from kernel/net/ipv4/icmp.c */
397 (sock_err->ee_errno == ENETDOWN)) { /* network down */
398 struct knet_host *host = NULL;
399 struct knet_link *kn_link = NULL;
400 int link_idx, found = 0;
401
402 for (host = knet_h->host_head; host != NULL; host = host->next) {
403 for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) {
404 kn_link = &host->link[link_idx];
405 if (kn_link->outsock == sockfd) {
406 if (!cmpaddr(&remote, &kn_link->dst_addr)) {
407 found = 1;
408 break;
409 }
410 }
411 }
412 if (found) {
413 break;
414 }
415 }
416
417 if ((host) && (kn_link) &&
418 (kn_link->status.connected)) {
419 log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Setting down host %u link %i", host->host_id, kn_link->link_id);
420 /*
421 * setting transport_connected = 0 will trigger
422 * thread_heartbeat link_down process.
423 *
424 * the process terminates calling into transport_link_down
425 * below that will set transport_connected = 1
426 */
427 kn_link->transport_connected = 0;
428 }
429
430 }
431 }
432 }
433 break;
434 }
435 } else {
436 log_debug(knet_h, KNET_SUB_TRANSP_UDP, "No data in MSG_ERRQUEUE");
437 }
438 }
439 }
440 }
441 }
442 #else
443 static int read_errs_from_sock(knet_handle_t knet_h, int sockfd)
444 {
445 return 0;
446 }
447 #endif
448
449 int udp_transport_rx_sock_error(knet_handle_t knet_h, int sockfd, int recv_err, int recv_errno)
450 {
451 if (recv_errno == EAGAIN) {
452 read_errs_from_sock(knet_h, sockfd);
453 }
454 return 0;
455 }
456
457 int udp_transport_tx_sock_error(knet_handle_t knet_h, int sockfd, int subsys, int recv_err, int recv_errno)
458 {
459 if (recv_err < 0) {
460 log_trace(knet_h, KNET_SUB_TRANSP_UDP, "tx_sock_error, subsys=%s, recv_err=%d: %s", knet_log_get_subsystem_name(subsys), recv_err, strerror(recv_errno));
461 if ((recv_errno == EMSGSIZE) || ((recv_errno == EPERM) && ((subsys == KNET_SUB_TX) || (subsys == KNET_SUB_PMTUD)))) {
462 read_errs_from_sock(knet_h, sockfd);
463 return 0;
464 }
465 if ((recv_errno == EINVAL) || (recv_errno == EPERM) ||
466 (recv_errno == ENETUNREACH) || (recv_errno == ENETDOWN) ||
467 (recv_errno == EHOSTUNREACH)) {
468 if ((recv_errno == ENETUNREACH) || (recv_errno == ENETDOWN)) {
469 log_trace(knet_h, KNET_SUB_TRANSP_UDP, "Sock: %d is unreachable.", sockfd);
470 }
471 return -1;
472 }
473 if ((recv_errno == ENOBUFS) || (recv_errno == EAGAIN)) {
474 log_trace(knet_h, KNET_SUB_TRANSP_UDP, "Sock: %d is overloaded. Slowing TX down", sockfd);
475 usleep(KNET_THREADS_TIMERES / 16);
476 } else {
477 read_errs_from_sock(knet_h, sockfd);
478 }
479 return 1;
480 }
481
482 return 0;
483 }
484
485 /*
486 * If the received IP addr doesn't match the destination IP
487 * then weird routing is going on.
488 */
489 static void check_dst_addr_is_valid(knet_handle_t knet_h, int sockfd, struct msghdr *msg)
490 {
491 #if defined(IP_PKTINFO) || defined(IPV6_PKTINFO)
492 struct cmsghdr *cmsg;
493
494 for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
495 int pkt_ifindex = -1;
496 int ifindex = knet_h->knet_transport_fd_tracker[sockfd].ifindex;
497 struct sockaddr_storage dstaddr;
498 #ifdef IP_PKTINFO
499 if (cmsg->cmsg_level == SOL_IP && cmsg->cmsg_type == IP_PKTINFO) {
500 struct in_pktinfo *pi = (void*)CMSG_DATA(cmsg);
501 struct sockaddr_in *dstaddr4 = (struct sockaddr_in *)&dstaddr;
502
503 pkt_ifindex = pi->ipi_ifindex;
504 dstaddr4->sin_family = AF_INET;
505 dstaddr4->sin_port = 0; /* unknown to PKTINFO */
506 dstaddr4->sin_addr.s_addr = pi->ipi_addr.s_addr;
507 }
508 #endif
509 #ifdef IPV6_PKTINFO
510 if (cmsg->cmsg_level == IPPROTO_IPV6 && cmsg->cmsg_type == IPV6_PKTINFO) {
511 struct in6_pktinfo *pi = (void*)CMSG_DATA(cmsg);
512 struct sockaddr_in6 *dstaddr6 = (struct sockaddr_in6 *)&dstaddr;
513 memset(dstaddr6, 0, sizeof(struct sockaddr_in6));
514
515 pkt_ifindex = pi->ipi6_ifindex;
516 dstaddr6->sin6_family = AF_INET6;
517 dstaddr6->sin6_port = 0; /* unknown to PKTINFO */
518 memcpy(&dstaddr6->sin6_addr, (char *)&pi->ipi6_addr, sizeof(pi->ipi6_addr));
519 }
520 #endif
521 if (ifindex != -1 && pkt_ifindex != -1 && ifindex != pkt_ifindex) {
522 char srcaddr_s[KNET_MAX_HOST_LEN];
523 char srcport_s[KNET_MAX_PORT_LEN];
524 char dstaddr_s[KNET_MAX_HOST_LEN];
525 char dstport_s[KNET_MAX_PORT_LEN];
526 char expected_ifname[IF_NAMESIZE];
527 char used_ifname[IF_NAMESIZE];
528
529 /* Make as detailed a message as we can */
530 if ((if_indextoname(pkt_ifindex, used_ifname) == NULL) ||
531 (if_indextoname(ifindex, expected_ifname) == NULL)) {
532 log_trace(knet_h, KNET_SUB_TRANSP_UDP, "Received packet on ifindex %d when expected ifindex %d", pkt_ifindex, ifindex);
533 } else if (knet_addrtostr(msg->msg_name, msg->msg_namelen,
534 srcaddr_s, sizeof(srcaddr_s),
535 srcport_s, sizeof(srcport_s)) != 0) {
536 log_trace(knet_h, KNET_SUB_TRANSP_UDP, "Received packet on i/f %s when expected i/f %s", used_ifname, expected_ifname);
537 } else if (knet_addrtostr((struct sockaddr_storage *)&dstaddr, sizeof(dstaddr),
538 dstaddr_s, sizeof(dstaddr_s),
539 dstport_s, sizeof(dstport_s)) != 0) {
540 log_trace(knet_h, KNET_SUB_TRANSP_UDP, "Received packet from %s on i/f %s when expected %s", srcaddr_s, used_ifname, expected_ifname);
541 } else {
542 log_trace(knet_h, KNET_SUB_TRANSP_UDP, "Received packet from %s to %s on i/f %s when expected %s", srcaddr_s, dstaddr_s, used_ifname, expected_ifname);
543 }
544 }
545 }
546 #endif
547 }
548
549 int udp_transport_rx_is_data(knet_handle_t knet_h, int sockfd, struct knet_mmsghdr *msg)
550 {
551 if (msg->msg_len == 0)
552 return KNET_TRANSPORT_RX_NOT_DATA_CONTINUE;
553
554 check_dst_addr_is_valid(knet_h, sockfd, &msg->msg_hdr);
555
556 return KNET_TRANSPORT_RX_IS_DATA;
557 }
558
559 int udp_transport_link_dyn_connect(knet_handle_t knet_h, int sockfd, struct knet_link *kn_link)
560 {
561 kn_link->status.dynconnected = 1;
562 return 0;
563 }
564
565 int udp_transport_link_is_down(knet_handle_t knet_h, struct knet_link *kn_link)
566 {
567 /*
568 * see comments about handling ICMP error messages
569 */
570 kn_link->transport_connected = 1;
571 return 0;
572 }
573