	void *buf_addr;           /**< Virtual address of segment buffer. */
	/**
	 * Physical address of segment buffer.
	 * Force alignment to 8-bytes, so as to ensure we have the exact
	 * same mbuf cacheline0 layout for 32-bit and 64-bit. This makes
	 * working on vector drivers easier.
	 */
	rte_iova_t buf_iova __rte_aligned(sizeof(rte_iova_t));

	/* next 8 bytes are initialised on RX descriptor rearm */
	RTE_MARKER64 rearm_data;
	uint16_t data_off;

	/**
	 * Reference counter. Its size should at least equal to the size
	 * of port field (16 bits), to support zero-copy broadcast.
	 * It should only be accessed using the following functions:
	 * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and
	 * rte_mbuf_refcnt_set(). The functionality of these functions (atomic,
	 * or non-atomic) is controlled by the RTE_MBUF_REFCNT_ATOMIC flag.
	 */
	uint16_t refcnt;

	/**
	 * Number of segments. Only valid for the first segment of an mbuf
	 * chain.
	 */
	uint16_t nb_segs;

	/** Input port (16 bits to support more than 256 virtual ports).
	 * The event eth Tx adapter uses this field to specify the output port.
	 */
	uint16_t port;
uint64_t ol_flags; /**< Offload features. */
	/* remaining bytes are set on RX when pulling packet from descriptor */
	RTE_MARKER rx_descriptor_fields1;

	/*
	 * The packet type, which is the combination of outer/inner L2, L3, L4
	 * and tunnel types. The packet_type is about data really present in the
	 * mbuf. Example: if vlan stripping is enabled, a received vlan packet
	 * would have RTE_PTYPE_L2_ETHER and not RTE_PTYPE_L2_VLAN because the
	 * vlan is stripped from the data.
	 */
	RTE_STD_C11
	union {
		uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */
		__extension__
		struct {
			uint8_t l2_type:4;  /**< (Outer) L2 type. */
			uint8_t l3_type:4;  /**< (Outer) L3 type. */
			uint8_t l4_type:4;  /**< (Outer) L4 type. */
			uint8_t tun_type:4; /**< Tunnel type. */
			RTE_STD_C11
			union {
				uint8_t inner_esp_next_proto;
				/**< ESP next protocol type, valid if
				 * RTE_PTYPE_TUNNEL_ESP tunnel type is set
				 * on both Tx and Rx.
				 */
				__extension__
				struct {
					uint8_t inner_l2_type:4;
					/**< Inner L2 type. */
					uint8_t inner_l3_type:4;
					/**< Inner L3 type. */
				};
			};
			uint8_t inner_l4_type:4; /**< Inner L4 type. */
		};
	};

	uint32_t pkt_len;  /**< Total pkt len: sum of all segments. */
	uint16_t data_len; /**< Amount of data in segment buffer. */
	/** VLAN TCI (CPU order), valid if RTE_MBUF_F_RX_VLAN is set. */
	uint16_t vlan_tci;

	RTE_STD_C11
	union {
		union {
			uint32_t rss; /**< RSS hash result if RSS enabled */
			struct {
				union {
					struct {
						uint16_t hash;
						uint16_t id;
					};
					uint32_t lo;
					/**< Second 4 flexible bytes */
				};
				uint32_t hi;
				/**< First 4 flexible bytes or FD ID, dependent
				 * on RTE_MBUF_F_RX_FDIR_* flag in ol_flags.
				 */
			} fdir; /**< Filter identifier if FDIR enabled */
			struct rte_mbuf_sched sched;
			/**< Hierarchical scheduler : 8 bytes */
			struct {
				uint32_t reserved1;
				uint16_t reserved2;
				uint16_t txq;
				/**< The event eth Tx adapter uses this field
				 * to store Tx queue id.
				 * @see rte_event_eth_tx_adapter_txq_set()
				 */
			} txadapter; /**< Eventdev ethdev Tx adapter */
			/**< User defined tags. See rte_distributor_process() */
			uint32_t usr;
		} hash; /**< hash information */
	};

	/** Outer VLAN TCI (CPU order), valid if RTE_MBUF_F_RX_QINQ is set. */
	uint16_t vlan_tci_outer;
uint16_t buf_len; /**< Length of segment buffer. */
	struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */

	/* second cache line - fields only used in slow path or on TX */
	RTE_MARKER cacheline1 __rte_cache_min_aligned;

	/**
	 * Next segment of scattered packet. Must be NULL in the last segment or
	 * in case of non-segmented packet.
	 */
	struct rte_mbuf *next;

	/*
	 * Fields for Tx offloading of tunnels.
	 * These are undefined for packets which don't request
	 * any tunnel offloads (outer IP or UDP checksum,
	 * tunnel TSO).
	 *
	 * PMDs should not use these fields unconditionally
	 * when calculating offsets.
	 *
	 * Applications are expected to set appropriate tunnel
	 * offload flags when they fill in these fields.
	 */
	uint64_t outer_l3_len:RTE_MBUF_OUTL3_LEN_BITS;
	/**< Outer L3 (IP) Hdr Length. */
	uint64_t outer_l2_len:RTE_MBUF_OUTL2_LEN_BITS;
	/**< Outer L2 (MAC) Hdr Length. */

	/** Shared data for external buffer attached to mbuf. See
	 * rte_pktmbuf_attach_extbuf().
	 */
	struct rte_mbuf_ext_shared_info *shinfo;

	/** Size of the application private data. In case of an indirect
	 * mbuf, it stores the direct mbuf private data size.
	 */
	uint16_t priv_size;
/** Timesync flags for use with IEEE1588. */ uint16_t timesync;
	uint32_t dynfield1[9]; /**< Reserved for dynamic fields. */
} __rte_cache_aligned;
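To make these fields concrete, here is a small illustrative sketch (not taken from the code above) that walks a segmented packet via nb_segs/next and touches the reference counter only through the accessors named in the refcnt comment. It assumes the mbuf came from rte_pktmbuf_alloc() or rte_eth_rx_burst().

#include <stdint.h>
#include <rte_mbuf.h>

/* Illustrative sketch only: walk a (possibly segmented) packet using the
 * nb_segs/next fields and sum the per-segment payload sizes. */
static uint32_t
count_payload_bytes(struct rte_mbuf *m)
{
	uint32_t total = 0;

	/* nb_segs is only valid on the first segment of the chain */
	uint16_t segs = m->nb_segs;

	while (m != NULL) {
		total += m->data_len;	/* bytes stored in this segment */
		m = m->next;		/* NULL on the last segment */
	}

	/* on the head mbuf, pkt_len equals the sum computed above */
	(void)segs;
	return total;
}

/* refcnt must only be touched through the accessors; this is how a
 * second owner of the buffer would be recorded. */
static void
share_mbuf(struct rte_mbuf *m)
{
	rte_mbuf_refcnt_update(m, 1);	/* one more reference */
	(void)rte_mbuf_refcnt_read(m);	/* current owner count */
}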
rte_fbarray
struct rte_fbarray {
	char name[RTE_FBARRAY_NAME_LEN]; /**< name associated with an array */
	unsigned int count;              /**< number of entries stored */
	unsigned int len;                /**< current length of the array */
	unsigned int elt_sz;             /**< size of each element */
	void *data;                      /**< data pointer */
	rte_rwlock_t rwlock;             /**< multiprocess lock */
};
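For context, here is a hedged sketch of how an rte_fbarray is usually driven through the public API. The name "demo_fbarray" and the sizes are made up for illustration, and the EAL is assumed to be initialized already.

#include <stdint.h>
#include <rte_fbarray.h>

/* Illustrative sketch: create a small file-backed array, claim one slot,
 * and write to it. */
static int
fbarray_demo(void)
{
	struct rte_fbarray arr;
	int idx;

	/* name, number of elements, element size */
	if (rte_fbarray_init(&arr, "demo_fbarray", 64, sizeof(uint64_t)) < 0)
		return -1;

	idx = rte_fbarray_find_next_free(&arr, 0);
	if (idx < 0) {
		rte_fbarray_destroy(&arr);
		return -1;
	}

	*(uint64_t *)rte_fbarray_get(&arr, idx) = 42;
	rte_fbarray_set_used(&arr, idx);	/* bumps arr.count */

	rte_fbarray_destroy(&arr);
	return 0;
}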
	/**
	 * Parse the arguments: convert the incoming string arguments into
	 * the fields of the internal_config structure.
	 */
	rc = eal_parse_args(argc, argv);
	if (rc < 0) {
		uk_pr_err("Failed to parse the argument to library\n");
		return rc;
	}
uk_pr_debug("parsed argument\n"); /** * Process the arguments on the device * 在 Unikraft 中,该函数无实际效果 */ rc = eal_option_device_parse(); if (rc < 0) { uk_pr_err("Failed to parse the device\n"); return rc; } uk_pr_debug("dev_args parsed\n"); // 如果是主进程则初始化配置 rte_config_init();
	/**
	 * Configure the heap based on the huge page information.
	 */
	rc = eal_hugepage_info_init();
	if (rc < 0) {
		uk_pr_err("Failed to fetch hugetable info\n");
		return rc;
	}
	/**
	 * Memzone initialization configures the fbarray
	 * (initializes the memory zone subsystem).
	 */
	rc = rte_eal_memzone_init();
	if (rc < 0) {
		uk_pr_err("Failed to initialize the memory zone\n");
		return rc;
	}
	/**
	 * TODO:
	 * Check if we need changes to configure
	 * - memseg
	 * - memalloc
	 * (initializes the memory subsystem)
	 */
	rc = rte_eal_memory_init();
	if (rc < 0) {
		uk_pr_err("Failed to initialize the memory\n");
		return rc;
	}

	/* initialize the malloc heap */
	rc = rte_eal_malloc_heap_init();
	if (rc < 0) {
		uk_pr_err("Failed to initialize heap\n");
		return rc;
	}

	/* initialize the tail queues */
	if (rte_eal_tailqs_init() < 0) {
		uk_pr_err("Cannot init tail queues for objects\n");
		rte_errno = EFAULT;
		return -1;
	}

	/* initialize the main thread and set its CPU affinity */
	eal_thread_init_master(rte_config.master_lcore);
	rc = eal_uknetdev_init();
	if (rc < 0) {
		uk_pr_err("Failed(%d) to initialize the netdevice\n", rc);
		return rc;
	}

	return 0;
}
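Assuming the walkthrough above sits behind the usual rte_eal_init() entry point, an application reaches it roughly as sketched below. This is a minimal illustration, not the authoritative Unikraft startup path.

#include <stdio.h>
#include <stdlib.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_debug.h>

int
main(int argc, char **argv)
{
	/* rte_eal_init() consumes the EAL options and returns the number
	 * of arguments it parsed, or a negative value on failure. */
	int ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "EAL initialization failed\n");

	argc -= ret;
	argv += ret;

	printf("EAL up, %u lcore(s) enabled\n", rte_lcore_count());

	rte_eal_cleanup();
	return 0;
}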
void
eal_reset_internal_config(struct internal_config *internal_cfg)
{
	int i;

	internal_cfg->memory = 0;
	internal_cfg->force_nrank = 0;
	internal_cfg->force_nchannel = 0;
	internal_cfg->hugefile_prefix = NULL;
	internal_cfg->hugepage_dir = NULL;
	internal_cfg->force_sockets = 0;
	/* zero out the NUMA config */
	for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
		internal_cfg->socket_mem[i] = 0;
	internal_cfg->force_socket_limits = 0;
	/* zero out the NUMA limits config */
	for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
		internal_cfg->socket_limit[i] = 0;
	/* zero out hugedir descriptors */
	for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) {
		memset(&internal_cfg->hugepage_info[i], 0,
				sizeof(internal_cfg->hugepage_info[0]));
		internal_cfg->hugepage_info[i].lock_descriptor = -1;
	}
	internal_cfg->base_virtaddr = 0;

	/* if set to NONE, interrupt mode is determined automatically */
	internal_cfg->vfio_intr_mode = RTE_INTR_MODE_NONE;
	memset(internal_cfg->vfio_vf_token, 0,
			sizeof(internal_cfg->vfio_vf_token));
int
rte_eal_cpu_init(void)
{
	/* pointer to global configuration */
	struct rte_config *config = rte_eal_get_configuration();
	unsigned lcore_id;
	unsigned count = 0;
	unsigned int socket_id, prev_socket_id;
	int lcore_to_socket_id[RTE_MAX_LCORE];

	/*
	 * Parse the maximum set of logical cores, detect the subset of running
	 * ones and enable them by default.
	 */
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		lcore_config[lcore_id].core_index = count;

		/* init cpuset for per lcore config */
		CPU_ZERO(&lcore_config[lcore_id].cpuset);

		/* By default, lcore 1:1 map to cpu id */
		CPU_SET(lcore_id, &lcore_config[lcore_id].cpuset);

		/* By default, each detected core is enabled */
		config->lcore_role[lcore_id] = ROLE_RTE;
		lcore_config[lcore_id].core_role = ROLE_RTE;
		lcore_config[lcore_id].core_id = eal_cpu_core_id(lcore_id);
		lcore_config[lcore_id].socket_id = socket_id;
		RTE_LOG(DEBUG, EAL,
			"Detected lcore %u as core %u on socket %u\n",
			lcore_id, lcore_config[lcore_id].core_id,
			lcore_config[lcore_id].socket_id);
		count++;
	}
	for (; lcore_id < CPU_SETSIZE; lcore_id++) {
		if (eal_cpu_detected(lcore_id) == 0)
			continue;
		RTE_LOG(DEBUG, EAL,
			"Skipped lcore %u as core %u on socket %u\n",
			lcore_id, eal_cpu_core_id(lcore_id),
			eal_cpu_socket_id(lcore_id));
	}

	/* Set the count of enabled logical cores of the EAL configuration */
	config->lcore_count = count;
	RTE_LOG(DEBUG, EAL,
		"Maximum logical cores by configuration: %u\n", RTE_MAX_LCORE);
	RTE_LOG(INFO, EAL, "Detected CPU lcores: %u\n", config->lcore_count);

	/* sort all socket id's in ascending order */
	qsort(lcore_to_socket_id, RTE_DIM(lcore_to_socket_id),
			sizeof(lcore_to_socket_id[0]), socket_id_cmp);
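Once rte_eal_cpu_init() has filled lcore_config, applications normally enumerate the enabled lcores through the public accessors rather than touching lcore_config directly. A small illustrative sketch:

#include <stdio.h>
#include <rte_lcore.h>

/* Illustrative: list the lcores enabled by the detection loop above,
 * together with the socket each one was mapped to. */
static void
dump_lcores(void)
{
	unsigned int lcore_id;

	RTE_LCORE_FOREACH(lcore_id) {
		printf("lcore %u -> socket %u\n",
		       lcore_id, rte_lcore_to_socket_id(lcore_id));
	}
}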
		default:
			/**
			 * Parse common options later to enable overriding
			 * default common options.
			 */
			rc = eal_parse_common_option(opt, optarg,
						     &internal_config);

			/* common parser handled this option */
			if (rc == 0)
				continue;
			/* common parser is not happy */
			if (rc < 0) {
				eal_usage(prgname);
				rc = -1;
				goto out;
			} else if (opt < OPT_LONG_MIN_NUM && isprint(opt))
				uk_pr_err("Option %c is not supported on Unikraft\n",
					  opt);
			else if (opt >= OPT_LONG_MIN_NUM &&
				 opt < OPT_LONG_MAX_NUM)
				uk_pr_err("Option %s is not supported on Unikraft\n",
					  eal_long_options[option_index].name);
			else
				uk_pr_err("Option %d is not supported on Unikraft\n",
					  opt);
	/* unlock mem hotplug here. it's safe for primary as no requests can
	 * even come before primary itself is fully initialized, and secondaries
	 * do not need to initialize the heap.
	 */
	rte_mcfg_mem_read_unlock();

	/* secondary process does not need to initialize anything */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	/* add all IOVA-contiguous areas to the heap */
	return rte_memseg_contig_walk(malloc_add_seg, NULL);
}
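After the heap has been populated by the memseg walk above, ordinary rte_malloc()/rte_zmalloc() calls are served from it. A minimal sketch ("demo_buf" is just an illustrative allocation tag):

#include <stddef.h>
#include <rte_malloc.h>

/* Illustrative only: allocate zeroed memory from the EAL heap that the
 * memseg walk just populated (alignment 0 means the default). */
static void *
grab_buffer(size_t len)
{
	void *p = rte_zmalloc("demo_buf", len, 0);

	/* caller releases the buffer with rte_free(p) */
	return p;
}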
int
rte_eal_tailqs_init(void)
{
	struct rte_tailq_elem *t;
rte_tailqs_count = 0;
	TAILQ_FOREACH(t, &rte_tailq_elem_head, next) {
		/* second part of register job for "early" tailqs, see
		 * rte_eal_tailq_register and EAL_REGISTER_TAILQ
		 */
		rte_eal_tailq_update(t);
		if (t->head == NULL) {
			RTE_LOG(ERR, EAL,
				"Cannot initialize tailq: %s\n", t->name);
			/* TAILQ_REMOVE not needed, error is already fatal */
			goto fail;
		}
	}

	return 0;

fail:
	rte_dump_tailq(stderr);
	return -1;
}
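The "early" registration mentioned in the loop's comment is normally done with EAL_REGISTER_TAILQ(): a library declares its tailq element and a constructor hooks it into rte_tailq_elem_head, so rte_eal_tailqs_init() can finish the job later. A sketch of that pattern ("DEMO_TAILQ" is a made-up name):

#include <rte_tailq.h>

/* Illustrative registration of a library tailq at constructor time. */
static struct rte_tailq_elem demo_tailq = {
	.name = "DEMO_TAILQ",
};
EAL_REGISTER_TAILQ(demo_tailq)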
eal_thread_init_master
void
eal_thread_init_master(unsigned lcore_id)
{
	/* set the lcore ID in per-lcore memory area */
	RTE_PER_LCORE(_lcore_id) = lcore_id;

	/* set CPU affinity */
	if (eal_thread_set_affinity() < 0)
		UK_CRASH("Failed to set thread affinity\n");
}
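Since RTE_PER_LCORE(_lcore_id) is now set, code running on this thread can identify itself through the public accessor; for example:

#include <stdio.h>
#include <rte_lcore.h>

/* Illustrative: rte_lcore_id() reads the per-lcore variable that
 * eal_thread_init_master() just initialized. */
static void
report_self(void)
{
	printf("running on lcore %u\n", rte_lcore_id());
}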
#ifdef CONFIG_PCI_BUS
	/* now do all data allocation - for eth_dev structure, dummy pci driver
	 * and internal (dev_private) data
	 */
	pci_dev = rte_zmalloc_socket(name, sizeof(*pci_dev), 0, socket_id);
	if (pci_dev == NULL)
		goto err;
/* public fast-path API */
struct rte_eth_fp_ops rte_eth_fp_ops[RTE_MAX_ETHPORTS];

/* This function is called by uk_ethdev_create during the init flow. */
void
rte_eth_dev_probing_finish(struct rte_eth_dev *dev)
{
	if (dev == NULL)
		return;
	/*
	 * for secondary process, at that point we expect device
	 * to be already 'usable', so shared data and all function pointers
	 * for fast-path devops have to be setup properly inside rte_eth_dev.
	 */
	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
		eth_dev_fp_ops_setup(rte_eth_fp_ops + dev->data->port_id, dev);
	if (qd == NULL) {
		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for port_id=%u\n",
			queue_id, port_id);
		return 0;
	}
#endif

	/* call the rx_pkt_burst handler registered on the fast path */
	nb_rx = p->rx_pkt_burst(qd, rx_pkts, nb_pkts);
#ifdef RTE_ETHDEV_RXTX_CALLBACKS
	{
		void *cb;

		/* __ATOMIC_RELEASE memory order was used when the
		 * call back was inserted into the list.
		 * Since there is a clear dependency between loading
		 * cb and cb->fn/cb->next, __ATOMIC_ACQUIRE memory order is
		 * not required.
		 */
		cb = __atomic_load_n((void **)&p->rxq.clbk[queue_id],
				__ATOMIC_RELAXED);
		if (unlikely(cb != NULL))
			nb_rx = rte_eth_call_rx_callbacks(port_id, queue_id,
					rx_pkts, nb_rx, nb_pkts, cb);
	}
#endif

	/* emit a trace point for this burst */
	rte_ethdev_trace_rx_burst(port_id, queue_id, (void **)rx_pkts, nb_rx);
	return nb_rx;
}
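For completeness, the fast path above is what runs behind every rte_eth_rx_burst() call an application issues. A typical (illustrative) polling loop, assuming the port and queue were configured and started earlier:

#include <rte_ethdev.h>
#include <rte_mbuf.h>

#define BURST_SIZE 32

/* Illustrative polling loop: each iteration ends up in the fast path,
 * including the rx_pkt_burst call and the callback/trace handling. */
static void
poll_port(uint16_t port_id, uint16_t queue_id)
{
	struct rte_mbuf *pkts[BURST_SIZE];

	for (;;) {
		uint16_t nb = rte_eth_rx_burst(port_id, queue_id,
					       pkts, BURST_SIZE);

		for (uint16_t i = 0; i < nb; i++)
			rte_pktmbuf_free(pkts[i]); /* real code processes first */
	}
}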