// /Users/kangxiaoning/workspace/linux-3.10/include/linux/netdevice.h
struct net_device {
/*
* This is the first field of the "visible" part of this structure
* (i.e. as seen by users in the "Space.c" file). It is the name
* of the interface.
*/
char name[IFNAMSIZ];
/* device name hash chain, please keep it close to name[] */
struct hlist_node name_hlist;
/* snmp alias */
char *ifalias;
/*
* I/O specific fields
* FIXME: Merge these and struct ifmap into one
*/
unsigned long mem_end; /* shared mem end */
unsigned long mem_start; /* shared mem start */
unsigned long base_addr; /* device I/O address */
unsigned int irq; /* device IRQ number */
/*
* Some hardware also needs these fields, but they are not
* part of the usual set specified in Space.c.
*/
unsigned long state;
struct list_head dev_list;
struct list_head napi_list;
struct list_head unreg_list;
struct list_head upper_dev_list; /* List of upper devices */
/* currently active device features */
netdev_features_t features;
/* user-changeable features */
netdev_features_t hw_features;
/* user-requested features */
netdev_features_t wanted_features;
/* mask of features inheritable by VLAN devices */
netdev_features_t vlan_features;
/* mask of features inherited by encapsulating devices
* This field indicates what encapsulation offloads
* the hardware is capable of doing, and drivers will
* need to set them appropriately.
*/
netdev_features_t hw_enc_features;
/* Interface index. Unique device identifier */
int ifindex;
int iflink;
struct net_device_stats stats;
atomic_long_t rx_dropped; /* dropped packets by core network
* Do not use this in drivers.
*/
#ifdef CONFIG_WIRELESS_EXT
/* List of functions to handle Wireless Extensions (instead of ioctl).
* See <net/iw_handler.h> for details. Jean II */
const struct iw_handler_def * wireless_handlers;
/* Instance data managed by the core of Wireless Extensions. */
struct iw_public_data * wireless_data;
#endif
/* Management operations */
const struct net_device_ops *netdev_ops;
const struct ethtool_ops *ethtool_ops;
/* Hardware header description */
const struct header_ops *header_ops;
unsigned int flags; /* interface flags (a la BSD) */
unsigned int priv_flags; /* Like 'flags' but invisible to userspace.
* See if.h for definitions. */
unsigned short gflags;
unsigned short padded; /* How much padding added by alloc_netdev() */
unsigned char operstate; /* RFC2863 operstate */
unsigned char link_mode; /* mapping policy to operstate */
unsigned char if_port; /* Selectable AUI, TP,..*/
unsigned char dma; /* DMA channel */
unsigned int mtu; /* interface MTU value */
unsigned short type; /* interface hardware type */
unsigned short hard_header_len; /* hardware hdr length */
/* extra head- and tailroom the hardware may need, but not in all cases
* can this be guaranteed, especially tailroom. Some cases also use
* LL_MAX_HEADER instead to allocate the skb.
*/
unsigned short needed_headroom;
unsigned short needed_tailroom;
/* Interface address info. */
unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */
unsigned char addr_assign_type; /* hw address assignment type */
unsigned char addr_len; /* hardware address length */
unsigned char neigh_priv_len;
unsigned short dev_id; /* for shared network cards */
spinlock_t addr_list_lock;
struct netdev_hw_addr_list uc; /* Unicast mac addresses */
struct netdev_hw_addr_list mc; /* Multicast mac addresses */
struct netdev_hw_addr_list dev_addrs; /* list of device
* hw addresses
*/
#ifdef CONFIG_SYSFS
struct kset *queues_kset;
#endif
bool uc_promisc;
unsigned int promiscuity;
unsigned int allmulti;
/* Protocol specific pointers */
#if IS_ENABLED(CONFIG_VLAN_8021Q)
struct vlan_info __rcu *vlan_info; /* VLAN info */
#endif
#if IS_ENABLED(CONFIG_NET_DSA)
struct dsa_switch_tree *dsa_ptr; /* dsa specific data */
#endif
void *atalk_ptr; /* AppleTalk link */
struct in_device __rcu *ip_ptr; /* IPv4 specific data */
struct dn_dev __rcu *dn_ptr; /* DECnet specific data */
struct inet6_dev __rcu *ip6_ptr; /* IPv6 specific data */
void *ax25_ptr; /* AX.25 specific data */
struct wireless_dev *ieee80211_ptr; /* IEEE 802.11 specific data,
assign before registering */
/*
* Cache lines mostly used on receive path (including eth_type_trans())
*/
unsigned long last_rx; /* Time of last Rx
* This should not be set in
* drivers, unless really needed,
* because network stack (bonding)
* use it if/when necessary, to
* avoid dirtying this cache line.
*/
/* Interface address info used in eth_type_trans() */
unsigned char *dev_addr; /* hw address, (before bcast
because most packets are
unicast) */
#ifdef CONFIG_RPS
struct netdev_rx_queue *_rx;
/* Number of RX queues allocated at register_netdev() time */
unsigned int num_rx_queues;
/* Number of RX queues currently active in device */
unsigned int real_num_rx_queues;
#endif
rx_handler_func_t __rcu *rx_handler;
void __rcu *rx_handler_data;
struct netdev_queue __rcu *ingress_queue;
unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */
/*
* Cache lines mostly used on transmit path
*/
struct netdev_queue *_tx ____cacheline_aligned_in_smp;
/* Number of TX queues allocated at alloc_netdev_mq() time */
unsigned int num_tx_queues;
/* Number of TX queues currently active in device */
unsigned int real_num_tx_queues;
/* root qdisc from userspace point of view */
struct Qdisc *qdisc;
unsigned long tx_queue_len; /* Max frames per queue allowed */
spinlock_t tx_global_lock;
#ifdef CONFIG_XPS
struct xps_dev_maps __rcu *xps_maps;
#endif
#ifdef CONFIG_RFS_ACCEL
/* CPU reverse-mapping for RX completion interrupts, indexed
* by RX queue number. Assigned by driver. This must only be
* set if the ndo_rx_flow_steer operation is defined. */
struct cpu_rmap *rx_cpu_rmap;
#endif
/* These may be needed for future network-power-down code. */
/*
* trans_start here is expensive for high speed devices on SMP,
* please use netdev_queue->trans_start instead.
*/
unsigned long trans_start; /* Time (in jiffies) of last Tx */
int watchdog_timeo; /* used by dev_watchdog() */
struct timer_list watchdog_timer;
/* Number of references to this device */
int __percpu *pcpu_refcnt;
/* delayed register/unregister */
struct list_head todo_list;
/* device index hash chain */
struct hlist_node index_hlist;
struct list_head link_watch_list;
/* register/unregister state machine */
enum { NETREG_UNINITIALIZED=0,
NETREG_REGISTERED, /* completed register_netdevice */
NETREG_UNREGISTERING, /* called unregister_netdevice */
NETREG_UNREGISTERED, /* completed unregister todo */
NETREG_RELEASED, /* called free_netdev */
NETREG_DUMMY, /* dummy device for NAPI poll */
} reg_state:8;
bool dismantle; /* device is going do be freed */
enum {
RTNL_LINK_INITIALIZED,
RTNL_LINK_INITIALIZING,
} rtnl_link_state:16;
/* Called from unregister, can be used to call free_netdev */
void (*destructor)(struct net_device *dev);
#ifdef CONFIG_NETPOLL
struct netpoll_info __rcu *npinfo;
#endif
#ifdef CONFIG_NET_NS
/* Network namespace this network device is inside */
struct net *nd_net;
#endif
/* mid-layer private */
union {
void *ml_priv;
struct pcpu_lstats __percpu *lstats; /* loopback stats */
struct pcpu_tstats __percpu *tstats; /* tunnel stats */
struct pcpu_dstats __percpu *dstats; /* dummy stats */
struct pcpu_vstats __percpu *vstats; /* veth stats */
};
/* GARP */
struct garp_port __rcu *garp_port;
/* MRP */
struct mrp_port __rcu *mrp_port;
/* class/net/name entry */
struct device dev;
/* space for optional device, statistics, and wireless sysfs groups */
const struct attribute_group *sysfs_groups[4];
/* rtnetlink link ops */
const struct rtnl_link_ops *rtnl_link_ops;
/* for setting kernel sock attribute on TCP connection setup */
#define GSO_MAX_SIZE 65536
unsigned int gso_max_size;
#define GSO_MAX_SEGS 65535
u16 gso_max_segs;
#ifdef CONFIG_DCB
/* Data Center Bridging netlink ops */
const struct dcbnl_rtnl_ops *dcbnl_ops;
#endif
u8 num_tc;
struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
u8 prio_tc_map[TC_BITMASK + 1];
#if IS_ENABLED(CONFIG_FCOE)
/* max exchange id for FCoE LRO by ddp */
unsigned int fcoe_ddp_xid;
#endif
#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
struct netprio_map __rcu *priomap;
#endif
/* phy device may attach itself for hardware timestamping */
struct phy_device *phydev;
struct lock_class_key *qdisc_tx_busylock;
/* group the device belongs to */
int group;
struct pm_qos_request pm_qos_req;
};
static int __init net_dev_init(void)
{
int i, rc = -ENOMEM;
BUG_ON(!dev_boot_phase);
if (dev_proc_init())
goto out;
if (netdev_kobject_init())
goto out;
INIT_LIST_HEAD(&ptype_all);
for (i = 0; i < PTYPE_HASH_SIZE; i++)
INIT_LIST_HEAD(&ptype_base[i]);
INIT_LIST_HEAD(&offload_base);
if (register_pernet_subsys(&netdev_net_ops))
goto out;
/*
* Initialise the packet receive queues.
*/
for_each_possible_cpu(i) {
struct softnet_data *sd = &per_cpu(softnet_data, i);
memset(sd, 0, sizeof(*sd));
skb_queue_head_init(&sd->input_pkt_queue);
skb_queue_head_init(&sd->process_queue);
sd->completion_queue = NULL;
INIT_LIST_HEAD(&sd->poll_list);
sd->output_queue = NULL;
sd->output_queue_tailp = &sd->output_queue;
#ifdef CONFIG_RPS
sd->csd.func = rps_trigger_softirq;
sd->csd.info = sd;
sd->csd.flags = 0;
sd->cpu = i;
#endif
sd->backlog.poll = process_backlog;
sd->backlog.weight = weight_p;
sd->backlog.gro_list = NULL;
sd->backlog.gro_count = 0;
}
dev_boot_phase = 0;
/* The loopback device is special if any other network devices
* is present in a network namespace the loopback device must
* be present. Since we now dynamically allocate and free the
* loopback device ensure this invariant is maintained by
* keeping the loopback device as the first device on the
* list of network devices. Ensuring the loopback devices
* is the first device that appears and the last network device
* that disappears.
*/
if (register_pernet_device(&loopback_net_ops))
goto out;
if (register_pernet_device(&default_device_ops))
goto out;
open_softirq(NET_TX_SOFTIRQ, net_tx_action);
open_softirq(NET_RX_SOFTIRQ, net_rx_action);
hotcpu_notifier(dev_cpu_callback, 0);
dst_init();
rc = 0;
out:
return rc;
}
// /Users/kangxiaoning/workspace/linux-3.10/include/linux/pci.h
/*
* pci_register_driver must be a macro so that KBUILD_MODNAME can be expanded
*/
#define pci_register_driver(driver) \
__pci_register_driver(driver, THIS_MODULE, KBUILD_MODNAME)
/**
* __pci_register_driver - register a new pci driver
* @drv: the driver structure to register
* @owner: owner module of drv
* @mod_name: module name string
*
* Adds the driver structure to the list of registered drivers.
* Returns a negative value on error, otherwise 0.
* If no error occurred, the driver remains registered even if
* no device was claimed during registration.
*/
int __pci_register_driver(struct pci_driver *drv, struct module *owner,
const char *mod_name)
{
/* initialize common driver fields */
drv->driver.name = drv->name;
drv->driver.bus = &pci_bus_type;
drv->driver.owner = owner;
drv->driver.mod_name = mod_name;
spin_lock_init(&drv->dynids.lock);
INIT_LIST_HEAD(&drv->dynids.list);
/* register with core */
return driver_register(&drv->driver);
}
static int igb_open(struct net_device *netdev)
{
return __igb_open(netdev, false);
}
static int __igb_open(struct net_device *netdev, bool resuming)
{
struct igb_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
struct pci_dev *pdev = adapter->pdev;
int err;
int i;
/* disallow open during test */
if (test_bit(__IGB_TESTING, &adapter->state)) {
WARN_ON(resuming);
return -EBUSY;
}
if (!resuming)
pm_runtime_get_sync(&pdev->dev);
netif_carrier_off(netdev);
/* allocate transmit descriptors */
// 分配传输的descriptors,实际上是RingBuffer队列
err = igb_setup_all_tx_resources(adapter);
if (err)
goto err_setup_tx;
/* allocate receive descriptors */
// 分配接收的descriptors,实际上是RingBuffer队列
err = igb_setup_all_rx_resources(adapter);
if (err)
goto err_setup_rx;
igb_power_up_link(adapter);
/* before we allocate an interrupt, we must be ready to handle it.
* Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
* as soon as we call pci_request_irq, so we have to setup our
* clean_rx handler before we do so.
*/
igb_configure(adapter);
// 注册中断处理函数
err = igb_request_irq(adapter);
if (err)
goto err_req_irq;
/* Notify the stack of the actual queue counts. */
err = netif_set_real_num_tx_queues(adapter->netdev,
adapter->num_tx_queues);
if (err)
goto err_set_queues;
err = netif_set_real_num_rx_queues(adapter->netdev,
adapter->num_rx_queues);
if (err)
goto err_set_queues;
/* From here on the code is the same as igb_up() */
clear_bit(__IGB_DOWN, &adapter->state);
for (i = 0; i < adapter->num_q_vectors; i++)
napi_enable(&(adapter->q_vector[i]->napi));
/* Clear any pending interrupts. */
rd32(E1000_ICR);
igb_irq_enable(adapter);
/* notify VFs that reset has been completed */
if (adapter->vfs_allocated_count) {
u32 reg_data = rd32(E1000_CTRL_EXT);
reg_data |= E1000_CTRL_EXT_PFRSTD;
wr32(E1000_CTRL_EXT, reg_data);
}
netif_tx_start_all_queues(netdev);
if (!resuming)
pm_runtime_put(&pdev->dev);
/* start the watchdog. */
hw->mac.get_link_status = 1;
schedule_work(&adapter->watchdog_task);
return 0;
err_set_queues:
igb_free_irq(adapter);
err_req_irq:
igb_release_hw_control(adapter);
igb_power_down_link(adapter);
igb_free_all_rx_resources(adapter);
err_setup_rx:
igb_free_all_tx_resources(adapter);
err_setup_tx:
igb_reset(adapter);
if (!resuming)
pm_runtime_put(&pdev->dev);
return err;
}
从igb_setup_all_rx_resources()中可以看到,一个循环中创建了多个队列。
static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
{
struct pci_dev *pdev = adapter->pdev;
int i, err = 0;
for (i = 0; i < adapter->num_rx_queues; i++) {
err = igb_setup_rx_resources(adapter->rx_ring[i]);
if (err) {
dev_err(&pdev->dev,
"Allocation for Rx Queue %u failed\n", i);
for (i--; i >= 0; i--)
igb_free_rx_resources(adapter->rx_ring[i]);
break;
}
}
return err;
}
// /Users/kangxiaoning/workspace/linux-3.10/net/core/dev.c
static int __netif_receive_skb(struct sk_buff *skb)
{
int ret;
if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
unsigned long pflags = current->flags;
/*
* PFMEMALLOC skbs are special, they should
* - be delivered to SOCK_MEMALLOC sockets only
* - stay away from userspace
* - have bounded memory usage
*
* Use PF_MEMALLOC as this saves us from propagating the allocation
* context down to all allocation sites.
*/
current->flags |= PF_MEMALLOC;
ret = __netif_receive_skb_core(skb, true);
tsk_restore_flags(current, pflags, PF_MEMALLOC);
} else
ret = __netif_receive_skb_core(skb, false);
return ret;
}