Linux Basics : NETLINK - ebiken/doc-network GitHub Wiki
Based on linux kernel 4.0-rc2
Index
- References from external sites
- netlink socket
- netlink message format
- netlink message header
- IP Service Template
- IP Service Specific data in TLVs (attributes)
References from external sites
- Netlink Library (libnl) explanation
- Core Library Developer's Guide
- Routing Library Developer's Guide
netlink socket
A netlink socket can be created using the syntax below:
netlink_socket = socket(AF_NETLINK, socket_type, netlink_family);
netlink_family, sometimes called the protocol, is one of the NETLINK_xxx values defined in (linux/netlink.h). Below are some examples related to networking.
(linux/netlink.h)
/*
 * Netlink families: the value passed as the third ("protocol") argument of
 * socket(AF_NETLINK, socket_type, netlink_family).
 * Only a networking-related subset is listed here.
 */
#define NETLINK_ROUTE 0 /* Routing/device hook */
#define NETLINK_SOCK_DIAG 4 /* socket monitoring */
#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */
#define NETLINK_XFRM 6 /* ipsec */
#define NETLINK_FIB_LOOKUP 10
#define NETLINK_NETFILTER 12 /* netfilter subsystem */
#define NETLINK_IP6_FW 13
#define NETLINK_GENERIC 16
/* Alias: expands to NETLINK_SOCK_DIAG, so both names select family 4. */
#define NETLINK_INET_DIAG NETLINK_SOCK_DIAG
netlink message format
A netlink message consists of three blocks.
- netlink message header : General netlink header.
- IP Service Template : Format defined per nlmsg_type.
- IP Service Specific data in TLVs : Attributes dependent on IP Services?
netlink message header
(linux/netlink.h)
/*
 * General netlink message header: the first of the three blocks that make up
 * a netlink message.
 */
struct nlmsghdr {
__u32 nlmsg_len; /* Length of message including header */
__u16 nlmsg_type; /* Message content: an NLMSG_* control type or a service-specific type such as RTM_* */
__u16 nlmsg_flags; /* Additional flags */
__u32 nlmsg_seq; /* Sequence number */
__u32 nlmsg_pid; /* Sending process port ID */
};
nlmsg_type
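There are 4 standard control message types defined, plus NLMSG_MIN_TYPE, which marks the boundary below which type values are reserved for control messages.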
(linux/netlink.h)
/*
 * Standard control message types. Type values below NLMSG_MIN_TYPE are
 * reserved for these control messages.
 */
#define NLMSG_NOOP 0x1 /* Nothing. */
#define NLMSG_ERROR 0x2 /* Error; the message contains a struct nlmsgerr */
#define NLMSG_DONE 0x3 /* End of a dump (end of a multi-part message) */
#define NLMSG_OVERRUN 0x4 /* Data lost */
#define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */
Below are the major ones described in the RFC 3549.
- NLMSG_NOOP
- NLMSG_ERROR Signals an error; the message contains an nlmsgerr structure. It also serves as an ACK: when a request sets the NLM_F_ACK flag, the reply is an NLMSG_ERROR message with the error code set to 0.
- NLMSG_DONE Describes the end of multi-part message.
Most message types are specified by the individual IP services. Below are some examples.
nlmsg_type example: specified by NETLINK_ROUTE
(linux/rtnetlink.h)
/*
 * Message types (nlmsg_type values) specified by NETLINK_ROUTE.
 * Numbering starts at RTM_BASE = 16, the same value as NLMSG_MIN_TYPE (0x10),
 * i.e. just above the range reserved for control messages.
 * Enumerators without an explicit value take the previous value plus one.
 */
enum {
RTM_BASE = 16,
/* Link messages */
RTM_NEWLINK = 16,
RTM_DELLINK,
RTM_GETLINK,
RTM_SETLINK,
/* Address messages */
RTM_NEWADDR = 20,
RTM_DELADDR,
RTM_GETADDR,
/* Route messages */
RTM_NEWROUTE = 24,
RTM_DELROUTE,
RTM_GETROUTE,
/* Neighbour messages */
RTM_NEWNEIGH = 28,
RTM_DELNEIGH,
RTM_GETNEIGH,
/* Rule messages */
RTM_NEWRULE = 32,
RTM_DELRULE,
RTM_GETRULE,
/* Qdisc messages */
RTM_NEWQDISC = 36,
RTM_DELQDISC,
RTM_GETQDISC,
/* Traffic class messages */
RTM_NEWTCLASS = 40,
RTM_DELTCLASS,
RTM_GETTCLASS,
/* Traffic filter messages */
RTM_NEWTFILTER = 44,
RTM_DELTFILTER,
RTM_GETTFILTER,
/* Action messages */
RTM_NEWACTION = 48,
RTM_DELACTION,
RTM_GETACTION,
RTM_NEWPREFIX = 52,
RTM_GETMULTICAST = 58,
RTM_GETANYCAST = 62,
/* Neighbour table messages */
RTM_NEWNEIGHTBL = 64,
RTM_GETNEIGHTBL = 66,
RTM_SETNEIGHTBL,
RTM_NEWNDUSEROPT = 68,
/* Address label messages */
RTM_NEWADDRLABEL = 72,
RTM_DELADDRLABEL,
RTM_GETADDRLABEL,
/* DCB messages */
RTM_GETDCB = 78,
RTM_SETDCB,
RTM_GETMDB = 86,
/* Sentinel: takes the value following the last enumerator above. */
__RTM_MAX,
};
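In the actual code, each enumerator is followed by
#define RTM_XXX RTM_XXX
so that the enum assigns the numbers automatically while each symbol is still visible to the preprocessor (e.g. for #ifdef), instead of manually typing a number such as #define RTM_XXX 1
for every symbol.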
IP Service Template
IP Service Template is defined by each nlmsg_type.
Example of ifinfomsg which is used for nlmsg_type = RTM_NEWLINK, RTM_DELLINK, RTM_GETLINK.
(include/linux/rtnetlink.h)
/*
 * IP Service Template that follows the netlink message header when
 * nlmsg_type is RTM_NEWLINK, RTM_DELLINK or RTM_GETLINK.
 */
struct ifinfomsg {
unsigned char ifi_family; /* NOTE(review): presumably an address family (AF_*) -- confirm */
unsigned char __ifi_pad; /* NOTE(review): looks like an explicit padding byte -- confirm */
unsigned short ifi_type; /* ARPHRD_* */
int ifi_index; /* Link index */
unsigned ifi_flags; /* IFF_* flags */
unsigned ifi_change; /* IFF_* change mask */
};
IP Service Specific data in TLVs (attributes)
At the end of the netlink message, there are attributes specific to each IP service, in TLV format.
Generic format of netlink attribute is defined as below.
(include/linux/netlink.h)
/*
 * Generic netlink attribute header: the length and type fields of a TLV.
 * The attribute payload follows this header in the message.
 */
struct nlattr {
__u16 nla_len; /* Length of header plus payload; trailing padding is not counted */
__u16 nla_type; /* Attribute type */
};
Sometimes padding is inserted into the attributes. The macros usually take care of it, but check alignment and padding if you run into problems when parsing a raw memory dump.
(include/linux/netlink.h)
/*
 * Layout of a single attribute:
 *
 *  <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)-->
 * +---------------------+- - -+- - - - - - - - - -+- - -+
 * |        Header       | Pad |     Payload       | Pad |
 * |   (struct nlattr)   | ing |                   | ing |
 * +---------------------+- - -+- - - - - - - - - -+- - -+
 *  <-------------- nlattr->nla_len -------------->
 */
/* Alignment boundary, in bytes, used for attribute headers and payloads. */
#define NLA_ALIGNTO 4
/* Round len up to the next multiple of NLA_ALIGNTO. */
#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
/* Size of struct nlattr rounded up to NLA_ALIGNTO, as an int. */
#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr)))
Sometimes the attribute structure is defined specifically for a netlink_family (or maybe per IP Service; not confirmed).
For example, when looking into the iproute2 source code, you will find the attribute format for rtnetlink, which is defined as below. This is logically identical to nlattr
above.
(include/linux/rtnetlink.h)
/*
 * rtnetlink attribute header: a length field followed by a type field,
 * logically identical to struct nlattr.
 */
struct rtattr {
unsigned short rta_len; /* Length of option */
unsigned short rta_type; /* Type of option */
/* Data follows */
};
Nested attributes
Attributes can be nested inside a so-called "container" attribute. In that case, the first (outermost) attribute header holds the length of the entire set of attributes.
As an example, below is nested attributes used when adding bridge (br0) from iproute2 ip command.
(gdb) ptype req
type = struct iplink_req {
struct nlmsghdr n; // netlink message header
struct ifinfomsg i; // IP Service Template
char buf[1024]; // Attributes
}
(gdb) p req
$1 = {n = {nlmsg_len = 56, nlmsg_type = 16, nlmsg_flags = 1537, nlmsg_seq = 0, nlmsg_pid = 0}, i = {
ifi_family = 0 '\000', __ifi_pad = 0 '\000', ifi_type = 0, ifi_index = 0, ifi_flags = 0,
ifi_change = 0},
buf = "\b\000\003\000br0\000\020\000\022\000\n\000\001\000bridge", '\000' <repeats 1001 times>}
(gdb) p/x req.buf
$5 = {0x8, 0x0, 0x3, 0x0, 0x62, 0x72, 0x30, 0x0, 0x10, 0x0, 0x12, 0x0, 0xa, 0x0, 0x1, 0x0, 0x62, 0x72,
0x69, 0x64, 0x67, 0x65, 0x0 <repeats 1002 times>}
/* Parsing buf (attribute TLV data) */
0x8, 0x0, rta_len = 0x0008
0x3, 0x0, rta_type = IFLA_IFNAME (0x0003)
0x62, 0x72, 0x30, 0x0, b,r,0,NULL
# Nested rta:
0x10, 0x0, rta_len = 0x0010 (16)
0x12, 0x0, rta_type = IFLA_LINKINFO (0x0012)
0xa, 0x0, rta_len = 0x000a (10)
0x1, 0x0, rta_type = IFLA_INFO_KIND (0x0001)
0x62, 0x72, 0x69, 0x64, 0x67, 0x65, = b,r,i,d,g,e
0x00 0x00 // padding: the inner rta_len (10) is rounded up to a 4-byte boundary (12), so the outer rta_len is 4 + 12 = 16
Definition (enum) of IFLA_IFNAME, IFLA_LINKINFO, IFLA_INFO_KIND can be found in (include/linux/if_link.h).