/* PIPAPO - PIle PAcket POlicies * * pipapo.c - Stand-alone algorithm implementation * * Author: Stefano Brivio * License: GPLv2 * * * == 1. Problem * * Match packets against n generic entries with m (ipset-like) fields in a set, * mapping them to n keys, for example: * * --- fields ---> * | [net],[port],[net]... => [key] * entries [net],[port],[net]... => [key] * | [net],[port],[net]... => [key] * V ... * * where [net] fields can be IP ranges or netmasks, and [port] fields are port * ranges. Arbitrary packet fields (source and destination IPv4, IPv6, MAC * addresses and ports) can be matched. * * * == 2. Introduction * * This algorithm is loosely inspired by [Ligatti 2010], and fundamentally * relies on the consideration that every contiguous range in a space of b bits * can be converted into b netmasks, from Theorem 3 in [Rottenstreich 2010]. * Despite the fact that we don't implement a boolean classifier here, some * relevant optimisation ideas were inspired by [Kogan 2014], especially related * to the notion of order-independent regions of a classifier. * * Classification against a number of entries, that require matching given bits * of a packet field, is performed by grouping those bits in sets of arbitrary * size, and classifying packet bits one group at a time. * * Example: to match the source port (16 bits) of a packet, we can divide * those 16 bits in 4 groups of 4 bits each. Given the entry: * 0000 0001 0101 1001 * and a packet with source port: * 0000 0001 1010 1001 * first and second groups match, but the third doesn't. We conclude that the * packet doesn't match the given entry. * * Translate the set to a sequence of lookup tables, one per field. Each table * has two dimensions: bit groups to be matched for a single packet field, and * all the possible values of said groups. Each group in the table maps to the * next group through a set of rules, which are generated from entries during * the pre-computation phase. The last group in a table maps to the next table, * and the set of matched rules after the last table in the set is mapped to a * single verdict key. * * To match, we perform table lookups using the values of relevant packet bits, * divided according to the groups, and use a sequence of bitwise operations to * progressively evaluate rule matching. * * In the description below, the basic idea is described first, and * optimisations are illustrated later. Steps with O(n) computational complexity * are then all reduced to O(log(n)) problems. * * * == 3. Precomputation * * - For each packet field: * * (3.1) divide the b packet bits we want to classify into groups of size t, * obtaining ceil(b / t) groups * * Example: match on destination IP address, with t = 4: 32 bits, 8 groups * of 4 bits each * * (3.2) allocate a lookup table with one column ("bucket") for each possible * value of a group, and with one row for each group * * Example: 8 groups, 2^4 buckets: * * bucket * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 * 0 * 1 * 2 * 3 * 4 * 5 * 6 * 7 * * (3.3) map the bits we want to classify for the current field, for a given * entry, to a single rule for non-ranged and netmask set items, and to one or * multiple rules for ranges. Ranges are expanded to composing netmasks: * * Example: 2 entries, 10.0.0.5:1024 and 192.168.1.0-192.168.2.1:2048 * - rule #0: 10.0.0.5 * - rule #1: 192.168.1.0/24 * - rule #2: 192.168.2.0/31 * * (3.4) insert references to the rules in the lookup table, selecting buckets * according to bit values of a rule in the given group * * Example: given: * - rule #0: 10.0.0.5 mapping to buckets * < 0 10 0 0 0 0 0 5 > * - rule #1: 192.168.1.0/24 mapping to buckets * < 12 0 10 8 0 1 < 0..15 > < 0..15 > > * - rule #2: 192.168.2.0/31 mapping to buckets * < 12 0 10 8 0 2 0 < 0..1 > > * * these bits are set in the lookup table: * * bucket * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 * 0 0 1,2 * 1 1,2 0 * 2 0 1,2 * 3 0 1,2 * 4 0,1,2 * 5 0 1 2 * 6 0,1,2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 7 1,2 1,2 1 1 1 0,1 1 1 1 1 1 1 1 1 1 1 * * (3.5) if this is not the last field in the set, build a mapping table that * maps rules from the lookup table to rules belonging to the same entry in * the next lookup table * * Example: 2 entries, 10.0.0.5:1024 and 192.168.1.0-192.168.2.1:2048 * * given lookup table #0 (see example above): * * bucket * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 * 0 0 1,2 * 1 1,2 0 * 2 0 1,2 * 3 0 1,2 * 4 0,1,2 * 5 0 1 2 * 6 0,1,2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 7 1,2 1,2 1 1 1 0,1 1 1 1 1 1 1 1 1 1 1 * * and lookup table #1 with: * - rule #0: 1024 mapping to buckets * < 0 0 4 0 > * - rule #1: 2048 mapping to buckets * < 0 0 5 0 > * * bucket * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 * 0 0,1 * 1 0,1 * 2 0 1 * 3 0,1 * * we need to map rules for 10.0.0.5 in lookup table #0 (rule #0) to 1024 * in lookup table #1 (rule #0) and rules for 192.168.1.0-192.168.2.1 * (rules #1, #2) to 2048 in lookup table #2 (rule #1): * * rules * 0 1 2 * map to: 0 1 1 * * (3.6) if this is the last field in the set, build a mapping table that maps * rules from the last lookup table to verdict keys * * Example: 10.0.0.5:1024 gives 66 as verdict and * 192.168.1.0-192.168.2.1:2048 gives 42. From the rules of lookup table #2 * as mapped above: * * rules * 0 1 * map to: 66 42 * * * == 4. Matching * * (4.1) We use a result bitmap, with the size of a single lookup table bucket, * to represent the matching state that applies at every algorithm step. * * - For each packet field: * * (4.2) start with an all-ones result bitmap * * (4.3) perform a lookup into the table corresponding to the current field, * for each group, and at every group, AND the current result bitmap with the * value from the lookup table bucket * * Example: 192.168.1.5 < 12 0 10 8 0 1 0 5 >, with lookup table from * pre-computation example. * Lookup table buckets are at least 3 bits wide, we'll arbitrarily assume * they are 8 bits for convenience. Initial result bitmap is 0xff, the * steps below show the value of the result bitmap after each group is * processed: * * bucket * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 * 0 0 1,2 * * result bitmap is now: 0xff & 0x6 [bucket 12] = 0x6 * * 1 1,2 0 * * result bitmap is now: 0x6 & 0x6 [bucket 0] = 0x6 * * 2 0 1,2 * * result bitmap is now: 0x6 & 0x6 [bucket 10] = 0x6 * * 3 0 1,2 * * result bitmap is now: 0x6 & 0x6 [bucket 8] = 0x6 * * 4 0,1,2 * * result bitmap is now: 0x6 & 0x7 [bucket 0] = 0x6 * * 5 0 1 2 * * result bitmap is now: 0x6 & 0x2 [bucket 1] = 0x2 * * 6 0,1,2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * * result bitmap is now: 0x2 & 0x7 [bucket 0] = 0x2 * * 7 1,2 1,2 1 1 1 0,1 1 1 1 1 1 1 1 1 1 1 * * final result bitmap for this field is: 0x2 & 0x3 [bucket 5] = 0x2 * * (4.4) at the next field, start with a new, all-zeroes result bitmap. For * each bit set in the previous result bitmap, OR the new result bitmap with * the corresponding value from the mapping table for this field * * Example: with mapping table from pre-computation example, current result * bitmap 0x02: * * rules * 0 1 2 * map to: 0 1 1 * * new result bitmap is 0x00 | 0x02 [bit/rule 1 was set] = 0x02 * * we can now extend this example to cover the second iteration of the step * above (lookup and AND bitmap): assuming the port field is * 2048 < 0 0 5 0 >, with starting result bitmap 0x2, and lookup table * for "port" field from pre-computation example: * * bucket * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 * 0 0,1 * 1 0,1 * 2 0 1 * 3 0,1 * * operations are: 0x2 & 0x3 [bucket 0] & 0x3 [bucket 0] & 0x2 [bucket 5] * & 0x3 [bucket 0], resulting bitmap is 0x2. * * (4.5) if this is the last field in the set, look up the value from the * verdict table corresponding to the final result bitmap * * Example: 0x2 resulting bitmap from 192.168.1.5:2048, verdict table from * pre-computation example: * * rules * 0 1 * map to: 66 42 * * the verdict is 42. * * * == 5. Ideas for Future Optimisations * * (5.1) Reduction of number of rules by allowing inverted netmasks * * CAVEAT: This needs some complicated tracking for inverted entries, which * aren't implemented here at all. In order to further halve the number of * rules as described below, we also need to use sets of netmasks, instead of a * single netmask for each entry, and intersect between them and their * inversions. This also introduces further complexity. * * Theorem 3 in [Rottenstreich 2010] shows that every range of b bits can be * converted into b bitmaps, with inversions, as also illustrated by section 9 * in [Kogan 2014]. * * Further, if we allow partial subtractions between netmasks, for each range R * of b bits, that can be converted into a b0 number of bitmaps, there exists a * complementary range, R1, that can be converted into a b1 number of bitmaps, * such that b1 + b0 = b. * * [TODO: Formal proof. This consideration is intuitively true and was validated * by bruteforcing over the IPv4 address space, so we can safely use it in the * implementation for the moment.] * * By construction, this allows us to express any range of b bits as ceil(b / 2) * entries, either as reversed or direct entries. By allowing reversed entries, * we need to also provide a reverse mapping possibility, which is achieved by * modifying the mapping table from pre-computation step 4.5, and the step * itself, as follows: * - every bit corresponding to a reverse rule is set in the initial result * bitmap, which is not necessarily an all-zeroes bitmap anymore * - the transformation step becomes a XOR operation: the XOR step with the * modified initial bitmap reverses the result, if we guarantee that one * single XOR step is performed. This comes, in turn, from optimisation 5.2 * * Example: match on 0.0.0.1-127.255.255.255:1024 and on 192.168.1.1:1024. * A direct mapping means we need 31 rules for the first address range, using * reversed rules we only need 2 rules instead (rule #0: 0.0.0.0/32, * rule #1: 128.0.0.0/1). Rule #2: 192.168.1.1/32. * First lookup table: * bucket * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 * 0 0 1 1 1 1 1,2 1 1 1 * 1 0,1,2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 2 0,1 1 1 1 1 1 1 1 1 1 1,2 1 1 1 1 1 * 3 0,1 1 1 1 1 1 1 1 1,2 1 1 1 1 1 1 1 * 4 0,1,2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 5 0,1 1,2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 6 0,1,2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 7 0,1 1,2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * * Second lookup table: rule #0: 1024 for first entry, rule #1: 1024 for * second entry * bucket * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 * 0 0,1 * 1 0,1 * 2 0,1 * 3 0,1 * * mapping table: initial bitmap is 0x1 (entry with rule #0 in second table * is reversed in first table: set bit #0) * rules * 0 1 2 * XOR with: 0x1 0x1 0x2 * * - expansion to reverse rules is selected if the number of reversed rules is * strictly less than the number of needed direct rules. * * (5.2) Single step for match and fill from result bitmap * * CAVEAT: This significantly affects complexity of listing and deletion, * because once rules are replaced, further steps are needed to rebuild the * originating entries. * * Step 4.4 loops over all bits set in the current result bitmap: this is * needed as set entries might overlap, for one or more fields. The * computational complexity of mapping steps is O(m * log(n)), with m > 1 * overlapping factor, because we have no tight bounds on the number of * overlapping entries. However, we can apply a reduction step on lookup tables, * and ensure a single bit is set in the result bitmap after each lookup table * is considered, with three observations: * * (5.2.1) In case two entries overlap entirely in a given lookup table, and * their rules are present in the previous mapping table (or if it's the first * lookup table), the entries can map to the same set of rules without loss of * generality. * * Example: given entries 192.168.1.0/24:1024 and 192.168.1.0/24:2048, first * lookup table initially has rules: * #0: 192.168.1.0/24 * #1: 192.168.1.0/24 * and second lookup table has rules: * #0: 1024 * #1: 2048 * with mapping table #0 -> #0 | #1 -> #1. * After this reduction step, the first lookup table has a single * rule (#0: 192.168.1.0/24), mapping to both rule #0 and #1. * * Example: given entries 192.168.1.0/24:1024:10.0.0.1 and * 192.168.2.0/24:1024:10.0.0.1, we don't apply any reduction step. * First lookup table has rules: * #0: 192.168.1.0/24 * #1: 192.168.2.0/24 * and second lookup table has rules: * #0: 1024 * #1: 1024 * with first mapping table #0 -> #0 | #1 -> #1. As there are no * images in the first lookup table with both rules #0 and #1 set, * these two rules will be preserved. Third lookup table has two * rules: * #0: 10.0.0.1/32 * #1: 10.0.0.1/32 * and second mapping table is: #0 -> #0 | #1 -> #1. * * (5.2.2) In case two entries overlap partially in a given lookup table, due to * the way we expand ranges to netmasks, one set of rules is always a proper * subset of the other overlapping one. That is, we can hit this situation: * * bucket * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 * 0 x,y * 1 x,y x,y y y * * but not: * * bucket * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 * 0 x,y * 1 x x,y y y * * so in case entries corresponding to x map to a proper subset of the y rules, * x rules never occur without y rules. We can then replace the x,y pairs by * x, and merge their mapping tables. * * Example: entries: 192.168.1.1-192.168.1.2:1 * expands to: 192.168.1.1/32:1 (#0:#0), 192.168.1.2/32:1 (#1:#0) * 192.168.1.0-192.168.1.1:2 * expands to: 192.168.1.0/31:2 (#2:#1) * 192.168.1.0-192.168.1.2:3 * expands to: 192.168.1.0/31:3 (#3:#2), 192.168.1.2/32:3 (#4:#2) * * First lookup table (only last group shown): * bucket * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 * ... * 7 2,3 0,2,3 1,4 * * Second lookup table (only last group shown): * bucket * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 * ... * 3 0 1 2 * * Initial mapping table: #0 -> #0 | #1 -> #0 | #2 -> #1 | #3 -> #2 | #4 -> #2 * * Apply reduction step from 5.2.1 twice to first lookup table: rule #1 * entirely overlaps with #4, #2 entirely overlaps with #3. New lookup table: * * bucket * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 * ... * 7 2 0,2 1 * * New mapping table: #0 -> #0 | #1 -> #0, #2 | #2 -> #1, #2 * * Apply reduction step from 5.2.2: 0 never occurs without 2: * * bucket * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 * ... * 7 2 0 1 * * New mapping table: #0 -> #0, #1, #2 | #1 -> #0, #2 | #2 -> #1, #2 * * (5.2.3) There are no other ways entries can overlap (directly from set * theory) * [TODO: Formal proof or reference.] * * (5.3) Single step for invert, match and bitmap fill operations (comes almost * for free with 5.2 and 5.1). * * We can now replace the loop from 4.4 with a single XOR operation * depending on the single bit set in the result bitmask after a lookup table is * evaluated. Computational complexity of that step becomes, in theory, O(1): * a single bit is set, hence lookup of the corresponding bitmap can be done in * constant time. For all practical implementations, this is O(log(n)): the * number of words in the mapping table grows linearly with the number of * entries, but number of rules grows sublinearly and we can find the set bit * with very efficient operations, too (e.g. x86's TZCNT, AVX-512 VPLZCNTQ, * PPC's CNTLZ, etc.)] * * (5.4) [TODO: Evaluate if this provides an actual advantage even with an * implementation of step 4.3 optionally based on AVX2 VPAND instrinsics.] * As an alternative to finding the first bit set in the result bitmap for the * mapping table lookup, we can terminate step 4.3 early, once we reach the last * group of a table and a set bit is found in the result, and use the position * of this bit directly for the mapping table lookup. * * (5.5) [TODO: Evaluate how bad this is on architectures where branching is * expensive, e.g. PPC, find out if it makes sense to implement this optionally * depending on architecture.] * - We can terminate the algorithm early if the initial result bitmap, as we * start processing the next lookup table, is empty: that means that the * packet won't match any entry * - We can terminate step 4.3 early if the result bitmap happens to be empty * after a group is entirely evaluated: this means the result bitmap will be * also empty at the beginning of step 4.4. We can't terminate the whole * algorithm in this case, though, as entries might be represented with an * inverted set of rules. * * (5.6) The mapping table described in step (3.5) grows in size linearly with * the product of rules for the current field and rules for the next field. For * entries with non-overlapping rules in a given field, this size would grow * with the number of rules for this field, independently of the fact that these * rules might map to the same set of rules in the next field. * * Example: one entry, 192.168.1.0-192.168.2.1:2048 * * given lookup table #0 (see example above): * * bucket * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 * 0 0 1,2 * 1 1,2 0 * 2 0 1,2 * 3 0 1,2 * 4 0,1,2 * 5 0 1 2 * 6 0,1,2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 * 7 1,2 1,2 1 1 1 0,1 1 1 1 1 1 1 1 1 1 1 * * and lookup table #1 with: * - rule #0: 1024 mapping to buckets * < 0 0 4 0 > * - rule #1: 2048 mapping to buckets * < 0 0 5 0 > * * bucket * group 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 * 0 0,1 * 1 0,1 * 2 0 1 * 3 0,1 * * we need to map rules for 10.0.0.5 in lookup table #0 (rule #0) to 1024 * in lookup table #1 (rule #0) and rules for 192.168.1.0-192.168.2.1 * (rules #1, #2) to 2048 in lookup table #2 (rule #1): * * rules * 0 1 2 * map to: 0 1 1 * * * == 6. References * * [Ligatti 2010] * A Packet-classification Algorithm for Arbitrary Bitmask Rules, with * Automatic Time-space Tradeoffs * Jay Ligatti, Josh Kuhn, and Chris Gage. * Proceedings of the IEEE International Conference on Computer * Communication Networks (ICCCN), August 2010. * http://www.cse.usf.edu/~ligatti/papers/grouper-conf.pdf * * [Rottenstreich 2010] * Worst-Case TCAM Rule Expansion * Ori Rottenstreich and Isaac Keslassy. * 2010 Proceedings IEEE INFOCOM, San Diego, CA, 2010. * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.212.4592&rep=rep1&type=pdf * * [Kogan 2014] * SAX-PAC (Scalable And eXpressive PAcket Classification) * Kirill Kogan, Sergey Nikolenko, Ori Rottenstreich, William Culhane, * and Patrick Eugster. * Proceedings of the 2014 ACM conference on SIGCOMM, August 2014. * http://www.sigcomm.org/sites/default/files/ccr/papers/2014/August/2619239-2626294.pdf */ #ifndef VERBOSE #define PERF_TEST_BLOCK 100 #endif #include #include #include #include #include #include #include #include "pipapo.h" #include "set.h" #include "match.h" #include "util.h" /* Specifiers for set description: label, type, length, and offset in packet */ struct desc_spec desc_specs[] = { { "key", KEY, 0, 0 }, { "dmac", MAC, MAC_LEN, 0 }, { "smac", MAC, MAC_LEN, 6 }, { "saddr", ADDR, ADDR_LEN, 26 }, { "daddr", ADDR, ADDR_LEN, 30 }, { "sport", PORT, PORT_LEN, 34 }, { "dport", PORT, PORT_LEN, 36 }, { "saddr6", ADDR6, ADDR6_LEN, 22 }, { "daddr6", ADDR6, ADDR6_LEN, 38 }, { "sport6", PORT, PORT_LEN, 54 }, { "dport6", PORT, PORT_LEN, 56 }, { NULL, 0, 0, 0 }, }; /** * desc_parse_spec() - Parse set layout from tokens on first line of set file * @d: Pre-allocated set description to fill in * @n: Number of field tokens * @tokens: Labels of fields in desired set layout * * Return: 0 on success, negative error code on failure */ int desc_parse_spec(struct desc *d, int n, char (*tokens)[BUFSIZ]) { struct desc_spec *spec; int i; d->row_size = sizeof(enum set_ops); for (i = 0; i < n; i++) { for (spec = desc_specs; spec->label; spec++) { if (!strncmp(tokens[i], spec->label, BUFSIZ)) break; } if (!spec->label) { fprintf(stderr, "Invalid set specifier token %s\n", tokens[i]); return -EINVAL; } d->layout[i] = spec; if (spec->type == KEY) { d->row_size += sizeof(uint32_t); continue; } d->fields++; if (spec->type == ADDR) d->row_size += sizeof(struct addr); else if (spec->type == ADDR6) d->row_size += sizeof(struct addr6); else if (spec->type == PORT) d->row_size += sizeof(struct port); else if (spec->type == MAC) d->row_size += sizeof(struct mac); } return 0; } /** * desc_parse_entry() - Parse set entries from set file * @d: Pre-allocated set description, d->data to be filled * @at: Starting position in d->data to be filled with next set entry * @n: Number of fields * @tokens: Set items: addresses, ports, ranges thereof * * Return: 0 on success, negative error code on failure */ int desc_parse_entry(struct desc *d, int at, int n, char (*tokens)[BUFSIZ]) { int i, base = at * d->row_size, offset, b; struct addr6 a6; struct addr a; struct port p; char *c, *end; struct mac m; uint32_t key; uint8_t v[6]; if (!strncmp(tokens[0], "list", strlen(tokens[0]))) { *(enum set_ops *)(d->data + base) = LIST; return 0; } if (!strncmp(tokens[0], "add", strlen(tokens[0]))) *(enum set_ops *)(d->data + base) = ADD; else if (!strncmp(tokens[0], "delete", strlen(tokens[0]))) *(enum set_ops *)(d->data + base) = DEL; else return -EINVAL; offset = sizeof(enum set_ops); tokens++; n--; for (i = 0; i < n; i++) { switch (d->layout[i]->type) { case KEY: key = strtoul(tokens[i], &end, 10); if (*end) return -EINVAL; memcpy(d->data + base + offset, &key, sizeof(key)); offset += sizeof(key); break; case ADDR: /* IPv4... */ if ((c = strchr(tokens[i], '-'))) { /* range */ if (!inet_aton(c + 1, (struct in_addr *)&a.end)) return -EINVAL; a.cidr = 0; *c = 0; } else if ((c = strchr(tokens[i], '/'))) { /* netmask */ *c = 0; a.cidr = strtoul(c + 1, &end, 10); if (*end) return -EINVAL; } else { /* address */ a.cidr = 32; } if (!inet_aton(tokens[i], (struct in_addr *)&a.start)) return -EINVAL; memcpy(d->data + base + offset, &a, sizeof(a)); offset += sizeof(a); break; case ADDR6: /* IPv6... */ if ((c = strchr(tokens[i], '-'))) { /* range */ if (!inet_pton(AF_INET6, c + 1, &a6.end)) return -EINVAL; a6.cidr = 0; *c = 0; } else if ((c = strchr(tokens[i], '/'))) { /* netmask */ *c = 0; a6.cidr = strtoul(c + 1, &end, 10); if (*end) return -EINVAL; } else { /* address */ a6.cidr = 128; } if (!inet_pton(AF_INET6, tokens[i], &a6.start)) return -EINVAL; memcpy(d->data + base + offset, &a6, sizeof(a6)); offset += sizeof(a6); break; case PORT: if ((c = strchr(tokens[i], '-'))) { *c = 0; p.end = htons(strtoul(c + 1, &end, 10)); if (*end) return -EINVAL; } else { p.end = 0; } p.start = htons(strtoul(tokens[i], &end, 10)); if (*end) return -EINVAL; memcpy(d->data + base + offset, &p, sizeof(p)); offset += sizeof(p); break; case MAC: if ((c = strchr(tokens[i], '-'))) *c = 0; b = sscanf(tokens[i], "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx%*c", v + 0, v + 1, v + 2, v + 3, v + 4, v + 5); if (b != 6) return -EINVAL; memcpy(&m.start, v, 6); if (c) { b = sscanf(c + 1, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx%*c", v + 0, v + 1, v + 2, v + 3, v + 4, v + 5); if (b != 6) return -EINVAL; memcpy(&m.end, v, 6); } else { memset(&m.end, 0, 6); } memcpy(d->data + base + offset, &m, sizeof(m)); offset += sizeof(m); break; } } return 0; } /** * usage() - Display usage and exit * @argv0: Executable name */ void usage(const char *argv0) { fprintf(stderr, "%s \n", argv0); exit(1); } /** * packet_parse() - Parse packets and expected key from path into binary format * @path: Path to packet file * @ret: Pointer to packet buffer, incremented by this function * * Return: 0 on success, negative error code on failure */ static int packet_parse(const char *path, uint8_t **ret) { int alloc, n, byte_count, packet_count = 0, err; uint8_t *packets, *pos, *tmp; char buf[BUFSIZ], *end; char *p; FILE *f; alloc = BUFSIZ; pos = packets = malloc(alloc * (sizeof(uint32_t) + PACKET_SIZE)); if (!packets) return -ENOMEM; f = fopen(path, "r"); if (!f) { err = -errno; goto fail; } while (fgets(buf, BUFSIZ, f)) { if (!*buf || *buf == '#' || *buf == ' ' || *buf == '\n') continue; if (++packet_count > alloc) { alloc += BUFSIZ; tmp = realloc(packets, alloc * (sizeof(uint32_t) + PACKET_SIZE)); if (!tmp) { err = -ENOMEM; goto fail_close; } packets = tmp; pos = packets; pos += (packet_count - 1) * (sizeof(uint32_t) + PACKET_SIZE); } /* Key */ p = strtok(buf, " \t"); n = strtoul(p, &end, 0); if (*end) { err = -EINVAL; goto fail_close; } *(uint32_t *)pos = n; pos += sizeof(uint32_t); /* Packet bytes */ byte_count = 0; while ((p = strtok(NULL, " \t\n"))) { n = strtoul(p, &end, 0); if (*end) { err = -EINVAL; goto fail_close; } *pos = n; pos++; if (++byte_count >= PACKET_SIZE) break; } if (byte_count < PACKET_SIZE) { memset(pos, 0, PACKET_SIZE - byte_count); pos += PACKET_SIZE - byte_count; } } fclose(f); *ret = packets; verbose("Read %i packets from %s\n", packet_count, path); return packet_count; fail_close: fclose(f); fail: free(packets); return err; } /** * desc_parse() - Parse set file from path * @path: Path to set file * @d: Pre-allocated set, with description and data parts * * Return: 0 on success, negative error code on failure */ int desc_parse(const char *path, struct desc *d) { int first = 1, err = 0, alloc = 0, n; char t[9][BUFSIZ]; char buf[BUFSIZ]; uint8_t *tmp; FILE *f; f = fopen(path, "r"); if (!f) return -errno; while (fgets(buf, BUFSIZ, f)) { n = sscanf(buf, "%s %s %s %s %s %s %s %s %s", t[0], t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8]); if (!n || *t[0] == '#') continue; if (first) { /* First line is the set type specifier */ err = desc_parse_spec(d, n, t); if (err) goto out; first = 0; continue; } if (d->entries == alloc) { alloc += BUFSIZ; tmp = realloc(d->data, alloc * d->row_size); if (!tmp) { err = -ENOMEM; goto out; } d->data = tmp; } err = desc_parse_entry(d, d->entries++, n, t); } out: fclose(f); return err; } /** * main() - Entry point: parse set and packets, pre-compute, match * @argc: Argument count * @argv: Path to set description and packet files * * Return: 0 on success, 1 on matching failure, negative error code otherwise */ int main(int argc, char **argv) { int packet_count, offset = 0, i, err; uint8_t *packets, *ptr; struct desc d = { 0 }; struct kernel_set *ks; struct set s = { 0 }; uint32_t key; #ifdef VERBOSE char *total_mem_suffix = "B"; int total_mem = 0; struct field *f; #else struct timespec tp_start, tp_end; int repeat = 0, b = 0; double time = 0; #endif (void)key; if (argc != 3) usage(argv[0]); verbose("=== Parsing\n"); if ((packet_count = packet_parse(argv[2], &packets)) < 0) { fprintf(stderr, "Parsing failed, %s\n", strerror(packet_count)); exit(packet_count); } if ((err = desc_parse(argv[1], &d))) { fprintf(stderr, "Parsing failed, %s\n", strerror(err)); return err; } verbose("\n"); verbose("=== Pre-computation\n"); if ((err = init(&s, d.layout))) { fprintf(stderr, "Set init failed, %s\n", strerror(err)); return err; } for (i = 0; i < d.entries; i++) { if (*(enum set_ops *)(d.data + offset) == ADD) err = add(&s, d.layout, d.data + offset + sizeof(enum set_ops)); else if (*(enum set_ops *)(d.data + offset) == LIST) list_or_del(&s, d.layout, NULL); else if (*(enum set_ops *)(d.data + offset) == DEL) err = list_or_del(&s, d.layout, d.data + offset + sizeof(enum set_ops)); if (err) return err; offset += d.row_size; } for (i = 0; d.layout[i]->type != KEY; i++) { verbose("\n"); verbose("Lookup table for %s:\n", d.layout[i]->label); show_lookup(&s.fields[i]); verbose("Mapping table for %s:\n", d.layout[i]->label); show_mapping(&s.fields[i], d.layout[i + 1]->type == KEY); } verbose("\n"); verbose("=== Match\n"); ks = kernel_init(&s, d.layout); #ifndef VERBOSE repeat_block: repeat = PERF_TEST_BLOCK; b++; clock_gettime(CLOCK_MONOTONIC, &tp_start); do { #endif ptr = packets; for (i = 0; i < packet_count; i++) { key = match(ks, ptr + sizeof(uint32_t)); #ifdef VERBOSE verbose("Packet #%i => key: %u\n\n", i, key); if (key != *(uint32_t *)ptr) { verbose("FAIL: expected key: %u\n", *(uint32_t *)ptr); return 1; } #endif ptr += PACKET_SIZE + sizeof(uint32_t); } #ifndef VERBOSE } while (repeat--); clock_gettime(CLOCK_MONOTONIC, &tp_end); time += (tp_end.tv_sec - tp_start.tv_sec) + (tp_end.tv_nsec - tp_start.tv_nsec) / 1E9; if (time < 5) { goto repeat_block; } fprintf(stdout, "Matched %lfM packets in %lfs (%lf Mpps)\n", PERF_TEST_BLOCK * b * packet_count / 1000.0 / 1000, time, PERF_TEST_BLOCK * b * packet_count / 1000.0 / 1000 / time); #endif #ifdef VERBOSE verbose("\n"); verbose("\n=== Memory Usage\n"); for_each_field(f, &s, d.layout) { char *lt_suffix = "B", *mt_suffix = "B", *field_suffix = "B"; int lt_size = f->groups * BUCKETS * f->bsize; int mt_size = f->rules * sizeof(*f->mt); int field_size; total_mem += field_size = lt_size + mt_size + sizeof(*f); if (lt_size > 20 * 1024) { lt_size /= 1024; lt_suffix = "KiB"; } if (mt_size > 20 * 1024) { mt_size /= 1024; mt_suffix = "KiB"; } if (field_size > 20 * 1024) { field_size /= 1024; field_suffix = "KiB"; } verbose("Field %i, %s, %i rules:\n", i, d.layout[i]->label, f->rules); verbose(" lookup table: %i%s\n", lt_size, lt_suffix); free(f->lt); verbose(" mapping table: %i%s\n", mt_size, mt_suffix); free(f->mt); verbose(" total: %i%s\n", field_size, field_suffix); verbose("\n"); } if (total_mem > 50 * 1024 * 1024) { total_mem /= 1024 * 1024; total_mem_suffix = "MiB"; } else if (total_mem > 20 * 1024) { total_mem /= 1024; total_mem_suffix = "KiB"; } verbose("Total: %i%s\n", total_mem, total_mem_suffix); #endif /* Rest of allocated data is volatile */ free(s.fields); free(d.data); free(packets); free(ks->map[0]); free(ks->map[1]); for (i = 0; i < 8; i++) { free(ks->lt[i]); free(ks->mt[i]); } free(ks); return 0; }