/* PIPAPO - PIle PAcket POlicies
 *
 * pipapo.c - Stand-alone algorithm implementation
 *
 * Author: Stefano Brivio <sbrivio@redhat.com>
 * License: GPLv2
 *
 *
 * == 1. Problem
 *
 * Match packets against n generic entries with m (ipset-like) fields in a set,
 * mapping them to n keys, for example:
 *
 *             --- fields --->
 *   |     [net],[port],[net]... => [key]
 * entries [net],[port],[net]... => [key]
 *   |     [net],[port],[net]... => [key]
 *   V     ...
 *
 * where [net] fields can be IP ranges or netmasks, and [port] fields are port
 * ranges. Arbitrary packet fields (source and destination IPv4, IPv6, MAC
 * addresses and ports) can be matched.
 *
 *
 * == 2. Introduction
 *
 * This algorithm is loosely inspired by [Ligatti 2010], and fundamentally
 * relies on the consideration that every contiguous range in a space of b bits
 * can be converted into b netmasks, from Theorem 3 in [Rottenstreich 2010].
 * Despite the fact that we don't implement a boolean classifier here, some
 * relevant optimisation ideas were inspired by [Kogan 2014], especially related
 * to the notion of order-independent regions of a classifier.
 *
 * Classification against a number of entries, that require matching given bits
 * of a packet field, is performed by grouping those bits in sets of arbitrary
 * size, and classifying packet bits one group at a time.
 *
 *   Example: to match the source port (16 bits) of a packet, we can divide
 *   those 16 bits in 4 groups of 4 bits each. Given the entry:
 *      0000 0001 0101 1001
 *   and a packet with source port:
 *      0000 0001 1010 1001
 *   first and second groups match, but the third doesn't. We conclude that the
 *   packet doesn't match the given entry.
 *
 * Translate the set to a sequence of lookup tables, one per field. Each table
 * has two dimensions: bit groups to be matched for a single packet field, and
 * all the possible values of said groups. Each group in the table maps to the
 * next group through a set of rules, which are generated from entries during
 * the pre-computation phase. The last group in a table maps to the next table,
 * and the set of matched rules after the last table in the set is mapped to a
 * single verdict key.
 *
 * To match, we perform table lookups using the values of relevant packet bits,
 * divided according to the groups, and use a sequence of bitwise operations to
 * progressively evaluate rule matching.
 *
 * In the description below, the basic idea is described first, and
 * optimisations are illustrated later. Steps with O(n) computational complexity
 * are then all reduced to O(log(n)) problems.
 *
 *
 * == 3. Precomputation
 *
 * - For each packet field:
 *
 *   (3.1) divide the b packet bits we want to classify into groups of size t,
 *   obtaining ceil(b / t) groups
 *
 *      Example: match on destination IP address, with t = 4: 32 bits, 8 groups
 *      of 4 bits each
 *
 *   (3.2) allocate a lookup table with one column ("bucket") for each possible
 *   value of a group, and with one row for each group
 *
 *      Example: 8 groups, 2^4 buckets:
 *
 *                     bucket
 *      group  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
 *        0
 *        1
 *        2
 *        3
 *        4
 *        5
 *        6
 *        7
 *
 *   (3.3) map the bits we want to classify for the current field, for a given
 *   entry, to a single rule for non-ranged and netmask set items, and to one or
 *   multiple rules for ranges. Ranges are expanded to composing netmasks:
 *
 *      Example: 2 entries, 10.0.0.5:1024 and 192.168.1.0-192.168.2.1:2048
 *      - rule #0: 10.0.0.5
 *      - rule #1: 192.168.1.0/24
 *      - rule #2: 192.168.2.0/31
 *
 *   (3.4) insert references to the rules in the lookup table, selecting buckets
 *   according to bit values of a rule in the given group
 *
 *      Example: given:
 *      - rule #0: 10.0.0.5 mapping to buckets
 *        < 0 10  0 0   0 0  0 5 >
 *      - rule #1: 192.168.1.0/24 mapping to buckets
 *        < 12 0  10 8  0 1  < 0..15 > < 0..15 > >
 *      - rule #2: 192.168.2.0/31 mapping to buckets
 *        < 12 0  10 8  0 2  0 < 0..1 > >
 *
 *      these bits are set in the lookup table:
 *
 *                     bucket
 *      group  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
 *        0    0                                              1,2
 *        1   1,2                                      0
 *        2    0                                      1,2
 *        3    0                              1,2
 *        4  0,1,2
 *        5    0   1   2
 *        6  0,1,2 1   1   1   1   1   1   1   1   1   1   1   1   1   1   1
 *        7   1,2 1,2  1   1   1  0,1  1   1   1   1   1   1   1   1   1   1
 *
 *   (3.5) if this is not the last field in the set, build a mapping table that
 *   maps rules from the lookup table to rules belonging to the same entry in
 *   the next lookup table
 *
 *      Example: 2 entries, 10.0.0.5:1024 and 192.168.1.0-192.168.2.1:2048
 *
 *      given lookup table #0 (see example above):
 *
 *                     bucket
 *      group  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
 *        0    0                                              1,2
 *        1   1,2                                      0
 *        2    0                                      1,2
 *        3    0                              1,2
 *        4  0,1,2
 *        5    0   1   2
 *        6  0,1,2 1   1   1   1   1   1   1   1   1   1   1   1   1   1   1
 *        7   1,2 1,2  1   1   1  0,1  1   1   1   1   1   1   1   1   1   1
 *
 *      and lookup table #1 with:
 *      - rule #0: 1024 mapping to buckets
 *        < 0  0  4  0 >
 *      - rule #1: 2048 mapping to buckets
 *        < 0  0  5  0 >
 *
 *                     bucket
 *      group  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
 *        0   0,1
 *        1   0,1
 *        2                    0   1
 *        3   0,1
 *
 *      we need to map rules for 10.0.0.5 in lookup table #0 (rule #0) to 1024
 *      in lookup table #1 (rule #0) and rules for 192.168.1.0-192.168.2.1
 *      (rules #1, #2) to 2048 in lookup table #2 (rule #1):
 *
 *                 rules
 *               0   1   2
 *      map to:  0   1   1
 *
 *   (3.6) if this is the last field in the set, build a mapping table that maps
 *   rules from the last lookup table to verdict keys
 *
 *      Example: 10.0.0.5:1024 gives 66 as verdict and
 *      192.168.1.0-192.168.2.1:2048 gives 42. From the rules of lookup table #2
 *      as mapped above:
 *
 *                 rules
 *               0   1
 *      map to: 66  42
 *
 *
 * == 4. Matching
 *
 * (4.1) We use a result bitmap, with the size of a single lookup table bucket,
 * to represent the matching state that applies at every algorithm step.
 *
 * - For each packet field:
 *
 *   (4.2) start with an all-ones result bitmap
 *
 *   (4.3) perform a lookup into the table corresponding to the current field,
 *   for each group, and at every group, AND the current result bitmap with the
 *   value from the lookup table bucket
 *
 *      Example: 192.168.1.5 < 12 0  10 8  0 1  0 5 >, with lookup table from
 *      pre-computation example.
 *      Lookup table buckets are at least 3 bits wide, we'll arbitrarily assume
 *      they are 8 bits for convenience. Initial result bitmap is 0xff, the
 *      steps below show the value of the result bitmap after each group is
 *      processed:
 *
 *                     bucket
 *      group  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
 *        0    0                                              1,2
 *
 *        result bitmap is now: 0xff & 0x6 [bucket 12] = 0x6
 *
 *        1   1,2                                      0
 *
 *        result bitmap is now: 0x6 & 0x6 [bucket 0] = 0x6
 *
 *        2    0                                      1,2
 *
 *        result bitmap is now: 0x6 & 0x6 [bucket 10] = 0x6
 *
 *        3    0                              1,2
 *
 *        result bitmap is now: 0x6 & 0x6 [bucket 8] = 0x6
 *
 *        4  0,1,2
 *
 *        result bitmap is now: 0x6 & 0x7 [bucket 0] = 0x6
 *
 *        5    0   1   2
 *
 *        result bitmap is now: 0x6 & 0x2 [bucket 1] = 0x2
 *
 *        6  0,1,2 1   1   1   1   1   1   1   1   1   1   1   1   1   1   1
 *
 *        result bitmap is now: 0x2 & 0x7 [bucket 0] = 0x2
 *
 *        7   1,2 1,2  1   1   1  0,1  1   1   1   1   1   1   1   1   1   1
 *
 *        final result bitmap for this field is: 0x2 & 0x3 [bucket 5] = 0x2
 *
 *   (4.4) at the next field, start with a new, all-zeroes result bitmap. For
 *   each bit set in the previous result bitmap, OR the new result bitmap with
 *   the corresponding value from the mapping table for this field
 *
 *      Example: with mapping table from pre-computation example, current result
 *      bitmap 0x02:
 *
 *                 rules
 *               0   1   2
 *      map to:  0   1   1
 *
 *      new result bitmap is 0x00 | 0x02 [bit/rule 1 was set] = 0x02
 *
 *      we can now extend this example to cover the second iteration of the step
 *      above (lookup and AND bitmap): assuming the port field is
 *      2048 < 0  0  5  0 >, with starting result bitmap 0x2, and lookup table
 *      for "port" field from pre-computation example:
 *
 *                     bucket
 *      group  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
 *        0   0,1
 *        1   0,1
 *        2                    0   1
 *        3   0,1
 *
 *       operations are: 0x2 & 0x3 [bucket 0] & 0x3 [bucket 0] & 0x2 [bucket 5]
 *       & 0x3 [bucket 0], resulting bitmap is 0x2.
 *
 *   (4.5) if this is the last field in the set, look up the value from the
 *   verdict table corresponding to the final result bitmap
 *
 *      Example: 0x2 resulting bitmap from 192.168.1.5:2048, verdict table from
 *      pre-computation example:
 *
 *                 rules
 *               0   1
 *      map to: 66  42
 *
 *      the verdict is 42.
 *
 *
 * == 5. Ideas for Future Optimisations
 *
 * (5.1) Reduction of number of rules by allowing inverted netmasks
 *
 * CAVEAT: This needs some complicated tracking for inverted entries, which
 * aren't implemented here at all. In order to further halve the number of
 * rules as described below, we also need to use sets of netmasks, instead of a
 * single netmask for each entry, and intersect between them and their
 * inversions. This also introduces further complexity.
 * 
 * Theorem 3 in [Rottenstreich 2010] shows that every range of b bits can be
 * converted into b bitmaps, with inversions, as also illustrated by section 9
 * in [Kogan 2014].
 *
 * Further, if we allow partial subtractions between netmasks, for each range R
 * of b bits, that can be converted into a b0 number of bitmaps, there exists a
 * complementary range, R1, that can be converted into a b1 number of bitmaps,
 * such that b1 + b0 = b.
 *
 * [TODO: Formal proof. This consideration is intuitively true and was validated
 * by bruteforcing over the IPv4 address space, so we can safely use it in the
 * implementation for the moment.]
 *
 * By construction, this allows us to express any range of b bits as ceil(b / 2)
 * entries, either as reversed or direct entries. By allowing reversed entries,
 * we need to also provide a reverse mapping possibility, which is achieved by
 * modifying the mapping table from pre-computation step 4.5, and the step
 * itself, as follows:
 * - every bit corresponding to a reverse rule is set in the initial result
 *   bitmap, which is not necessarily an all-zeroes bitmap anymore
 * - the transformation step becomes a XOR operation: the XOR step with the
 *   modified initial bitmap reverses the result, if we guarantee that one
 *   single XOR step is performed. This comes, in turn, from optimisation 5.2
 *
 *   Example: match on 0.0.0.1-127.255.255.255:1024 and on 192.168.1.1:1024.
 *   A direct mapping means we need 31 rules for the first address range, using
 *   reversed rules we only need 2 rules instead (rule #0: 0.0.0.0/32,
 *   rule #1: 128.0.0.0/1). Rule #2: 192.168.1.1/32.
 *   First lookup table:
 *                     bucket
 *      group  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
 *        0    0                               1   1   1   1  1,2  1   1   1
 *        1  0,1,2 1   1   1   1   1   1   1   1   1   1   1   1   1   1   1
 *        2   0,1  1   1   1   1   1   1   1   1   1  1,2  1   1   1   1   1
 *        3   0,1  1   1   1   1   1   1   1  1,2  1   1   1   1   1   1   1
 *        4  0,1,2 1   1   1   1   1   1   1   1   1   1   1   1   1   1   1
 *        5   0,1 1,2  1   1   1   1   1   1   1   1   1   1   1   1   1   1
 *        6  0,1,2 1   1   1   1   1   1   1   1   1   1   1   1   1   1   1
 *        7   0,1 1,2  1   1   1   1   1   1   1   1   1   1   1   1   1   1
 *
 *   Second lookup table: rule #0: 1024 for first entry, rule #1: 1024 for
 *                        second entry
 *                     bucket
 *      group  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
 *        0   0,1
 *        1                   0,1
 *        2   0,1
 *        3   0,1
 *
 *    mapping table: initial bitmap is 0x1 (entry with rule #0 in second table
 *                   is reversed in first table: set bit #0)
 *                 rules
 *                  0   1   2
 *      XOR with: 0x1 0x1 0x2
 *
 * - expansion to reverse rules is selected if the number of reversed rules is
 *   strictly less than the number of needed direct rules.
 *
 * (5.2) Single step for match and fill from result bitmap
 *
 * CAVEAT: This significantly affects complexity of listing and deletion,
 * because once rules are replaced, further steps are needed to rebuild the
 * originating entries.
 * 
 * Step 4.4 loops over all bits set in the current result bitmap: this is
 * needed as set entries might overlap, for one or more fields. The
 * computational complexity of mapping steps is O(m * log(n)), with m > 1
 * overlapping factor, because we have no tight bounds on the number of
 * overlapping entries. However, we can apply a reduction step on lookup tables,
 * and ensure a single bit is set in the result bitmap after each lookup table
 * is considered, with three observations:
 *
 * (5.2.1) In case two entries overlap entirely in a given lookup table, and
 * their rules are present in the previous mapping table (or if it's the first
 * lookup table), the entries can map to the same set of rules without loss of
 * generality.
 *
 *   Example: given entries 192.168.1.0/24:1024 and 192.168.1.0/24:2048, first
 *            lookup table initially has rules:
 *              #0: 192.168.1.0/24
 *              #1: 192.168.1.0/24
 *            and second lookup table has rules:
 *              #0: 1024
 *              #1: 2048
 *            with mapping table #0 -> #0 | #1 -> #1.
 *            After this reduction step, the first lookup table has a single
 *            rule (#0: 192.168.1.0/24), mapping to both rule #0 and #1.
 *
 *   Example: given entries 192.168.1.0/24:1024:10.0.0.1 and
 *            192.168.2.0/24:1024:10.0.0.1, we don't apply any reduction step.
 *            First lookup table has rules:
 *              #0: 192.168.1.0/24
 *              #1: 192.168.2.0/24
 *            and second lookup table has rules:
 *              #0: 1024
 *              #1: 1024
 *            with first mapping table #0 -> #0 | #1 -> #1. As there are no
 *            images in the first lookup table with both rules #0 and #1 set,
 *            these two rules will be preserved. Third lookup table has two
 *            rules:
 *              #0: 10.0.0.1/32
 *              #1: 10.0.0.1/32
 *            and second mapping table is: #0 -> #0 | #1 -> #1.
 *
 * (5.2.2) In case two entries overlap partially in a given lookup table, due to
 * the way we expand ranges to netmasks, one set of rules is always a proper
 * subset of the other overlapping one. That is, we can hit this situation:
 *
 *                     bucket
 *      group  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
 *        0       x,y
 *        1   x,y x,y  y   y
 *
 * but not:
 *
 *                     bucket
 *      group  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
 *        0       x,y
 *        1    x  x,y  y   y
 *
 * so in case entries corresponding to x map to a proper subset of the y rules,
 * x rules never occur without y rules. We can then replace the x,y pairs by
 * x, and merge their mapping tables.
 *
 *   Example: entries: 192.168.1.1-192.168.1.2:1
 *              expands to: 192.168.1.1/32:1 (#0:#0), 192.168.1.2/32:1 (#1:#0)
 *                     192.168.1.0-192.168.1.1:2
 *              expands to: 192.168.1.0/31:2 (#2:#1)
 *                     192.168.1.0-192.168.1.2:3
 *              expands to: 192.168.1.0/31:3 (#3:#2), 192.168.1.2/32:3 (#4:#2)
 *
 *   First lookup table (only last group shown):
 *                     bucket
 *      group  0    1    2   3   4   5   6   7   8   9  10  11  12  13  14  15
 *       ...
 *        7   2,3 0,2,3 1,4
 *
 *   Second lookup table (only last group shown):
 *                     bucket
 *      group  0    1    2   3   4   5   6   7   8   9  10  11  12  13  14  15
 *       ...
 *        3         0    1   2
 *
 *   Initial mapping table: #0 -> #0 | #1 -> #0 | #2 -> #1 | #3 -> #2 | #4 -> #2
 *
 *   Apply reduction step from 5.2.1 twice to first lookup table: rule #1
 *   entirely overlaps with #4, #2 entirely overlaps with #3. New lookup table:
 *
 *                     bucket
 *      group  0    1    2   3   4   5   6   7   8   9  10  11  12  13  14  15
 *       ...
 *        7    2   0,2   1
 *
 *   New mapping table: #0 -> #0 | #1 -> #0, #2 | #2 -> #1, #2
 *
 *   Apply reduction step from 5.2.2: 0 never occurs without 2:
 *
 *                     bucket
 *      group  0    1    2   3   4   5   6   7   8   9  10  11  12  13  14  15
 *       ...
 *        7    2    0    1
 *
 *   New mapping table: #0 -> #0, #1, #2 | #1 -> #0, #2 | #2 -> #1, #2
 *
 * (5.2.3) There are no other ways entries can overlap (directly from set
 * theory)
 * [TODO: Formal proof or reference.]
 *
 * (5.3) Single step for invert, match and bitmap fill operations (comes almost
 * for free with 5.2 and 5.1).
 *
 * We can now replace the loop from 4.4 with a single XOR operation
 * depending on the single bit set in the result bitmask after a lookup table is
 * evaluated. Computational complexity of that step becomes, in theory, O(1):
 * a single bit is set, hence lookup of the corresponding bitmap can be done in
 * constant time. For all practical implementations, this is O(log(n)): the
 * number of words in the mapping table grows linearly with the number of
 * entries, but number of rules grows sublinearly and we can find the set bit
 * with very efficient operations, too (e.g. x86's TZCNT, AVX-512 VPLZCNTQ,
 * PPC's CNTLZ, etc.)]
 *
 * (5.4) [TODO: Evaluate if this provides an actual advantage even with an
 * implementation of step 4.3 optionally based on AVX2 VPAND instrinsics.]
 * As an alternative to finding the first bit set in the result bitmap for the
 * mapping table lookup, we can terminate step 4.3 early, once we reach the last
 * group of a table and a set bit is found in the result, and use the position
 * of this bit directly for the mapping table lookup.
 *
 * (5.5) [TODO: Evaluate how bad this is on architectures where branching is
 * expensive, e.g. PPC, find out if it makes sense to implement this optionally
 * depending on architecture.]
 * - We can terminate the algorithm early if the initial result bitmap, as we
 *   start processing the next lookup table, is empty: that means that the
 *   packet won't match any entry
 * - We can terminate step 4.3 early if the result bitmap happens to be empty
 *   after a group is entirely evaluated: this means the result bitmap will be
 *   also empty at the beginning of step 4.4. We can't terminate the whole
 *   algorithm in this case, though, as entries might be represented with an
 *   inverted set of rules.
 *
 * (5.6) The mapping table described in step (3.5) grows in size linearly with
 * the product of rules for the current field and rules for the next field. For
 * entries with non-overlapping rules in a given field, this size would grow
 * with the number of rules for this field, independently of the fact that these
 * rules might map to the same set of rules in the next field.
 *
 *      Example: one entry, 192.168.1.0-192.168.2.1:2048
 *
 *      given lookup table #0 (see example above):
 *
 *                     bucket
 *      group  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
 *        0    0                                              1,2
 *        1   1,2                                      0
 *        2    0                                      1,2
 *        3    0                              1,2
 *        4  0,1,2
 *        5    0   1   2
 *        6  0,1,2 1   1   1   1   1   1   1   1   1   1   1   1   1   1   1
 *        7   1,2 1,2  1   1   1  0,1  1   1   1   1   1   1   1   1   1   1
 *
 *      and lookup table #1 with:
 *      - rule #0: 1024 mapping to buckets
 *        < 0  0  4  0 >
 *      - rule #1: 2048 mapping to buckets
 *        < 0  0  5  0 >
 *
 *                     bucket
 *      group  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
 *        0   0,1
 *        1   0,1
 *        2                    0   1
 *        3   0,1
 *
 *      we need to map rules for 10.0.0.5 in lookup table #0 (rule #0) to 1024
 *      in lookup table #1 (rule #0) and rules for 192.168.1.0-192.168.2.1
 *      (rules #1, #2) to 2048 in lookup table #2 (rule #1):
 *
 *                 rules
 *               0   1   2
 *      map to:  0   1   1
 *
 *
 * == 6. References
 *
 * [Ligatti 2010]
 *	A Packet-classification Algorithm for Arbitrary Bitmask Rules, with
 *	Automatic Time-space Tradeoffs
 *	Jay Ligatti, Josh Kuhn, and Chris Gage.
 *	Proceedings of the IEEE International Conference on Computer
 *	Communication Networks (ICCCN), August 2010.
 *	http://www.cse.usf.edu/~ligatti/papers/grouper-conf.pdf
 *
 * [Rottenstreich 2010]
 *	Worst-Case TCAM Rule Expansion
 *	Ori Rottenstreich and Isaac Keslassy.
 *	2010 Proceedings IEEE INFOCOM, San Diego, CA, 2010.
 *	http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.212.4592&rep=rep1&type=pdf
 *
 * [Kogan 2014]
 *	SAX-PAC (Scalable And eXpressive PAcket Classification)
 *	Kirill Kogan, Sergey Nikolenko, Ori Rottenstreich, William Culhane,
 *	and Patrick Eugster.
 *	Proceedings of the 2014 ACM conference on SIGCOMM, August 2014.
 *	http://www.sigcomm.org/sites/default/files/ccr/papers/2014/August/2619239-2626294.pdf
 */

#ifndef VERBOSE
#define PERF_TEST_BLOCK		100
#endif

#include <stdio.h>
#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <time.h>

#include "pipapo.h"
#include "set.h"
#include "match.h"
#include "util.h"

/* Specifiers for set description: label, type, length, and offset in packet */
struct desc_spec desc_specs[] = {
	{ "key",	KEY,	0,		0 },
	{ "dmac",	MAC,	MAC_LEN,	0 },
	{ "smac",	MAC,	MAC_LEN,	6 },
	{ "saddr",	ADDR,	ADDR_LEN,	26 },
	{ "daddr",	ADDR,	ADDR_LEN,	30 },
	{ "sport",	PORT,	PORT_LEN,	34 },
	{ "dport",	PORT,	PORT_LEN,	36 },
	{ "saddr6",	ADDR6,	ADDR6_LEN,	22 },
	{ "daddr6",	ADDR6,	ADDR6_LEN,	38 },
	{ "sport6",	PORT,	PORT_LEN,	54 },
	{ "dport6",	PORT,	PORT_LEN,	56 },
	{ NULL,		0,	0,		0 },
};

/**
 * desc_parse_spec() - Parse set layout from tokens on first line of set file
 * @d:		Pre-allocated set description to fill in
 * @n:		Number of field tokens
 * @tokens:	Labels of fields in desired set layout
 *
 * Return: 0 on success, negative error code on failure
 */
int desc_parse_spec(struct desc *d, int n, char (*tokens)[BUFSIZ])
{
	struct desc_spec *spec;
	int i;

	d->row_size = sizeof(enum set_ops);

	for (i = 0; i < n; i++) {
		for (spec = desc_specs; spec->label; spec++) {
			if (!strncmp(tokens[i], spec->label, BUFSIZ))
				break;
		}

		if (!spec->label) {
			fprintf(stderr, "Invalid set specifier token %s\n",
				tokens[i]);
			return -EINVAL;
		}

		d->layout[i] = spec;

		if (spec->type == KEY) {
			d->row_size += sizeof(uint32_t);
			continue;
		}

		d->fields++;
		if (spec->type == ADDR)
			d->row_size += sizeof(struct addr);
		else if (spec->type == ADDR6)
			d->row_size += sizeof(struct addr6);
		else if (spec->type == PORT)
			d->row_size += sizeof(struct port);
		else if (spec->type == MAC)
			d->row_size += sizeof(struct mac);
	}

	return 0;
}

/**
 * desc_parse_entry() - Parse set entries from set file
 * @d:		Pre-allocated set description, d->data to be filled
 * @at:		Starting position in d->data to be filled with next set entry
 * @n:		Number of fields
 * @tokens:	Set items: addresses, ports, ranges thereof
 *
 * Return: 0 on success, negative error code on failure
 */
int desc_parse_entry(struct desc *d, int at, int n, char (*tokens)[BUFSIZ])
{
	int i, base = at * d->row_size, offset, b;
	struct addr6 a6;
	struct addr a;
	struct port p;
	char *c, *end;
	struct mac m;
	uint32_t key;
	uint8_t v[6];

	if (!strncmp(tokens[0], "list", strlen(tokens[0]))) {
		*(enum set_ops *)(d->data + base) = LIST;
		return 0;
	}

	if (!strncmp(tokens[0], "add", strlen(tokens[0])))
		*(enum set_ops *)(d->data + base) = ADD;
	else if (!strncmp(tokens[0], "delete", strlen(tokens[0])))
		*(enum set_ops *)(d->data + base) = DEL;
	else
		return -EINVAL;
	offset = sizeof(enum set_ops);
	tokens++;
	n--;

	for (i = 0; i < n; i++) {
		switch (d->layout[i]->type) {
		case KEY:
			key = strtoul(tokens[i], &end, 10);
			if (*end)
				return -EINVAL;

			memcpy(d->data + base + offset, &key, sizeof(key));
			offset += sizeof(key);
			break;
		case ADDR:
			/* IPv4... */
			if ((c = strchr(tokens[i], '-'))) {
				/* range */
				if (!inet_aton(c + 1, (struct in_addr *)&a.end))
					return -EINVAL;
				a.cidr = 0;
				*c = 0;
			} else if ((c = strchr(tokens[i], '/'))) {
				/* netmask */
				*c = 0;
				a.cidr = strtoul(c + 1, &end, 10);
				if (*end)
					return -EINVAL;
			} else {
				/* address */
				a.cidr = 32;
			}

			if (!inet_aton(tokens[i], (struct in_addr *)&a.start))
				return -EINVAL;

			memcpy(d->data + base + offset, &a, sizeof(a));
			offset += sizeof(a);
			break;
		case ADDR6:
			/* IPv6... */
			if ((c = strchr(tokens[i], '-'))) {
				/* range */
				if (!inet_pton(AF_INET6, c + 1, &a6.end))
					return -EINVAL;
				a6.cidr = 0;
				*c = 0;
			} else if ((c = strchr(tokens[i], '/'))) {
				/* netmask */
				*c = 0;
				a6.cidr = strtoul(c + 1, &end, 10);
				if (*end)
					return -EINVAL;
			} else {
				/* address */
				a6.cidr = 128;
			}

			if (!inet_pton(AF_INET6, tokens[i], &a6.start))
				return -EINVAL;

			memcpy(d->data + base + offset, &a6, sizeof(a6));
			offset += sizeof(a6);
			break;
		case PORT:
			if ((c = strchr(tokens[i], '-'))) {
				*c = 0;
				p.end = htons(strtoul(c + 1, &end, 10));
				if (*end)
					return -EINVAL;
			} else {
				p.end = 0;
			}

			p.start = htons(strtoul(tokens[i], &end, 10));
			if (*end)
				return -EINVAL;

			memcpy(d->data + base + offset, &p, sizeof(p));
			offset += sizeof(p);
			break;
		case MAC:
			if ((c = strchr(tokens[i], '-')))
				*c = 0;

			b = sscanf(tokens[i],
				   "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx%*c",
				   v + 0, v + 1, v + 2, v + 3, v + 4, v + 5);
			if (b != 6)
				return -EINVAL;
			memcpy(&m.start, v, 6);

			if (c) {
				b = sscanf(c + 1,
					   "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx%*c",
					   v + 0, v + 1, v + 2,
					   v + 3, v + 4, v + 5);
				if (b != 6)
					return -EINVAL;
				memcpy(&m.end, v, 6);
			} else {
				memset(&m.end, 0, 6);
			}

			memcpy(d->data + base + offset, &m, sizeof(m));
			offset += sizeof(m);
			break;
		}
	}

	return 0;
}

/**
 * usage() - Display usage and exit
 * @argv0:	Executable name
 */
void usage(const char *argv0)
{
	fprintf(stderr, "%s <ruleset> <packets>\n", argv0);
	exit(1);
}

/**
 * packet_parse() - Parse packets and expected key from path into binary format
 * @path:	Path to packet file
 * @ret:	Pointer to packet buffer, incremented by this function
 *
 * Return: 0 on success, negative error code on failure
 */
static int packet_parse(const char *path, uint8_t **ret)
{
	int alloc, n, byte_count, packet_count = 0, err;
	uint8_t *packets, *pos, *tmp;
	char buf[BUFSIZ], *end;
	char *p;
	FILE *f;

	alloc = BUFSIZ;
	pos = packets = malloc(alloc * (sizeof(uint32_t) + PACKET_SIZE));
	if (!packets)
		return -ENOMEM;

	f = fopen(path, "r");
	if (!f) {
		err = -errno;
		goto fail;
	}

	while (fgets(buf, BUFSIZ, f)) {
		if (!*buf || *buf == '#' || *buf == ' ' || *buf == '\n')
			continue;

		if (++packet_count > alloc) {
			alloc += BUFSIZ;
			tmp = realloc(packets,
				      alloc * (sizeof(uint32_t) + PACKET_SIZE));
			if (!tmp) {
				err = -ENOMEM;
				goto fail_close;
			}
			packets = tmp;
			pos = packets;
			pos += (packet_count - 1) *
			       (sizeof(uint32_t) + PACKET_SIZE);
		}

		/* Key */
		p = strtok(buf, " \t");
		n = strtoul(p, &end, 0);
		if (*end) {
			err = -EINVAL;
			goto fail_close;
		}
		*(uint32_t *)pos = n;
		pos += sizeof(uint32_t);

		/* Packet bytes */
		byte_count = 0;
		while ((p = strtok(NULL, " \t\n"))) {
			n = strtoul(p, &end, 0);
			if (*end) {
				err = -EINVAL;
				goto fail_close;
			}
			*pos = n;
			pos++;
			if (++byte_count >= PACKET_SIZE)
				break;
		}
		if (byte_count < PACKET_SIZE) {
			memset(pos, 0, PACKET_SIZE - byte_count);
			pos += PACKET_SIZE - byte_count;
		}
	}
	fclose(f);

	*ret = packets;
	verbose("Read %i packets from %s\n", packet_count, path);

	return packet_count;

fail_close:
	fclose(f);
fail:
	free(packets);
	return err;
}

/**
 * desc_parse() - Parse set file from path
 * @path:	Path to set file
 * @d:		Pre-allocated set, with description and data parts
 *
 * Return: 0 on success, negative error code on failure
 */
int desc_parse(const char *path, struct desc *d)
{
	int first = 1, err = 0, alloc = 0, n;
	char t[9][BUFSIZ];
	char buf[BUFSIZ];
	uint8_t *tmp;
	FILE *f;

	f = fopen(path, "r");
	if (!f)
		return -errno;

	while (fgets(buf, BUFSIZ, f)) {
		n = sscanf(buf, "%s %s %s %s %s %s %s %s %s", t[0],
			   t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8]);
		if (!n || *t[0] == '#')
			continue;

		if (first) {
			/* First line is the set type specifier */
			err = desc_parse_spec(d, n, t);
			if (err)
				goto out;

			first = 0;
			continue;
		}

		if (d->entries == alloc) {
			alloc += BUFSIZ;
			tmp = realloc(d->data, alloc * d->row_size);
			if (!tmp) {
				err = -ENOMEM;
				goto out;
			}
			d->data = tmp;
		}

		err = desc_parse_entry(d, d->entries++, n, t);
	}

out:
	fclose(f);
	return err;
}

/**
 * main() - Entry point: parse set and packets, pre-compute, match
 * @argc:	Argument count
 * @argv:	Path to set description and packet files
 *
 * Return: 0 on success, 1 on matching failure, negative error code otherwise
 */
int main(int argc, char **argv)
{
	int packet_count, offset = 0, i, err;
	uint8_t *packets, *ptr;
	struct desc d = { 0 };
	struct kernel_set *ks;
	struct set s = { 0 };
	uint32_t key;
#ifdef VERBOSE
	char *total_mem_suffix = "B";
	int total_mem = 0;
	struct field *f;
#else
	struct timespec tp_start, tp_end;
	int repeat = 0, b = 0;
	double time = 0;
#endif
	(void)key;

	if (argc != 3)
		usage(argv[0]);

	verbose("=== Parsing\n");

	if ((packet_count = packet_parse(argv[2], &packets)) < 0) {
		fprintf(stderr, "Parsing failed, %s\n", strerror(packet_count));
		exit(packet_count);
	}

	if ((err = desc_parse(argv[1], &d))) {
		fprintf(stderr, "Parsing failed, %s\n", strerror(err));
		return err;
	}
	verbose("\n");

	verbose("=== Pre-computation\n");

	if ((err = init(&s, d.layout))) {
		fprintf(stderr, "Set init failed, %s\n", strerror(err));
		return err;
	}

	for (i = 0; i < d.entries; i++) {
		if (*(enum set_ops *)(d.data + offset) == ADD)
			err = add(&s, d.layout,
				  d.data + offset + sizeof(enum set_ops));
		else if (*(enum set_ops *)(d.data + offset) == LIST)
			list_or_del(&s, d.layout, NULL);
		else if (*(enum set_ops *)(d.data + offset) == DEL)
			err = list_or_del(&s, d.layout,
					  d.data + offset +
					  sizeof(enum set_ops));

		if (err)
			return err;
		offset += d.row_size;
	}

	for (i = 0; d.layout[i]->type != KEY; i++) {
		verbose("\n");
		verbose("Lookup table for %s:\n", d.layout[i]->label);
		show_lookup(&s.fields[i]);
		verbose("Mapping table for %s:\n", d.layout[i]->label);
		show_mapping(&s.fields[i], d.layout[i + 1]->type == KEY);
	}
	verbose("\n");

	verbose("=== Match\n");

	ks = kernel_init(&s, d.layout);

#ifndef VERBOSE
repeat_block:
	repeat = PERF_TEST_BLOCK;
	b++;
	clock_gettime(CLOCK_MONOTONIC, &tp_start);
	do {
#endif
		ptr = packets;
		for (i = 0; i < packet_count; i++) {
			key = match(ks, ptr + sizeof(uint32_t));
#ifdef VERBOSE
			verbose("Packet #%i => key: %u\n\n", i, key);
			if (key != *(uint32_t *)ptr) {
				verbose("FAIL: expected key: %u\n",
					*(uint32_t *)ptr);
				return 1;
			}
#endif
			ptr += PACKET_SIZE + sizeof(uint32_t);
		}
#ifndef VERBOSE
	} while (repeat--);
	clock_gettime(CLOCK_MONOTONIC, &tp_end);
	time += (tp_end.tv_sec - tp_start.tv_sec) +
	        (tp_end.tv_nsec - tp_start.tv_nsec) / 1E9;
	if (time < 5) {
		goto repeat_block;
	}

	fprintf(stdout, "Matched %lfM packets in %lfs (%lf Mpps)\n",
		PERF_TEST_BLOCK * b * packet_count / 1000.0 / 1000,
		time,
		PERF_TEST_BLOCK * b * packet_count / 1000.0 / 1000 / time);
#endif

#ifdef VERBOSE
	verbose("\n");

	verbose("\n=== Memory Usage\n");
	for_each_field(f, &s, d.layout) {
		char *lt_suffix = "B", *mt_suffix = "B", *field_suffix = "B";
		int lt_size = f->groups * BUCKETS * f->bsize;
		int mt_size = f->rules * sizeof(*f->mt);
		int field_size;

		total_mem += field_size = lt_size + mt_size + sizeof(*f);

		if (lt_size > 20 * 1024) {
			lt_size /= 1024;
			lt_suffix = "KiB";
		}

		if (mt_size > 20 * 1024) {
			mt_size /= 1024;
			mt_suffix = "KiB";
		}

		if (field_size > 20 * 1024) {
			field_size /= 1024;
			field_suffix = "KiB";
		}

		verbose("Field %i, %s, %i rules:\n", i, d.layout[i]->label,
			f->rules);
		verbose("  lookup table: %i%s\n", lt_size, lt_suffix);
		free(f->lt);
		verbose("  mapping table: %i%s\n", mt_size, mt_suffix);
		free(f->mt);
		verbose("  total: %i%s\n", field_size, field_suffix);
		verbose("\n");
	}

	if (total_mem > 50 * 1024 * 1024) {
		total_mem /= 1024 * 1024;
		total_mem_suffix = "MiB";
	} else if (total_mem > 20 * 1024) {
		total_mem /= 1024;
		total_mem_suffix = "KiB";
	}
	verbose("Total: %i%s\n", total_mem, total_mem_suffix);
#endif

	/* Rest of allocated data is volatile */
	free(s.fields);
	free(d.data);
	free(packets);
	free(ks->map[0]);
	free(ks->map[1]);
	for (i = 0; i < 8; i++) {
		free(ks->lt[i]);
		free(ks->mt[i]);
	}
	free(ks);

	return 0;
}