Last active
May 4, 2023 16:48
-
-
Save stevemk14ebr/24efd412d263842c58410f8775f97572 to your computer and use it in GitHub Desktop.
Rust Pattern Matching Benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use regex::bytes; | |
use microbench::{self, Options}; | |
fn get_pattern_size(signature: &[u8]) -> usize { | |
// c = 2 * b + (b - 1) . 2 chars per byte + b - 1 spaces between | |
(signature.len() + 1) / 3 | |
} | |
fn get_bits(x: u8) -> u8 { | |
// ascii numbers to byte | |
if x >= b'0' && x <= b'9' { | |
x - b'0' | |
} else { // ascii letters to hex byte | |
// & 0xDF converts lowercase ascii to uppercase | |
(x & 0xDF).wrapping_sub(b'A') + 0xa | |
} | |
} | |
// signature must use ? as mask for nibbles, have a space between each byte, must not prefix bytes with 0x or \x, and be ascii. | |
// upper and lowercase supported. | |
fn find_pattern(signature: &[u8], data: &[u8]) -> Vec<u64> { | |
let mut matches = Vec::new(); | |
let pattern_size = get_pattern_size(signature); | |
for i in 0..data.len() { | |
let mut sig_idx = 0; | |
while sig_idx < pattern_size && (i+sig_idx) < data.len() { | |
let sig_pat_idx = sig_idx * 3; | |
let mut sig_hi = get_bits(signature[sig_pat_idx]) << 4; | |
let mut sig_lo = get_bits(signature[sig_pat_idx + 1]); | |
let dat_byt = data[i + sig_idx]; | |
// check for ex: A? | |
if signature[sig_pat_idx + 1] == b'?' { | |
sig_lo = dat_byt & 0xF; | |
} | |
if signature[sig_pat_idx] == b'?' { | |
sig_hi = dat_byt & 0xF0; | |
} | |
if dat_byt != (sig_hi | sig_lo) { | |
break; | |
} | |
sig_idx += 1; | |
} | |
if sig_idx >= pattern_size { | |
matches.push(i as u64); | |
} | |
} | |
matches | |
} | |
fn basic_match(haystack: &[u8]) -> Vec<u64> { | |
let signature = "AA B? ?? ?? CA BB CC"; | |
let basic_matches = find_pattern(signature.as_bytes(), &haystack); | |
basic_matches | |
} | |
fn re_match(re: &bytes::Regex, haystack: &[u8]) -> Vec<u64> { | |
let re_matches: Vec<u64> = re.find_iter(&haystack).map(|result| | |
result.start() as u64 | |
).collect(); | |
re_matches | |
} | |
fn main() { | |
let haystack = [0xAA, 0xBD, 0xCC, 0xDD, 0xCA, 0xBB, 0xCC, 0xAA, 0xBD, 0xCC, 0xDD, 0xCA, 0xBB, 0xCC]; | |
// Regex can't match leading nibbles though ;) ?A is not easy to represent | |
let re = bytes::Regex::new(r"(?-u)\xAA[\xB0-\xBF][\x00-\xFF][\x00-\xFF]\xCA\xBB\xCC").unwrap(); | |
let options = Options::default(); | |
microbench::bench(&options, "basic_match", || basic_match(&haystack)); | |
microbench::bench(&options, "re_match", || re_match(&re, &haystack)); | |
} | |
// PS C:\source\repos\qsig_rust> .\target\release\qsig_rust.exe | |
// basic_match (5.0s) ... 60.644 ns/iter (0.998 R²) | |
// re_match (5.0s) ... 202.655 ns/iter (0.999 R²) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment