Skip to content

Instantly share code, notes, and snippets.

@stevemk14ebr
Last active May 4, 2023 16:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save stevemk14ebr/24efd412d263842c58410f8775f97572 to your computer and use it in GitHub Desktop.
Save stevemk14ebr/24efd412d263842c58410f8775f97572 to your computer and use it in GitHub Desktop.
Rust Pattern Matching Benchmark
use regex::bytes;
use microbench::{self, Options};
fn get_pattern_size(signature: &[u8]) -> usize {
// c = 2 * b + (b - 1) . 2 chars per byte + b - 1 spaces between
(signature.len() + 1) / 3
}
fn get_bits(x: u8) -> u8 {
// ascii numbers to byte
if x >= b'0' && x <= b'9' {
x - b'0'
} else { // ascii letters to hex byte
// & 0xDF converts lowercase ascii to uppercase
(x & 0xDF).wrapping_sub(b'A') + 0xa
}
}
// signature must use ? as mask for nibbles, have a space between each byte, must not prefix bytes with 0x or \x, and be ascii.
// upper and lowercase supported.
fn find_pattern(signature: &[u8], data: &[u8]) -> Vec<u64> {
let mut matches = Vec::new();
let pattern_size = get_pattern_size(signature);
for i in 0..data.len() {
let mut sig_idx = 0;
while sig_idx < pattern_size && (i+sig_idx) < data.len() {
let sig_pat_idx = sig_idx * 3;
let mut sig_hi = get_bits(signature[sig_pat_idx]) << 4;
let mut sig_lo = get_bits(signature[sig_pat_idx + 1]);
let dat_byt = data[i + sig_idx];
// check for ex: A?
if signature[sig_pat_idx + 1] == b'?' {
sig_lo = dat_byt & 0xF;
}
if signature[sig_pat_idx] == b'?' {
sig_hi = dat_byt & 0xF0;
}
if dat_byt != (sig_hi | sig_lo) {
break;
}
sig_idx += 1;
}
if sig_idx >= pattern_size {
matches.push(i as u64);
}
}
matches
}
fn basic_match(haystack: &[u8]) -> Vec<u64> {
let signature = "AA B? ?? ?? CA BB CC";
let basic_matches = find_pattern(signature.as_bytes(), &haystack);
basic_matches
}
fn re_match(re: &bytes::Regex, haystack: &[u8]) -> Vec<u64> {
let re_matches: Vec<u64> = re.find_iter(&haystack).map(|result|
result.start() as u64
).collect();
re_matches
}
fn main() {
let haystack = [0xAA, 0xBD, 0xCC, 0xDD, 0xCA, 0xBB, 0xCC, 0xAA, 0xBD, 0xCC, 0xDD, 0xCA, 0xBB, 0xCC];
// Regex can't match leading nibbles though ;) ?A is not easy to represent
let re = bytes::Regex::new(r"(?-u)\xAA[\xB0-\xBF][\x00-\xFF][\x00-\xFF]\xCA\xBB\xCC").unwrap();
let options = Options::default();
microbench::bench(&options, "basic_match", || basic_match(&haystack));
microbench::bench(&options, "re_match", || re_match(&re, &haystack));
}
// PS C:\source\repos\qsig_rust> .\target\release\qsig_rust.exe
// basic_match (5.0s) ... 60.644 ns/iter (0.998 R²)
// re_match (5.0s) ... 202.655 ns/iter (0.999 R²)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment