Make IP regex much more robust and add tests

IP will be correctly extracted in any regex line, even if it is
surrounded by greedy catch-all: .*<ip>.*

This was actually hard to do!
This commit is contained in:
ppom 2025-07-29 12:00:00 +02:00
commit 19e3b2bf98
No known key found for this signature in database
4 changed files with 244 additions and 81 deletions

View file

@ -26,22 +26,90 @@ impl PatternType {
}
pub fn regex(&self) -> Option<String> {
let num4 = r#"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)"#;
let num6 = r#"[0-9a-fA-F]{1,4}"#;
// Those orders of preference are very important for <ip>
// patterns that have greedy catch-all regexes before or after them,
// for example: "Failed password .*<ip>.*"
let num4 = [
// Order is important, first is preferred.
// first 25x
"(?:25[0-5]",
// then 2xx
"2[0-4][0-9]",
// then 1xx
"1[0-9][0-9]",
// then 0xx
"[0-9][0-9]",
// then 0x
"[0-9])",
]
.join("|");
let numsix = "[0-9a-fA-F]{1,4}";
let ipv4 = format!(r#"{num4}(?:\.{num4}){{3}}"#);
let ipv6 = [
format!(r#"(?:{num6}:){{7}}{num6}"#),
format!(r#"(?:{num6}:){{1,7}}:"#),
format!(r#"(?:{num6}:){{1,6}}:{num6}"#),
format!(r#"(?:{num6}:){{1,5}}(?::{num6}){{1,2}}"#),
format!(r#"(?:{num6}:){{1,4}}(?::{num6}){{1,3}}"#),
format!(r#"(?:{num6}:){{1,3}}(?::{num6}){{1,4}}"#),
format!(r#"(?:{num6}:){{1,2}}(?::{num6}){{1,5}}"#),
format!(r#"{num6}:(?:(?::{num6}){{1,6}})"#),
format!(r#":(?:(?::{num6}){{1,7}}|:)"#),
format!(r#"fe80:(?::[0-9a-fA-F]{{0,4}}){{0,4}}%[0-9a-zA-Z]+"#),
// We're unrolling all possibilities, longer IPv6 first,
// to make it super-greedy,
// more than an eventual .* before or after <ip> ,
// that would "eat" its first or last blocks.
// Order is important, first is preferred.
// We put IPv4-suffixed regexes first
format!(r#"::(?:ffff(?::0{{1,4}})?:)?{ipv4}"#),
format!(r#"(?:{num6}:){{1,4}}:{ipv4}"#),
format!(r#"(?:{numsix}:){{1,4}}:{ipv4}"#),
// Then link-local addresses with interface name
format!(r#"fe80:(?::[0-9a-fA-F]{{0,4}}){{0,4}}%[0-9a-zA-Z]+"#),
// Full IPv6
format!("(?:{numsix}:){{7}}{numsix}"),
// 1 block cut
format!("(?:{numsix}:){{7}}:"),
format!("(?:{numsix}:){{6}}:{numsix}"),
format!("(?:{numsix}:){{5}}(?::{numsix}){{2}}"),
format!("(?:{numsix}:){{4}}(?::{numsix}){{3}}"),
format!("(?:{numsix}:){{3}}(?::{numsix}){{4}}"),
format!("(?:{numsix}:){{2}}(?::{numsix}){{5}}"),
format!("{numsix}:(?:(?::{numsix}){{6}})"),
format!(":(?:(?::{numsix}){{7}})"),
// 2 blocks cut
format!("(?:{numsix}:){{6}}:"),
format!("(?:{numsix}:){{5}}:{numsix}"),
format!("(?:{numsix}:){{4}}(?::{numsix}){{2}}"),
format!("(?:{numsix}:){{3}}(?::{numsix}){{3}}"),
format!("(?:{numsix}:){{2}}(?::{numsix}){{4}}"),
format!("{numsix}:(?:(?::{numsix}){{5}})"),
format!(":(?:(?::{numsix}){{6}})"),
// 3 blocks cut
format!("(?:{numsix}:){{5}}:"),
format!("(?:{numsix}:){{4}}:{numsix}"),
format!("(?:{numsix}:){{3}}(?::{numsix}){{2}}"),
format!("(?:{numsix}:){{2}}(?::{numsix}){{3}}"),
format!("{numsix}:(?:(?::{numsix}){{4}})"),
format!(":(?:(?::{numsix}){{5}})"),
// 4 blocks cut
format!("(?:{numsix}:){{4}}:"),
format!("(?:{numsix}:){{3}}:{numsix}"),
format!("(?:{numsix}:){{2}}(?::{numsix}){{2}}"),
format!("{numsix}:(?:(?::{numsix}){{3}})"),
format!(":(?:(?::{numsix}){{4}})"),
// 5 blocks cut
format!("(?:{numsix}:){{3}}:"),
format!("(?:{numsix}:){{2}}:{numsix}"),
format!("{numsix}:(?:(?::{numsix}){{2}})"),
format!(":(?:(?::{numsix}){{3}})"),
// 6 blocks cut
format!("(?:{numsix}:){{2}}:"),
format!("{numsix}::{numsix}"),
format!(":(?:(?::{numsix}){{2}})"),
// 7 blocks cut
format!("{numsix}::"),
format!("::{numsix}"),
// special cuts
// 8 blocks cut
format!("::"),
]
.join("|");
match self {
@ -211,7 +279,13 @@ impl PatternIp {
mod patternip_tests {
use std::net::{Ipv4Addr, Ipv6Addr};
use crate::concepts::{pattern::ip::Cidr, Pattern};
use chrono::Local;
use tokio::{fs::read_to_string, task::JoinSet};
use crate::{
concepts::{pattern::ip::Cidr, Action, Duplicate, Filter, Pattern},
daemon::{tests::TestBed, React},
};
use super::{PatternIp, PatternType};
@ -459,6 +533,73 @@ mod patternip_tests {
}
}
/// IPv4 addresses in dotted-quad notation, used by the tests of this module
/// as inputs that an IPv4-accepting pattern is expected to match.
///
/// The samples mix three-, two- and one-digit octets (including `0` and
/// `255`) in both the first and the last position.
pub const VALID_IPV4: [&str; 8] = [
"252.4.92.250",
"212.4.92.210",
"112.4.92.110",
"83.4.92.35",
"83.4.92.0",
"3.254.92.4",
"1.2.3.4",
"255.255.255.255",
];
/// IPv6 addresses in textual form, used by the tests of this module as
/// inputs that an IPv6-accepting pattern is expected to match.
///
/// The list is grouped by the number of explicitly written blocks, and for
/// each count the `::` abbreviation is placed at every possible position
/// (leading, between any two blocks, trailing).
pub const VALID_IPV6: [&str; 42] = [
// all accepted characters
"0123:4567:89:ab:cdef:AB:CD:EF",
// ipv6-mapped ipv4
"::ffff:1.2.3.4",
"ffff::1.2.3.4",
// 8 blocks
"1111:2:3:4:5:6:7:8888",
// 7 blocks
"::2:3:4:5:6:7:8888",
"1111::3:4:5:6:7:8888",
"1111:2::4:5:6:7:8888",
"1111:2:3::5:6:7:8888",
"1111:2:3:4::6:7:8888",
"1111:2:3:4:5::7:8888",
"1111:2:3:4:5:6::8888",
"1111:2:3:4:5:6:7::",
// 6 blocks
"::3:4:5:6:7:8888",
"1111::4:5:6:7:8888",
"1111:2::5:6:7:8888",
"1111:2:3::6:7:8888",
"1111:2:3:4::7:8888",
"1111:2:3:4:5::8888",
"1111:2:3:4:5:6::",
// 5 blocks
"::4:5:6:7:8888",
"1111::5:6:7:8888",
"1111:2::6:7:8888",
"1111:2:3::7:8888",
"1111:2:3:4::8888",
"1111:2:3:4:5::",
// 4 blocks
"::5:6:7:8888",
"1111::6:7:8888",
"1111:2::7:8888",
"1111:2:3::8888",
"1111:2:3:4::",
// 3 blocks
"::6:7:8888",
"1111::7:8888",
"1111:2::8888",
"1111:2:3::",
// 2 blocks
"::7:8888",
"1111::8888",
"1111:2::",
// 1 block
"::8",
"::8888",
"1::",
"1111::",
// 0 block
"::",
];
#[test]
fn test_ip_regexes() {
for pattern_type in [PatternType::Ip, PatternType::Ipv4, PatternType::Ipv6] {
@ -481,8 +622,9 @@ mod patternip_tests {
};
}
assert2!(accepts_ipv4 == regex.is_match("1.2.3.4"));
assert2!(accepts_ipv4 == regex.is_match("255.255.255.255"));
for ip in VALID_IPV4 {
assert2!(accepts_ipv4 == regex.is_match(ip));
}
assert2!(!regex.is_match(".1.2.3.4"));
assert2!(!regex.is_match(" 1.2.3.4"));
@ -493,14 +635,9 @@ mod patternip_tests {
assert2!(!regex.is_match("1.2..4"));
assert2!(!regex.is_match("1.2..3.4"));
assert2!(accepts_ipv6 == regex.is_match("1:2:3:4:5:6:7:8"));
assert2!(accepts_ipv6 == regex.is_match("::"));
assert2!(accepts_ipv6 == regex.is_match("1::"));
assert2!(accepts_ipv6 == regex.is_match("::1"));
assert2!(accepts_ipv6 == regex.is_match("1:2::6:7:8"));
assert2!(accepts_ipv6 == regex.is_match("0123:4567:89:ab:cdef:AB:CD:EF"));
assert2!(accepts_ipv6 == regex.is_match("::ffff:1.2.3.4"));
assert2!(accepts_ipv6 == regex.is_match("ffff::1.2.3.4"));
for ip in VALID_IPV6 {
assert2!(accepts_ipv6 == regex.is_match(ip));
}
assert2!(!regex.is_match("1:"));
assert2!(!regex.is_match("1:::"));
@ -510,6 +647,74 @@ mod patternip_tests {
assert2!(!regex.is_match("1:2:3:4:5:6:7:8:"));
}
}
/// End-to-end check that `<ip>` is extracted intact from a log line,
/// whether the filter regex anchors it with literal text or surrounds it
/// with greedy `.*` on the left, the right, or both sides.
///
/// Every address of `VALID_IPV4` and `VALID_IPV6` is embedded in thirteen
/// line shapes. Each (address, line) pair runs as its own task with its own
/// `TestBed`, so the output files never collide.
#[tokio::test(flavor = "multi_thread")]
async fn ip_pattern_matches() {
    let mut join_set = JoinSet::new();
    for ip in VALID_IPV4.iter().chain(&VALID_IPV6) {
        for line in [
            format!("borned {ip} test"),
            //
            format!("right-unborned {ip} text"),
            format!("right-unborned {ip}text"),
            format!("right-unborned {ip}:"),
            //
            format!("left-unborned text {ip}"),
            format!("left-unborned text{ip}"),
            format!("left-unborned :{ip}"),
            //
            format!("full-unborned text {ip} text"),
            format!("full-unborned text{ip} text"),
            format!("full-unborned text {ip}text"),
            format!("full-unborned text{ip}text"),
            format!("full-unborned :{ip}:"),
            format!("full-unborned : {ip}:"),
        ] {
            // Spawn the case directly on the JoinSet. Wrapping it in
            // `tokio::spawn` first would turn a failed assertion into an
            // `Err(JoinError)` *value* returned by the outer task, which
            // `join_all` hands back without panicking: the test would then
            // pass even though assertions failed.
            join_set.spawn(async move {
                let bed = TestBed::new();
                let filter = Filter::new_static(
                    vec![Action::new(
                        vec!["sh", "-c", &format!("echo <ip> >> {}", &bed.out_file)],
                        None,
                        false,
                        "test",
                        "test",
                        "a1",
                        &bed.ip_patterns,
                    )],
                    vec![
                        "^borned <ip> test",
                        "^right-unborned <ip>.*",
                        "^left-unborned .*<ip>",
                        "^full-unborned .*<ip>.*",
                    ],
                    None,
                    None,
                    "test",
                    "test",
                    Duplicate::Ignore,
                    &bed.ip_patterns,
                );
                let bed = bed.part2(filter, Local::now(), None).await;
                assert_eq!(
                    bed.manager.handle_line(&line, Local::now()),
                    React::Trigger,
                    "line: {line}"
                );
                // Give the spawned `sh` action time to append to the file.
                tokio::time::sleep(std::time::Duration::from_millis(50)).await;
                // NOTE(review): the expected value is the address exactly as
                // written in the input line. If the daemon normalizes
                // addresses before substituting `<ip>`, this comparison must
                // use the normalized form instead — confirm.
                assert_eq!(
                    &read_to_string(&bed.out_file).await.unwrap().trim_end(),
                    ip,
                    "line: {line}"
                );
                println!("line ok: {line}");
            });
        }
    }
    // `join_all` panics if any task panicked, which fails the test.
    join_set.join_all().await;
}
}
/// Normalize a string as an IP address.

View file

@ -1,5 +1,5 @@
#[cfg(test)]
mod tests;
pub mod tests;
mod state;

View file

@ -16,7 +16,7 @@ use crate::{
tests::TempDatabase,
};
struct TestBed {
pub struct TestBed {
pub _out_path: TempPath,
pub out_file: String,
pub az_patterns: Patterns,
@ -24,7 +24,7 @@ struct TestBed {
}
impl TestBed {
fn new() -> Self {
pub fn new() -> Self {
let _out_path = tempfile::NamedTempFile::new().unwrap().into_temp_path();
let out_file = _out_path.to_str().unwrap().to_string();
@ -71,7 +71,12 @@ ignorecidr:
}
}
async fn part2(self, filter: &'static Filter, now: Time, db: Option<TempDatabase>) -> TestBed2 {
pub async fn part2(
self,
filter: &'static Filter,
now: Time,
db: Option<TempDatabase>,
) -> TestBed2 {
let mut db = match db {
Some(db) => db,
None => TempDatabase::default().await,
@ -97,7 +102,7 @@ ignorecidr:
}
}
struct TestBed2 {
pub struct TestBed2 {
pub _out_path: TempPath,
pub out_file: String,
pub semaphore: Arc<Semaphore>,
@ -108,7 +113,7 @@ struct TestBed2 {
}
impl TestBed2 {
fn assert_empty_trees(&self) {
pub fn assert_empty_trees(&self) {
let state = self.manager.state.lock().unwrap();
assert!(state.matches.is_empty(), "matches must be empty");
assert!(
@ -738,54 +743,3 @@ async fn multiple_triggers() {
);
}
}
/// Feeds `borned <ip> test` lines to a filter whose single action appends
/// `<ip>` to the test bed's output file, and checks for each sample that the
/// file then holds the expected normalized spelling of the address.
#[tokio::test]
async fn ip_pattern_matches() {
let bed = TestBed::new();
let filter = Filter::new_static(
vec![Action::new(
vec!["sh", "-c", &format!("echo <ip> >> {}", &bed.out_file)],
None,
false,
"test",
"test",
"a1",
&bed.ip_patterns,
)],
vec!["borned <ip> test", "unborned <ip>.*"],
None,
None,
"test",
"test",
Duplicate::Ignore,
&bed.ip_patterns,
);
let bed = bed.part2(filter, Local::now(), None).await;
// Pairs of (address as written in the log line, text expected in the
// output file).
let ips = [
// IPv4
("83.4.92.35", "83.4.92.35"),
("83.4.92.0", "83.4.92.0"),
// Normal IPv6
("1:2:3:4:5:6:7:08", "1:2:3:4:5:6:7:8"),
// IPv6 with ::
("1:2:3:04:0:0:7:8", "1:2:3:4::7:8"),
("1:2:3:4:0::", "1:2:3:4::"),
// IPv6-mapped IPv4 ::
("ffff::1.2.3.4", "1.2.3.4"),
];
for (ip, ip_normalized) in ips {
assert_eq!(
bed.manager
.handle_line(&format!("borned {ip} test"), Local::now()),
React::Trigger
);
// Wait before reading the file the `sh` action appends to.
tokio::time::sleep(Duration::from_millis(50)).await;
assert_eq!(
read_to_string(&bed.out_file).unwrap().trim_end(),
ip_normalized
);
// Empty the shared output file so the next sample starts clean.
tokio::fs::write(&bed.out_file, "").await.unwrap();
}
}

View file

@ -18,10 +18,14 @@ use tracing::{debug, info};
use crate::{concepts::Config, treedb::Database};
use filter::FilterManager;
pub use filter::React;
pub use shutdown::{ShutdownController, ShutdownDelegate, ShutdownToken};
use socket::socket_manager;
use stream::StreamManager;
#[cfg(test)]
pub use filter::tests;
mod filter;
mod shutdown;
mod socket;