From 9cc702e9c7cc8a44abd31288aaefa662c434dc63 Mon Sep 17 00:00:00 2001 From: ppom Date: Wed, 25 Sep 2024 12:00:00 +0200 Subject: [PATCH] Replace postcard by bincode - The interface is easier - This permits me to have unbounded buffers. I did some benchmarking on both options. I can't see any difference in terms of - CPU performance, - memory usage, and - storage size. I used this file: `datasize.jsonnet` ```jsonnet { patterns: { num: { regex: @'([0-9]+)', }, }, streams: { s1: { cmd: ['seq', '-w', '499999'], filters: { f1: { regex: [ '^$', ], retry: 10, retryperiod: '1m', actions: { a: { cmd: ['true'], }, b: { cmd: ['true'], after: '1m', }, }, }, }, }, }, } ``` And these commands: ``` rm reaction-* sudo systemd-run --wait -p User=ao -p MemoryAccounting=yes -p WorkingDirectory=(pwd) -p Environment=PATH=/run/current-system/sw/bin/ time ./target/release/reaction start -c datasize.jsonnet && ls -l reaction-matches.db sudo systemd-run --wait -p User=ao -p MemoryAccounting=yes -p WorkingDirectory=(pwd) -p Environment=PATH=/run/current-system/sw/bin/ time ./target/release/reaction start -c datasize.jsonnet && ls -l reaction-matches.db sudo systemd-run --wait -p User=ao -p MemoryAccounting=yes -p WorkingDirectory=(pwd) -p Environment=PATH=/run/current-system/sw/bin/ time ./target/release/reaction start -c datasize.jsonnet && ls -l reaction-matches.db ``` At the first invocation, reaction reads no DB. At the second invocation, reaction reads a DB. At the third invocation, reaction reads a double-sized DB. 
--- rust/Cargo.lock | 123 +-------------------------- rust/Cargo.toml | 2 +- rust/datasize.jsonnet | 30 +++++++ rust/src/daemon/database/lowlevel.rs | 62 ++++++++------ rust/src/daemon/database/mod.rs | 2 +- 5 files changed, 69 insertions(+), 150 deletions(-) create mode 100644 rust/datasize.jsonnet diff --git a/rust/Cargo.lock b/rust/Cargo.lock index d2d897b..4d0efc4 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -85,15 +85,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "atomic-polyfill" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cf2bce30dfe09ef0bfaef228b9d414faaf7e563035494d7fe092dba54b300f4" -dependencies = [ - "critical-section", -] - [[package]] name = "autocfg" version = "1.3.0" @@ -127,12 +118,6 @@ version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - [[package]] name = "cc" version = "1.1.6" @@ -214,12 +199,6 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" -[[package]] -name = "cobs" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" - [[package]] name = "colorchoice" version = "1.0.2" @@ -232,12 +211,6 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" -[[package]] -name = "critical-section" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f64009896348fc5af4222e9cf7d7d82a95a256c634ebcf61c53e4ea461422242" - [[package]] name = "ctrlc" version = "3.4.4" @@ -248,18 +221,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "embedded-io" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" - -[[package]] -name = "embedded-io" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" - [[package]] name = "equivalent" version = "1.0.1" @@ -282,35 +243,12 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" -[[package]] -name = "hash32" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67" -dependencies = [ - "byteorder", -] - [[package]] name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -[[package]] -name = "heapless" -version = "0.7.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdc6457c0eb62c71aac4bc17216026d8410337c4126773b9c5daba343f17964f" -dependencies = [ - "atomic-polyfill", - "hash32", - "rustc_version", - "serde", - "spin", - "stable_deref_trait", -] - [[package]] name = "heck" version = "0.5.0" @@ -472,16 +410,6 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" -[[package]] -name = "lock_api" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" -dependencies = [ - 
"autocfg", - "scopeguard", -] - [[package]] name = "log" version = "0.4.22" @@ -570,19 +498,6 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c719dcf55f09a3a7e764c6649ab594c18a177e3599c467983cdf644bfc0a4088" -[[package]] -name = "postcard" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f7f0a8d620d71c457dd1d47df76bb18960378da56af4527aaa10f515eee732e" -dependencies = [ - "cobs", - "embedded-io 0.4.0", - "embedded-io 0.6.1", - "heapless", - "serde", -] - [[package]] name = "proc-macro2" version = "1.0.86" @@ -605,6 +520,7 @@ dependencies = [ name = "reaction" version = "0.1.0" dependencies = [ + "bincode", "chrono", "clap", "clap_complete", @@ -612,7 +528,6 @@ dependencies = [ "jrsonnet-evaluator", "log", "num_cpus", - "postcard", "regex", "serde", "serde_json", @@ -657,15 +572,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" -[[package]] -name = "rustc_version" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" -dependencies = [ - "semver", -] - [[package]] name = "rustix" version = "0.38.34" @@ -685,18 +591,6 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "semver" -version = "1.0.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" - [[package]] name = "serde" version = "1.0.204" @@ -742,21 +636,6 @@ dependencies 
= [ "unsafe-libyaml", ] -[[package]] -name = "spin" -version = "0.9.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" -dependencies = [ - "lock_api", -] - -[[package]] -name = "stable_deref_trait" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" - [[package]] name = "strsim" version = "0.11.1" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 9df7cce..4503756 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +bincode = "1.3.3" chrono = { version = "0.4.38", features = ["std", "clock"] } clap = { version = "4.5.4", features = ["derive"] } clap_complete = "4.5.2" @@ -13,7 +14,6 @@ ctrlc = { version = "3.4.4", features = ["termination"] } jrsonnet-evaluator = "0.4.2" log = { version = "0.4.22", features = ["std"] } num_cpus = "1.16.0" -postcard = { version = "1.0.10", features = ["use-std"] } regex = "1.10.4" serde = { version = "1.0.203", features = ["derive"] } serde_json = "1.0.117" diff --git a/rust/datasize.jsonnet b/rust/datasize.jsonnet new file mode 100644 index 0000000..2c842df --- /dev/null +++ b/rust/datasize.jsonnet @@ -0,0 +1,30 @@ +{ + patterns: { + num: { + regex: @'([0-9]+)', + }, + }, + streams: { + s1: { + cmd: ['seq', '-w', '499999'], + filters: { + f1: { + regex: [ + '^$', + ], + retry: 10, + retryperiod: '1m', + actions: { + a: { + cmd: ['true'], + }, + b: { + cmd: ['true'], + after: '1m', + }, + }, + }, + }, + }, + }, +} diff --git a/rust/src/daemon/database/lowlevel.rs b/rust/src/daemon/database/lowlevel.rs index f6af9ae..7ed900d 100644 --- a/rust/src/daemon/database/lowlevel.rs +++ b/rust/src/daemon/database/lowlevel.rs @@ -6,9 +6,10 @@ use std::{ process::exit, }; +use bincode::Options; 
use chrono::{DateTime, Local}; use log::{debug, error, warn}; -use serde::{Deserialize, Serialize}; +use serde::{de::DeserializeOwned, Deserialize, Serialize}; use crate::concepts::{Config, Filter, LogEntry, Match}; @@ -22,13 +23,20 @@ type DatabaseHeader = BTreeMap; type ReadHeader = BTreeMap; type WriteHeader = BTreeMap<&'static Filter, usize>; -const BUFFER_MAX_SIZE: usize = 10 * 1024 * 1024; +type BinOptions = bincode::config::WithOtherIntEncoding< + bincode::config::DefaultOptions, + bincode::config::VarintEncoding, +>; +fn bin_options() -> BinOptions { + bincode::DefaultOptions::new().with_varint_encoding() +} + const DB_SIGNATURE: &str = "reaction-db-v01"; pub struct ReadDB { f: BufReader, h: ReadHeader, - buf: Vec, + bin: BinOptions, } impl ReadDB { @@ -51,13 +59,18 @@ impl ReadDB { let mut ret = ReadDB { f: BufReader::new(file), - h: BTreeMap::new(), - buf: vec![0; BUFFER_MAX_SIZE], + h: BTreeMap::default(), + bin: bin_options(), }; - match ret.read::<&str>() { - Ok(DB_SIGNATURE) => Ok(()), - Ok(_) => Err(DBError::Error("database is not a reaction database".into())), + match ret.read::() { + Ok(signature) => { + if DB_SIGNATURE == &signature { + Ok(()) + } else { + Err(DBError::Error("database is not a reaction database".into())) + } + } Err(err) => Err(DBError::Error(format!("reading database signature: {err}"))), }?; @@ -73,8 +86,8 @@ impl ReadDB { Ok(Some(ret)) } - fn read<'a, T: Deserialize<'a> + Debug>(&'a mut self) -> Result { - let (decoded, _) = postcard::from_io::((&mut self.f, &mut self.buf))?; + fn read(&mut self) -> Result { + let decoded = self.bin.deserialize_from::<_, T>(&mut self.f)?; debug!("reading this: {:?}", &decoded); Ok(decoded) } @@ -84,14 +97,15 @@ impl Iterator for ReadDB { type Item = Result; fn next(&mut self) -> Option { - match self.read::() { - // FIXME why we got a default item instead of an error or something? - // How do we really know we reached the end? - // For now, checking if time is 0 - Ok(ComputedLogEntry { t: 0, .. 
}) => None, + let res = self.read::(); + debug!("{res:?}"); + match res { Ok(item) => Some(item.to(&self.h)), - Err(err) => match err { - postcard::Error::DeserializeUnexpectedEnd => None, + Err(err) => match *err { + bincode::ErrorKind::Io(err) => match err.kind() { + io::ErrorKind::UnexpectedEof => None, + _ => Some(Err(err.into())), + }, _ => Some(Err(err.into())), }, } @@ -101,7 +115,7 @@ impl Iterator for ReadDB { pub struct WriteDB { f: BufWriter, h: WriteHeader, - buf: Vec, + bin: BinOptions, } impl WriteDB { @@ -116,8 +130,8 @@ impl WriteDB { let mut ret = WriteDB { f: BufWriter::new(file), - h: BTreeMap::new(), - buf: vec![0; BUFFER_MAX_SIZE], + h: BTreeMap::default(), + bin: bin_options(), }; if let Err(err) = ret._write(DB_SIGNATURE) { @@ -153,13 +167,9 @@ impl WriteDB { } fn _write(&mut self, data: T) -> Result<(), DBError> { - let encoded = postcard::to_slice(&data, &mut self.buf)?; + let encoded = self.bin.serialize(&data)?; debug!("writing this: {:?}, {:?}", &data, &encoded); - self.f.write_all(encoded)?; - // clear - // for i in 0..self.buf.len() { - // self.buf[i] = 0; - // } + self.f.write_all(&encoded)?; Ok(()) } diff --git a/rust/src/daemon/database/mod.rs b/rust/src/daemon/database/mod.rs index 0552ece..1049fa6 100644 --- a/rust/src/daemon/database/mod.rs +++ b/rust/src/daemon/database/mod.rs @@ -30,7 +30,7 @@ pub enum DBError { #[error("invalid filter: {0}")] InvalidFilterError(String), #[error("decode error: {0}")] - PostcardError(#[from] postcard::Error), + BincodeError(#[from] bincode::Error), #[error("io error: {0}")] IOError(#[from] io::Error), #[error("{0}")]