Replace postcard with bincode

- The interface is easier
- This lets me use unbounded buffers.

I did some benchmarking on both options.
I can't see any difference in terms of
- CPU performance,
- memory usage, and
- storage size.

I used this file: `datasize.jsonnet`
```jsonnet
{
  patterns: {
    num: {
      regex: @'([0-9]+)',
    },
  },
  streams: {
    s1: {
      cmd: ['seq', '-w', '499999'],
      filters: {
        f1: {
          regex: [
            '^<num>$',
          ],
          retry: 10,
          retryperiod: '1m',
          actions: {
            a: {
              cmd: ['true'],
            },
            b: {
              cmd: ['true'],
              after: '1m',
            },
          },
        },
      },
    },
  },
}
```
And these commands:

```
rm reaction-*
sudo systemd-run --wait -p User=ao -p MemoryAccounting=yes -p WorkingDirectory=(pwd) -p Environment=PATH=/run/current-system/sw/bin/ time ./target/release/reaction start -c datasize.jsonnet && ls -l reaction-matches.db
sudo systemd-run --wait -p User=ao -p MemoryAccounting=yes -p WorkingDirectory=(pwd) -p Environment=PATH=/run/current-system/sw/bin/ time ./target/release/reaction start -c datasize.jsonnet && ls -l reaction-matches.db
sudo systemd-run --wait -p User=ao -p MemoryAccounting=yes -p WorkingDirectory=(pwd) -p Environment=PATH=/run/current-system/sw/bin/ time ./target/release/reaction start -c datasize.jsonnet && ls -l reaction-matches.db
```
At the first invocation, reaction reads no DB.
At the second invocation, reaction reads a DB.
At the third invocation, reaction reads a double-sized DB.
This commit is contained in:
ppom 2024-09-25 12:00:00 +02:00
commit 9cc702e9c7
5 changed files with 69 additions and 150 deletions

123
rust/Cargo.lock generated
View file

@ -85,15 +85,6 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "atomic-polyfill"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8cf2bce30dfe09ef0bfaef228b9d414faaf7e563035494d7fe092dba54b300f4"
dependencies = [
"critical-section",
]
[[package]]
name = "autocfg"
version = "1.3.0"
@ -127,12 +118,6 @@ version = "3.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "cc"
version = "1.1.6"
@ -214,12 +199,6 @@ version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
[[package]]
name = "cobs"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15"
[[package]]
name = "colorchoice"
version = "1.0.2"
@ -232,12 +211,6 @@ version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f"
[[package]]
name = "critical-section"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f64009896348fc5af4222e9cf7d7d82a95a256c634ebcf61c53e4ea461422242"
[[package]]
name = "ctrlc"
version = "3.4.4"
@ -248,18 +221,6 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "embedded-io"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced"
[[package]]
name = "embedded-io"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d"
[[package]]
name = "equivalent"
version = "1.0.1"
@ -282,35 +243,12 @@ version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6"
[[package]]
name = "hash32"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0c35f58762feb77d74ebe43bdbc3210f09be9fe6742234d573bacc26ed92b67"
dependencies = [
"byteorder",
]
[[package]]
name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
[[package]]
name = "heapless"
version = "0.7.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdc6457c0eb62c71aac4bc17216026d8410337c4126773b9c5daba343f17964f"
dependencies = [
"atomic-polyfill",
"hash32",
"rustc_version",
"serde",
"spin",
"stable_deref_trait",
]
[[package]]
name = "heck"
version = "0.5.0"
@ -472,16 +410,6 @@ version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
[[package]]
name = "lock_api"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.22"
@ -570,19 +498,6 @@ version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c719dcf55f09a3a7e764c6649ab594c18a177e3599c467983cdf644bfc0a4088"
[[package]]
name = "postcard"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f7f0a8d620d71c457dd1d47df76bb18960378da56af4527aaa10f515eee732e"
dependencies = [
"cobs",
"embedded-io 0.4.0",
"embedded-io 0.6.1",
"heapless",
"serde",
]
[[package]]
name = "proc-macro2"
version = "1.0.86"
@ -605,6 +520,7 @@ dependencies = [
name = "reaction"
version = "0.1.0"
dependencies = [
"bincode",
"chrono",
"clap",
"clap_complete",
@ -612,7 +528,6 @@ dependencies = [
"jrsonnet-evaluator",
"log",
"num_cpus",
"postcard",
"regex",
"serde",
"serde_json",
@ -657,15 +572,6 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustc_version"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
dependencies = [
"semver",
]
[[package]]
name = "rustix"
version = "0.38.34"
@ -685,18 +591,6 @@ version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "semver"
version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b"
[[package]]
name = "serde"
version = "1.0.204"
@ -742,21 +636,6 @@ dependencies = [
"unsafe-libyaml",
]
[[package]]
name = "spin"
version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
dependencies = [
"lock_api",
]
[[package]]
name = "stable_deref_trait"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "strsim"
version = "0.11.1"

View file

@ -6,6 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
bincode = "1.3.3"
chrono = { version = "0.4.38", features = ["std", "clock"] }
clap = { version = "4.5.4", features = ["derive"] }
clap_complete = "4.5.2"
@ -13,7 +14,6 @@ ctrlc = { version = "3.4.4", features = ["termination"] }
jrsonnet-evaluator = "0.4.2"
log = { version = "0.4.22", features = ["std"] }
num_cpus = "1.16.0"
postcard = { version = "1.0.10", features = ["use-std"] }
regex = "1.10.4"
serde = { version = "1.0.203", features = ["derive"] }
serde_json = "1.0.117"

30
rust/datasize.jsonnet Normal file
View file

@ -0,0 +1,30 @@
// Benchmark configuration: a single stream that emits 499999 purely numeric
// lines, each of which matches filter `f1`.
{
patterns: {
// `num` captures a run of one or more decimal digits.
num: {
regex: @'([0-9]+)',
},
},
streams: {
s1: {
// `seq -w 499999` prints 1 through 499999, one per line, zero-padded to equal width.
cmd: ['seq', '-w', '499999'],
filters: {
f1: {
// Matches lines that consist solely of the `num` pattern.
regex: [
'^<num>$',
],
// NOTE(review): presumably the number of matches required within
// `retryperiod` before the actions fire; confirm against reaction's docs.
retry: 10,
retryperiod: '1m',
actions: {
// Both actions run the no-op command `true`.
a: {
cmd: ['true'],
},
// `b` additionally sets `after: '1m'`.
b: {
cmd: ['true'],
after: '1m',
},
},
},
},
},
},
}

View file

@ -6,9 +6,10 @@ use std::{
process::exit,
};
use bincode::Options;
use chrono::{DateTime, Local};
use log::{debug, error, warn};
use serde::{Deserialize, Serialize};
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use crate::concepts::{Config, Filter, LogEntry, Match};
@ -22,13 +23,20 @@ type DatabaseHeader = BTreeMap<usize, (String, String)>;
type ReadHeader = BTreeMap<usize, &'static Filter>;
type WriteHeader = BTreeMap<&'static Filter, usize>;
const BUFFER_MAX_SIZE: usize = 10 * 1024 * 1024;
type BinOptions = bincode::config::WithOtherIntEncoding<
bincode::config::DefaultOptions,
bincode::config::VarintEncoding,
>;
fn bin_options() -> BinOptions {
bincode::DefaultOptions::new().with_varint_encoding()
}
const DB_SIGNATURE: &str = "reaction-db-v01";
pub struct ReadDB {
f: BufReader<File>,
h: ReadHeader,
buf: Vec<u8>,
bin: BinOptions,
}
impl ReadDB {
@ -51,13 +59,18 @@ impl ReadDB {
let mut ret = ReadDB {
f: BufReader::new(file),
h: BTreeMap::new(),
buf: vec![0; BUFFER_MAX_SIZE],
h: BTreeMap::default(),
bin: bin_options(),
};
match ret.read::<&str>() {
Ok(DB_SIGNATURE) => Ok(()),
Ok(_) => Err(DBError::Error("database is not a reaction database".into())),
match ret.read::<String>() {
Ok(signature) => {
if DB_SIGNATURE == &signature {
Ok(())
} else {
Err(DBError::Error("database is not a reaction database".into()))
}
}
Err(err) => Err(DBError::Error(format!("reading database signature: {err}"))),
}?;
@ -73,8 +86,8 @@ impl ReadDB {
Ok(Some(ret))
}
fn read<'a, T: Deserialize<'a> + Debug>(&'a mut self) -> Result<T, postcard::Error> {
let (decoded, _) = postcard::from_io::<T, _>((&mut self.f, &mut self.buf))?;
fn read<T: DeserializeOwned + Debug>(&mut self) -> Result<T, bincode::Error> {
let decoded = self.bin.deserialize_from::<_, T>(&mut self.f)?;
debug!("reading this: {:?}", &decoded);
Ok(decoded)
}
@ -84,14 +97,15 @@ impl Iterator for ReadDB {
type Item = Result<LogEntry, DBError>;
fn next(&mut self) -> Option<Self::Item> {
match self.read::<ComputedLogEntry>() {
// FIXME why we got a default item instead of an error or something?
// How do we really know we reached the end?
// For now, checking if time is 0
Ok(ComputedLogEntry { t: 0, .. }) => None,
let res = self.read::<ComputedLogEntry>();
debug!("{res:?}");
match res {
Ok(item) => Some(item.to(&self.h)),
Err(err) => match err {
postcard::Error::DeserializeUnexpectedEnd => None,
Err(err) => match *err {
bincode::ErrorKind::Io(err) => match err.kind() {
io::ErrorKind::UnexpectedEof => None,
_ => Some(Err(err.into())),
},
_ => Some(Err(err.into())),
},
}
@ -101,7 +115,7 @@ impl Iterator for ReadDB {
pub struct WriteDB {
f: BufWriter<File>,
h: WriteHeader,
buf: Vec<u8>,
bin: BinOptions,
}
impl WriteDB {
@ -116,8 +130,8 @@ impl WriteDB {
let mut ret = WriteDB {
f: BufWriter::new(file),
h: BTreeMap::new(),
buf: vec![0; BUFFER_MAX_SIZE],
h: BTreeMap::default(),
bin: bin_options(),
};
if let Err(err) = ret._write(DB_SIGNATURE) {
@ -153,13 +167,9 @@ impl WriteDB {
}
fn _write<T: Serialize + std::fmt::Debug>(&mut self, data: T) -> Result<(), DBError> {
let encoded = postcard::to_slice(&data, &mut self.buf)?;
let encoded = self.bin.serialize(&data)?;
debug!("writing this: {:?}, {:?}", &data, &encoded);
self.f.write_all(encoded)?;
// clear
// for i in 0..self.buf.len() {
// self.buf[i] = 0;
// }
self.f.write_all(&encoded)?;
Ok(())
}

View file

@ -30,7 +30,7 @@ pub enum DBError {
#[error("invalid filter: {0}")]
InvalidFilterError(String),
#[error("decode error: {0}")]
PostcardError(#[from] postcard::Error),
BincodeError(#[from] bincode::Error),
#[error("io error: {0}")]
IOError(#[from] io::Error),
#[error("{0}")]