Improve when the local database integrity check is performed and on what frequency (#1938)

* Improve when the local database integrity check is performed and on what frequency
This commit is contained in:
abraunegg 2022-04-28 11:51:50 +10:00 committed by GitHub
parent 834dc3fbc4
commit a52de36d63
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 87 additions and 38 deletions

2
config
View file

@ -27,7 +27,7 @@
# dry_run = "false"
# min_notify_changes = "5"
# monitor_log_frequency = "5"
# monitor_fullscan_frequency = "10"
# monitor_fullscan_frequency = "12"
# sync_root_files = "false"
# classify_as_big_delete = "1000"
# user_agent = ""

View file

@ -24,6 +24,7 @@
+ [skip_file](#skip_file)
+ [skip_dotfiles](#skip_dotfiles)
+ [monitor_interval](#monitor_interval)
+ [monitor_fullscan_frequency](#monitor_fullscan_frequency)
+ [min_notify_changes](#min_notify_changes)
+ [operation_timeout](#operation_timeout)
* [Performing a --resync](#performing-a---resync)
@ -357,7 +358,7 @@ See the [config](https://raw.githubusercontent.com/abraunegg/onedrive/master/con
# dry_run = "false"
# min_notify_changes = "5"
# monitor_log_frequency = "5"
# monitor_fullscan_frequency = "10"
# monitor_fullscan_frequency = "12"
# sync_root_files = "false"
# classify_as_big_delete = "1000"
# user_agent = ""
@ -386,6 +387,8 @@ See the [config](https://raw.githubusercontent.com/abraunegg/onedrive/master/con
The below are 'config' file examples to assist with configuration of the 'config' file:
#### sync_dir
Configure your local sync directory location.
Example:
```text
# When changing a config option below, remove the '#' from the start of the line
@ -427,6 +430,12 @@ sync_file_permissions = "600"
**Important:** Special permission bits (setuid, setgid, sticky bit) are not supported. Valid permission values are from `000` to `777` only.
#### skip_dir
This option is used to 'skip' certain directories and supports pattern matching.
Patterns are case-insensitive. `*` and `?` [wildcard characters](https://technet.microsoft.com/en-us/library/bb490639.aspx) are supported. Use `|` to separate multiple patterns.
**Important:** Entries under `skip_dir` are relative to your `sync_dir` path.
Example:
```text
# When changing a config option below, remove the '#' from the start of the line
@ -438,9 +447,6 @@ Example:
skip_dir = "Desktop|Documents/IISExpress|Documents/SQL Server Management Studio|Documents/Visual Studio*|Documents/WindowsPowerShell"
# log_dir = "/var/log/onedrive/"
```
Patterns are case-insensitive. `*` and `?` [wildcard characters](https://technet.microsoft.com/en-us/library/bb490639.aspx) are supported. Use `|` to separate multiple patterns.
**Important:** Entries under `skip_dir` are relative to your `sync_dir` path.
**Note:** The `skip_dir` can be specified multiple times, for example:
```text
@ -456,17 +462,8 @@ skip_dir = "SomeDir|OtherDir|ThisDir|ThatDir|/Path/To/A/Directory|/Another/Path/
**Note:** After changing `skip_dir`, you must perform a full re-synchronization by adding `--resync` to your existing command line - for example: `onedrive --synchronize --resync`
#### skip_file
Example:
```text
# When changing a config option below, remove the '#' from the start of the line
# For explanations of all config options below see docs/USAGE.md or the man page.
#
# sync_dir = "~/OneDrive"
skip_file = "~*|Documents/OneNote*|Documents/config.xlaunch|myfile.ext"
# monitor_interval = "300"
# skip_dir = ""
# log_dir = "/var/log/onedrive/"
```
This option is used to 'skip' certain files and supports pattern matching.
Patterns are case-insensitive. `*` and `?` [wildcard characters](https://technet.microsoft.com/en-us/library/bb490639.aspx) are supported. Use `|` to separate multiple patterns.
Files can be skipped in the following fashion:
@ -481,6 +478,18 @@ By default, the following files will be skipped:
**Important:** Do not use a skip_file entry of `.*` as this will prevent correct searching of local changes to process.
Example:
```text
# When changing a config option below, remove the '#' from the start of the line
# For explanations of all config options below see docs/USAGE.md or the man page.
#
# sync_dir = "~/OneDrive"
skip_file = "~*|Documents/OneNote*|Documents/config.xlaunch|myfile.ext"
# monitor_interval = "300"
# skip_dir = ""
# log_dir = "/var/log/onedrive/"
```
**Note:** The `skip_file` can be specified multiple times, for example:
```text
skip_file = "~*|.~*|*.tmp|*.swp"
@ -495,6 +504,8 @@ skip_file = "~*|.~*|*.tmp|*.swp|*.blah|never_sync.file"
**Note:** after changing `skip_file`, you must perform a full re-synchronization by adding `--resync` to your existing command line - for example: `onedrive --synchronize --resync`
#### skip_dotfiles
Setting this to `"true"` will skip all .files and .folders while syncing.
Example:
```text
# skip_symlinks = "false"
@ -503,9 +514,12 @@ skip_dotfiles = "true"
# dry_run = "false"
# monitor_interval = "300"
```
Setting this to `"true"` will skip all .files and .folders while syncing.
#### monitor_interval
The monitor interval is defined as the wait time 'between' syncs when running in monitor mode. When this interval expires, the client will check OneDrive for changes online, performing data integrity checks and scanning the local 'sync_dir' for new content.
By default without configuration, 'monitor_interval' is set to 300 seconds. Setting this value to 600 will run the sync process every 10 minutes.
Example:
```text
# skip_dotfiles = "false"
@ -514,27 +528,46 @@ monitor_interval = "600"
# min_notify_changes = "5"
# monitor_log_frequency = "5"
```
The monitor interval is defined as the wait time 'between' syncs when running in monitor mode. By default without configuration, the monitor_interval is set to 300 seconds. Setting this value to 600 will run the sync process every 10 minutes.
#### monitor_fullscan_frequency
This configuration option controls the number of 'monitor_interval' iterations between full scans of your data, which are performed to ensure data integrity and consistency.
By default without configuration, 'monitor_fullscan_frequency' is set to 12. In this default state, a full scan is performed every 'monitor_interval' x 'monitor_fullscan_frequency' = 300 x 12 = 3600 seconds (once an hour). This is only applicable when running in --monitor mode.
Setting this value to 24 means that the full scan of OneDrive and the integrity check of the data stored locally will occur every 2 hours (assuming 'monitor_interval' is set to 300 seconds).
Example:
```text
# min_notify_changes = "5"
# monitor_log_frequency = "5"
monitor_fullscan_frequency = "24"
# sync_root_files = "false"
# classify_as_big_delete = "1000"
```
**Note:** When running in --monitor mode, a full scan is always performed at application start-up to ensure data integrity. This option has no effect when running the application in --synchronize mode, where a full scan is always performed.
#### min_notify_changes
This option defines the minimum number of pending incoming changes necessary to trigger a desktop notification. This allows controlling the frequency of notifications.
Example:
```text
# dry_run = "false"
# monitor_interval = "300"
min_notify_changes = "50"
# monitor_log_frequency = "5"
# monitor_fullscan_frequency = "10"
# monitor_fullscan_frequency = "12"
```
This option defines the minimum number of pending incoming changes necessary to trigger a desktop notification. This allows controlling the frequency of notifications.
#### operation_timeout
Operation Timeout is the maximum amount of time (seconds) a file operation is allowed to take. This includes DNS resolution, connecting, data transfer, etc.
Example:
```text
# sync_file_permissions = "600"
# rate_limit = "131072"
operation_timeout = "3600"
```
Operation Timeout is the maximum amount of time (seconds) a file operation is allowed to take. This includes DNS resolution, connecting, data transfer, etc.
### Performing a --resync
If you modify any of the following configuration items, you will be required to perform a `--resync` to ensure your client is syncing your data with the updated configuration:

View file

@ -76,9 +76,10 @@ final class Config
longValues["skip_size"] = 0;
longValues["min_notify_changes"] = 5;
longValues["monitor_log_frequency"] = 5;
// Number of n sync runs before performing a full local scan of sync_dir
// By default 10 which means every ~7.5 minutes a full disk scan of sync_dir will occur
longValues["monitor_fullscan_frequency"] = 10;
// Number of sync runs (N) before performing a full local scan of sync_dir
// By default 12, which means a full disk scan of sync_dir will occur every ~60 minutes when 'monitor_interval' is 300 seconds
// 'monitor_interval' * 'monitor_fullscan_frequency' = 300 * 12 = 3600 seconds = 1 hour
longValues["monitor_fullscan_frequency"] = 12;
// Number of children in a path that is locally removed which will be classified as a 'big data delete'
longValues["classify_as_big_delete"] = 1000;
// Delete source after successful transfer

View file

@ -1257,19 +1257,18 @@ int main(string[] args)
logMonitorCounter = 1;
}
// do we perform a full scan of sync_dir?
// do we perform a full scan of sync_dir and database integrity check?
fullScanCounter += 1;
// fullScanFrequency = 'monitor_fullscan_frequency' from config
if (fullScanCounter > fullScanFrequency){
// loop counter has exceeded
// 'monitor_fullscan_frequency' counter has been exceeded
fullScanCounter = 1;
// set fullScanRequired = true because the 'monitor_fullscan_frequency' counter has been exceeded
fullScanRequired = true;
// are we using sync_list?
if (syncListConfigured) {
// set fullScanRequired = true due to sync_list being used
fullScanRequired = true;
// sync list is configured
syncListConfiguredFullScanOverride = true;
} else {
// dont set fullScanRequired to true as this is excessive if sync_list is not being used
fullScanRequired = false;
}
}
@ -1532,11 +1531,27 @@ void performSync(SyncEngine sync, string singleDirectory, bool downloadOnly, boo
} else {
// --monitor in use
// Use individual calls with inotify checks between to avoid a race condition between these 2 functions
// Database scan
sync.scanForDifferencesDatabaseScan(localPath);
// handle any inotify events that occurred 'whilst' we were scanning the database
m.update(true);
// Database scan integrity check to compare DB data vs actual content on disk to ensure what we think is local, is local
// and that the data 'hash' as recorded in the DB equals the hash of the actual content
// This process can be extremely expensive in terms of both time and CPU usage
//
// fullScanRequired is set to TRUE when the application starts up, or the config option 'monitor_fullscan_frequency' count is reached
// By default, 'monitor_fullscan_frequency' = 12, and 'monitor_interval' = 300, meaning that by default, a full database consistency check
// is done once an hour.
//
// To change this behaviour adjust 'monitor_interval' and 'monitor_fullscan_frequency' to desired values in the application config file
if (fullScanRequired) {
log.vlog("Performing Database Consistency Integrity Check .. ");
sync.scanForDifferencesDatabaseScan(localPath);
// handle any inotify events that occurred 'whilst' we were scanning the database
m.update(true);
} else {
log.vdebug("NOT performing Database Integrity Check .. fullScanRequired = FALSE");
m.update(true);
}
// Filesystem walk to find new files not uploaded
log.vdebug("Searching local filesystem for new data");
sync.scanForDifferencesFilesystemScan(localPath);
// handle any inotify events that occurred 'whilst' we were scanning the local filesystem
m.update(true);
@ -1548,12 +1563,12 @@ void performSync(SyncEngine sync, string singleDirectory, bool downloadOnly, boo
// --synchronize & no sync_list : fullScanRequired = false, syncListConfiguredFullScanOverride = false
// --synchronize & sync_list in use : fullScanRequired = false, syncListConfiguredFullScanOverride = true
// --monitor loops around 10 iterations. On the 1st loop, sets fullScanRequired = false, syncListConfiguredFullScanOverride = true if required
// --monitor loops around 12 iterations. On the 1st loop, sets fullScanRequired = true, syncListConfiguredFullScanOverride = true if required
// --monitor & no sync_list (loop #1) : fullScanRequired = true, syncListConfiguredFullScanOverride = false
// --monitor & no sync_list (loop #2 - #10) : fullScanRequired = false, syncListConfiguredFullScanOverride = false
// --monitor & no sync_list (loop #2 - #12) : fullScanRequired = false, syncListConfiguredFullScanOverride = false
// --monitor & sync_list in use (loop #1) : fullScanRequired = true, syncListConfiguredFullScanOverride = true
// --monitor & sync_list in use (loop #2 - #10) : fullScanRequired = false, syncListConfiguredFullScanOverride = false
// --monitor & sync_list in use (loop #2 - #12) : fullScanRequired = false, syncListConfiguredFullScanOverride = false
// Do not perform a full walk of the OneDrive objects
if ((!fullScanRequired) && (!syncListConfiguredFullScanOverride)){