// Copyright (c) 2024 Tulir Asokan // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. //go:build cgo && !nocrypto package matrix import ( "context" "errors" "fmt" "os" "runtime/debug" "strings" "sync" "time" "github.com/lib/pq" "github.com/rs/zerolog" "go.mau.fi/util/dbutil" "maunium.net/go/mautrix" "maunium.net/go/mautrix/bridgev2" "maunium.net/go/mautrix/bridgev2/database" "maunium.net/go/mautrix/crypto" "maunium.net/go/mautrix/crypto/olm" "maunium.net/go/mautrix/event" "maunium.net/go/mautrix/id" "maunium.net/go/mautrix/sqlstatestore" ) func init() { crypto.PostgresArrayWrapper = pq.Array } var _ crypto.StateStore = (*sqlstatestore.SQLStateStore)(nil) var NoSessionFound = crypto.ErrNoSessionFound var DuplicateMessageIndex = crypto.ErrDuplicateMessageIndex var UnknownMessageIndex = olm.ErrUnknownMessageIndex type CryptoHelper struct { bridge *Connector client *mautrix.Client mach *crypto.OlmMachine store *SQLCryptoStore log *zerolog.Logger lock sync.RWMutex syncDone sync.WaitGroup cancelSync func() cancelPeriodicDeleteLoop func() } func NewCryptoHelper(c *Connector) Crypto { if !c.Config.Encryption.Allow { c.Log.Debug().Msg("Bridge built with end-to-bridge encryption, but disabled in config") return nil } log := c.Log.With().Str("component", "crypto").Logger() return &CryptoHelper{ bridge: c, log: &log, } } func (helper *CryptoHelper) Init(ctx context.Context) error { if len(helper.bridge.Config.Encryption.PickleKey) == 0 { panic("CryptoPickleKey not set") } helper.log.Debug().Msg("Initializing end-to-bridge encryption...") helper.store = NewSQLCryptoStore( helper.bridge.Bridge.DB.Database, dbutil.ZeroLogger(helper.bridge.Log.With().Str("db_section", "crypto").Logger()), string(helper.bridge.Bridge.ID), helper.bridge.AS.BotMXID(), fmt.Sprintf("@%s:%s", strings.ReplaceAll(helper.bridge.Config.AppService.FormatUsername("%"), "_", `\_`), helper.bridge.AS.HomeserverDomain), helper.bridge.Config.Encryption.PickleKey, ) err := helper.store.DB.Upgrade(ctx) if err != nil { return bridgev2.DBUpgradeError{Section: "crypto", Err: err} } var isExistingDevice bool helper.client, isExistingDevice, err = helper.loginBot(ctx) if err != nil { return err } helper.log.Debug(). Str("device_id", helper.client.DeviceID.String()). Msg("Logged in as bridge bot") helper.mach = crypto.NewOlmMachine(helper.client, helper.log, helper.store, helper.bridge.StateStore) helper.mach.DisableSharedGroupSessionTracking = true helper.mach.AllowKeyShare = helper.allowKeyShare encryptionConfig := helper.bridge.Config.Encryption helper.mach.SendKeysMinTrust = encryptionConfig.VerificationLevels.Receive helper.mach.PlaintextMentions = encryptionConfig.PlaintextMentions helper.mach.DeleteOutboundKeysOnAck = encryptionConfig.DeleteKeys.DeleteOutboundOnAck helper.mach.DontStoreOutboundKeys = encryptionConfig.DeleteKeys.DontStoreOutbound helper.mach.RatchetKeysOnDecrypt = encryptionConfig.DeleteKeys.RatchetOnDecrypt helper.mach.DeleteFullyUsedKeysOnDecrypt = encryptionConfig.DeleteKeys.DeleteFullyUsedOnDecrypt helper.mach.DeletePreviousKeysOnReceive = encryptionConfig.DeleteKeys.DeletePrevOnNewSession helper.mach.DeleteKeysOnDeviceDelete = encryptionConfig.DeleteKeys.DeleteOnDeviceDelete helper.mach.DisableDeviceChangeKeyRotation = encryptionConfig.Rotation.DisableDeviceChangeKeyRotation if encryptionConfig.DeleteKeys.PeriodicallyDeleteExpired { ctx, cancel := context.WithCancel(context.Background()) helper.cancelPeriodicDeleteLoop = cancel go helper.mach.ExpiredKeyDeleteLoop(ctx) } if encryptionConfig.DeleteKeys.DeleteOutdatedInbound { deleted, err := helper.store.RedactOutdatedGroupSessions(ctx) if err != nil { return err } if len(deleted) > 0 { helper.log.Debug().Int("deleted", len(deleted)).Msg("Deleted inbound keys which lacked expiration metadata") } } helper.client.Syncer = &cryptoSyncer{helper.mach} helper.client.Store = helper.store err = helper.mach.Load(ctx) if err != nil { return err } if isExistingDevice { if !helper.verifyKeysAreOnServer(ctx) { return nil } } else { err = helper.ShareKeys(ctx) if err != nil { return fmt.Errorf("failed to share device keys: %w", err) } } if helper.bridge.Config.Encryption.SelfSign { if !helper.doSelfSign(ctx) { os.Exit(34) } } go helper.resyncEncryptionInfo(context.TODO()) return nil } func (helper *CryptoHelper) doSelfSign(ctx context.Context) bool { log := zerolog.Ctx(ctx) hasKeys, isVerified, err := helper.mach.GetOwnVerificationStatus(ctx) if err != nil { log.WithLevel(zerolog.FatalLevel).Err(err).Msg("Failed to check verification status") return false } log.Debug().Bool("has_keys", hasKeys).Bool("is_verified", isVerified).Msg("Checked verification status") keyInDB := helper.bridge.Bridge.DB.KV.Get(ctx, database.KeyRecoveryKey) if !hasKeys || keyInDB == "overwrite" { if keyInDB != "" && keyInDB != "overwrite" { log.WithLevel(zerolog.FatalLevel). Msg("No keys on server, but database already has recovery key. Delete `recovery_key` from `kv_store` manually to continue.") return false } recoveryKey, err := helper.mach.GenerateAndVerifyWithRecoveryKey(ctx) if recoveryKey != "" { helper.bridge.Bridge.DB.KV.Set(ctx, database.KeyRecoveryKey, recoveryKey) } if err != nil { log.WithLevel(zerolog.FatalLevel).Err(err).Msg("Failed to generate recovery key and self-sign") return false } log.Info().Msg("Generated new recovery key and self-signed bot device") } else if !isVerified { if keyInDB == "" { log.WithLevel(zerolog.FatalLevel). Msg("Server already has cross-signing keys, but no key in database. Add `recovery_key` to `kv_store`, or set it to `overwrite` to generate new keys.") return false } err = helper.mach.VerifyWithRecoveryKey(ctx, keyInDB) if err != nil { log.WithLevel(zerolog.FatalLevel).Err(err).Msg("Failed to verify with recovery key") return false } log.Info().Msg("Verified bot device with existing recovery key") } return true } func (helper *CryptoHelper) resyncEncryptionInfo(ctx context.Context) { log := helper.log.With().Str("action", "resync encryption event").Logger() rows, err := helper.store.DB.Query(ctx, `SELECT room_id FROM mx_room_state WHERE encryption='{"resync":true}'`) roomIDs, err := dbutil.NewRowIterWithError(rows, dbutil.ScanSingleColumn[id.RoomID], err).AsList() if err != nil { log.Err(err).Msg("Failed to query rooms for resync") return } if len(roomIDs) > 0 { log.Debug().Interface("room_ids", roomIDs).Msg("Resyncing rooms") for _, roomID := range roomIDs { var evt event.EncryptionEventContent err = helper.client.StateEvent(ctx, roomID, event.StateEncryption, "", &evt) if err != nil { log.Err(err).Stringer("room_id", roomID).Msg("Failed to get encryption event") _, err = helper.store.DB.Exec(ctx, ` UPDATE mx_room_state SET encryption=NULL WHERE room_id=$1 AND encryption='{"resync":true}' `, roomID) if err != nil { log.Err(err).Stringer("room_id", roomID).Msg("Failed to unmark room for resync after failed sync") } } else { maxAge := evt.RotationPeriodMillis if maxAge <= 0 { maxAge = (7 * 24 * time.Hour).Milliseconds() } maxMessages := evt.RotationPeriodMessages if maxMessages <= 0 { maxMessages = 100 } log.Debug(). Str("room_id", roomID.String()). Int64("max_age_ms", maxAge). Int("max_messages", maxMessages). Interface("content", &evt). Msg("Resynced encryption event") _, err = helper.store.DB.Exec(ctx, ` UPDATE crypto_megolm_inbound_session SET max_age=$1, max_messages=$2 WHERE room_id=$3 AND max_age IS NULL AND max_messages IS NULL `, maxAge, maxMessages, roomID) if err != nil { log.Err(err).Stringer("room_id", roomID).Msg("Failed to update megolm session table") } else { log.Debug().Stringer("room_id", roomID).Msg("Updated megolm session table") } } } } } func (helper *CryptoHelper) allowKeyShare(ctx context.Context, device *id.Device, info event.RequestedKeyInfo) *crypto.KeyShareRejection { cfg := helper.bridge.Config.Encryption if !cfg.AllowKeySharing { return &crypto.KeyShareRejectNoResponse } else if device.Trust == id.TrustStateBlacklisted { return &crypto.KeyShareRejectBlacklisted } else if trustState, _ := helper.mach.ResolveTrustContext(ctx, device); trustState >= cfg.VerificationLevels.Share { portal, err := helper.bridge.Bridge.GetPortalByMXID(ctx, info.RoomID) if err != nil { zerolog.Ctx(ctx).Err(err).Msg("Failed to get portal to handle key request") return &crypto.KeyShareRejectNoResponse } else if portal == nil { zerolog.Ctx(ctx).Debug().Msg("Rejecting key request: room is not a portal") return &crypto.KeyShareRejection{Code: event.RoomKeyWithheldUnavailable, Reason: "Requested room is not a portal room"} } user, err := helper.bridge.Bridge.GetExistingUserByMXID(ctx, device.UserID) if err != nil { zerolog.Ctx(ctx).Err(err).Msg("Failed to get user to handle key request") return &crypto.KeyShareRejectNoResponse } else if user == nil { zerolog.Ctx(ctx).Debug().Msg("Couldn't find user to handle key request") return &crypto.KeyShareRejectNoResponse } else if !user.Permissions.Admin { zerolog.Ctx(ctx).Debug().Msg("Rejecting key request: user is not admin") // TODO is in room check? return &crypto.KeyShareRejection{Code: event.RoomKeyWithheldUnauthorized, Reason: "Key sharing for non-admins is not yet implemented"} } zerolog.Ctx(ctx).Debug().Msg("Accepting key request") return nil } else { return &crypto.KeyShareRejectUnverified } } func (helper *CryptoHelper) loginBot(ctx context.Context) (*mautrix.Client, bool, error) { deviceID, err := helper.store.FindDeviceID(ctx) if err != nil { return nil, false, fmt.Errorf("failed to find existing device ID: %w", err) } else if len(deviceID) > 0 { helper.log.Debug().Stringer("device_id", deviceID).Msg("Found existing device ID for bot in database") } // Create a new client instance with the default AS settings (including as_token), // the Login call will then override the access token in the client. client := helper.bridge.AS.NewMautrixClient(helper.bridge.AS.BotMXID()) initialDeviceDisplayName := fmt.Sprintf("%s bridge", helper.bridge.Bridge.Network.GetName().DisplayName) if helper.bridge.Config.Encryption.MSC4190 { helper.log.Debug().Msg("Creating bot device with MSC4190") err = client.CreateDeviceMSC4190(ctx, deviceID, initialDeviceDisplayName) if err != nil { return nil, deviceID != "", fmt.Errorf("failed to create device for bridge bot: %w", err) } helper.store.DeviceID = client.DeviceID return client, deviceID != "", nil } flows, err := client.GetLoginFlows(ctx) if err != nil { return nil, deviceID != "", fmt.Errorf("failed to get supported login flows: %w", err) } else if !flows.HasFlow(mautrix.AuthTypeAppservice) { return nil, deviceID != "", fmt.Errorf("homeserver does not support appservice login") } resp, err := client.Login(ctx, &mautrix.ReqLogin{ Type: mautrix.AuthTypeAppservice, Identifier: mautrix.UserIdentifier{ Type: mautrix.IdentifierTypeUser, User: string(helper.bridge.AS.BotMXID()), }, DeviceID: deviceID, StoreCredentials: true, InitialDeviceDisplayName: initialDeviceDisplayName, }) if err != nil { return nil, deviceID != "", fmt.Errorf("failed to log in as bridge bot: %w", err) } helper.store.DeviceID = resp.DeviceID return client, deviceID != "", nil } func (helper *CryptoHelper) verifyKeysAreOnServer(ctx context.Context) bool { helper.log.Debug().Msg("Making sure keys are still on server") resp, err := helper.client.QueryKeys(ctx, &mautrix.ReqQueryKeys{ DeviceKeys: map[id.UserID]mautrix.DeviceIDList{ helper.client.UserID: {helper.client.DeviceID}, }, }) if err != nil { helper.log.WithLevel(zerolog.FatalLevel).Err(err).Msg("Failed to query own keys to make sure device still exists") os.Exit(33) } device, ok := resp.DeviceKeys[helper.client.UserID][helper.client.DeviceID] if ok && len(device.Keys) > 0 { return true } helper.log.Warn().Msg("Existing device doesn't have keys on server, resetting crypto") helper.Reset(ctx, false) return false } func (helper *CryptoHelper) Start() { if helper.bridge.Config.Encryption.Appservice { helper.log.Debug().Msg("End-to-bridge encryption is in appservice mode, registering event listeners and not starting syncer") helper.bridge.AS.Registration.EphemeralEvents = true helper.mach.AddAppserviceListener(helper.bridge.EventProcessor) return } helper.syncDone.Add(1) defer helper.syncDone.Done() helper.log.Debug().Msg("Starting syncer for receiving to-device messages") var ctx context.Context ctx, helper.cancelSync = context.WithCancel(context.Background()) err := helper.client.SyncWithContext(ctx) if err != nil && !errors.Is(err, context.Canceled) { helper.log.WithLevel(zerolog.FatalLevel).Err(err).Msg("Fatal error syncing") os.Exit(51) } else { helper.log.Info().Msg("Bridge bot to-device syncer stopped without error") } } func (helper *CryptoHelper) Stop() { helper.log.Debug().Msg("CryptoHelper.Stop() called, stopping bridge bot sync") helper.client.StopSync() if helper.cancelSync != nil { helper.cancelSync() } if helper.cancelPeriodicDeleteLoop != nil { helper.cancelPeriodicDeleteLoop() } helper.syncDone.Wait() } func (helper *CryptoHelper) clearDatabase(ctx context.Context) { _, err := helper.store.DB.Exec(ctx, "DELETE FROM crypto_account") if err != nil { helper.log.Warn().Err(err).Msg("Failed to clear crypto_account table") } _, err = helper.store.DB.Exec(ctx, "DELETE FROM crypto_olm_session") if err != nil { helper.log.Warn().Err(err).Msg("Failed to clear crypto_olm_session table") } _, err = helper.store.DB.Exec(ctx, "DELETE FROM crypto_megolm_outbound_session") if err != nil { helper.log.Warn().Err(err).Msg("Failed to clear crypto_megolm_outbound_session table") } //_, _ = helper.store.DB.Exec("DELETE FROM crypto_device") //_, _ = helper.store.DB.Exec("DELETE FROM crypto_tracked_user") //_, _ = helper.store.DB.Exec("DELETE FROM crypto_cross_signing_keys") //_, _ = helper.store.DB.Exec("DELETE FROM crypto_cross_signing_signatures") } func (helper *CryptoHelper) Reset(ctx context.Context, startAfterReset bool) { helper.lock.Lock() defer helper.lock.Unlock() helper.log.Info().Msg("Resetting end-to-bridge encryption device") helper.Stop() helper.log.Debug().Msg("Crypto syncer stopped, clearing database") helper.clearDatabase(ctx) helper.log.Debug().Msg("Crypto database cleared, logging out of all sessions") _, err := helper.client.LogoutAll(ctx) if err != nil { helper.log.Warn().Err(err).Msg("Failed to log out all devices") } helper.client = nil helper.store = nil helper.mach = nil err = helper.Init(ctx) if err != nil { helper.log.WithLevel(zerolog.FatalLevel).Err(err).Msg("Error reinitializing end-to-bridge encryption") os.Exit(50) } helper.log.Info().Msg("End-to-bridge encryption successfully reset") if startAfterReset { go helper.Start() } } func (helper *CryptoHelper) Client() *mautrix.Client { return helper.client } func (helper *CryptoHelper) Decrypt(ctx context.Context, evt *event.Event) (*event.Event, error) { return helper.mach.DecryptMegolmEvent(ctx, evt) } func (helper *CryptoHelper) Encrypt(ctx context.Context, roomID id.RoomID, evtType event.Type, content *event.Content) (err error) { helper.lock.RLock() defer helper.lock.RUnlock() var encrypted *event.EncryptedEventContent encrypted, err = helper.mach.EncryptMegolmEvent(ctx, roomID, evtType, content) if err != nil { if !errors.Is(err, crypto.ErrSessionExpired) && !errors.Is(err, crypto.ErrSessionNotShared) && !errors.Is(err, crypto.ErrNoGroupSession) { return } helper.log.Debug().Err(err). Str("room_id", roomID.String()). Msg("Got error while encrypting event for room, sharing group session and trying again...") var users []id.UserID users, err = helper.store.GetRoomJoinedOrInvitedMembers(ctx, roomID) if err != nil { err = fmt.Errorf("failed to get room member list: %w", err) } else if err = helper.mach.ShareGroupSession(ctx, roomID, users); err != nil { err = fmt.Errorf("failed to share group session: %w", err) } else if encrypted, err = helper.mach.EncryptMegolmEvent(ctx, roomID, evtType, content); err != nil { err = fmt.Errorf("failed to encrypt event after re-sharing group session: %w", err) } } if encrypted != nil { content.Parsed = encrypted content.Raw = nil } return } func (helper *CryptoHelper) WaitForSession(ctx context.Context, roomID id.RoomID, senderKey id.SenderKey, sessionID id.SessionID, timeout time.Duration) bool { helper.lock.RLock() defer helper.lock.RUnlock() return helper.mach.WaitForSession(ctx, roomID, senderKey, sessionID, timeout) } func (helper *CryptoHelper) RequestSession(ctx context.Context, roomID id.RoomID, senderKey id.SenderKey, sessionID id.SessionID, userID id.UserID, deviceID id.DeviceID) { helper.lock.RLock() defer helper.lock.RUnlock() if deviceID == "" { deviceID = "*" } err := helper.mach.SendRoomKeyRequest(ctx, roomID, senderKey, sessionID, "", map[id.UserID][]id.DeviceID{userID: {deviceID}}) if err != nil { helper.log.Warn().Err(err). Str("user_id", userID.String()). Str("device_id", deviceID.String()). Str("session_id", sessionID.String()). Str("room_id", roomID.String()). Msg("Failed to send key request") } else { helper.log.Debug(). Str("user_id", userID.String()). Str("device_id", deviceID.String()). Str("session_id", sessionID.String()). Str("room_id", roomID.String()). Msg("Sent key request") } } func (helper *CryptoHelper) ResetSession(ctx context.Context, roomID id.RoomID) { helper.lock.RLock() defer helper.lock.RUnlock() err := helper.mach.CryptoStore.RemoveOutboundGroupSession(ctx, roomID) if err != nil { helper.log.Debug().Err(err). Str("room_id", roomID.String()). Msg("Error manually removing outbound group session in room") } } func (helper *CryptoHelper) HandleMemberEvent(ctx context.Context, evt *event.Event) { helper.lock.RLock() defer helper.lock.RUnlock() helper.mach.HandleMemberEvent(ctx, evt) } // ShareKeys uploads the given number of one-time-keys to the server. func (helper *CryptoHelper) ShareKeys(ctx context.Context) error { return helper.mach.ShareKeys(ctx, -1) } type cryptoSyncer struct { *crypto.OlmMachine } func (syncer *cryptoSyncer) ProcessResponse(ctx context.Context, resp *mautrix.RespSync, since string) error { done := make(chan struct{}) go func() { defer func() { if err := recover(); err != nil { syncer.Log.Error(). Str("since", since). Interface("error", err). Str("stack", string(debug.Stack())). Msg("Processing sync response panicked") } done <- struct{}{} }() syncer.Log.Trace().Str("since", since).Msg("Starting sync response handling") syncer.ProcessSyncResponse(ctx, resp, since) syncer.Log.Trace().Str("since", since).Msg("Successfully handled sync response") }() select { case <-done: case <-time.After(30 * time.Second): syncer.Log.Warn().Str("since", since).Msg("Handling sync response is taking unusually long") } return nil } func (syncer *cryptoSyncer) OnFailedSync(_ *mautrix.RespSync, err error) (time.Duration, error) { if errors.Is(err, mautrix.MUnknownToken) { return 0, err } syncer.Log.Error().Err(err).Msg("Error /syncing, waiting 10 seconds") return 10 * time.Second, nil } func (syncer *cryptoSyncer) GetFilterJSON(_ id.UserID) *mautrix.Filter { everything := []event.Type{{Type: "*"}} return &mautrix.Filter{ Presence: &mautrix.FilterPart{NotTypes: everything}, AccountData: &mautrix.FilterPart{NotTypes: everything}, Room: &mautrix.RoomFilter{ IncludeLeave: false, Ephemeral: &mautrix.FilterPart{NotTypes: everything}, AccountData: &mautrix.FilterPart{NotTypes: everything}, State: &mautrix.FilterPart{NotTypes: everything}, Timeline: &mautrix.FilterPart{NotTypes: everything}, }, } }