Files
sure/lib/tasks/security_backfill.rake
LPW d98711d4ea Rename raw_investments_payload to raw_holdings_payload for Plaid accounts (#760)
* refactor: rename `raw_investments_payload` to `raw_holdings_payload`

- Update references and models to use consistent naming.
- Adjust migrations, tests, and encryption setup accordingly.

* fix: improve safety when accessing raw_holdings_payload keys

- Use `dig` with safe navigation to prevent potential nil errors.
- Add support for decryption from the old column name `raw_investments_payload`.
- Adjust related methods and calculations for consistency.

---------

Co-authored-by: luckyPipewrench <luckypipewrench@proton.me>
2026-01-24 11:16:26 +01:00

180 lines
6.6 KiB
Ruby

# frozen_string_literal: true
namespace :security do
desc "Backfill encryption for sensitive fields (idempotent). Args: batch_size, dry_run"
task :backfill_encryption, [ :batch_size, :dry_run ] => :environment do |_, args|
  # Explicit task arguments win over the BATCH_SIZE / DRY_RUN environment variables.
  batch_size = (args[:batch_size].presence || ENV["BATCH_SIZE"].presence || "100").to_i
  batch_size = 100 unless batch_size.positive?

  # Dry run is the safe default: only an explicit negative value switches it off;
  # a missing or unrecognised value keeps it on.
  dry_run_setting = (args[:dry_run].presence || ENV["DRY_RUN"].presence).to_s.strip.downcase
  dry_run = !%w[0 false no n].include?(dry_run_setting)

  # Bail out before touching any data when encryption is not configured
  # (checked through the User model, which includes Encryptable).
  unless User.encryption_ready?
    failure = {
      ok: false,
      error: "encryption_not_configured",
      message: "ActiveRecord encryption is not configured. Set credentials or environment variables."
    }
    puts failure.to_json
    exit 1
  end

  puts "Starting security backfill (dry_run: #{dry_run}, batch_size: #{batch_size})..."

  # Shorthand so each entry below only states the model and its sensitive fields.
  backfill = ->(model, fields) { backfill_model(model, fields, batch_size, dry_run) }

  # Values are evaluated top to bottom, so models are processed in the listed order.
  results = {
    # User fields (MFA + PII).
    # otp_backup_codes is excluded: it is a PostgreSQL array column incompatible with AR encryption.
    users: backfill.call(User, %i[otp_secret email unconfirmed_email first_name last_name]),
    # Invitation tokens and email
    invitations: backfill.call(Invitation, %i[token email]),
    # InviteCode tokens
    invite_codes: backfill.call(InviteCode, %i[token]),
    # Session user_agent (encryption) and ip_address_digest (hashing)
    sessions: backfill_sessions(batch_size, dry_run),
    # MobileDevice device_id
    mobile_devices: backfill.call(MobileDevice, %i[device_id]),
    # Provider items
    plaid_items: backfill.call(PlaidItem, %i[access_token raw_payload raw_institution_payload]),
    simplefin_items: backfill.call(SimplefinItem, %i[access_url raw_payload raw_institution_payload]),
    lunchflow_items: backfill.call(LunchflowItem, %i[api_key raw_payload raw_institution_payload]),
    enable_banking_items: backfill.call(EnableBankingItem, %i[client_certificate session_id raw_payload raw_institution_payload]),
    # Provider accounts
    plaid_accounts: backfill.call(PlaidAccount, %i[raw_payload raw_transactions_payload raw_holdings_payload raw_liabilities_payload]),
    simplefin_accounts: backfill.call(SimplefinAccount, %i[raw_payload raw_transactions_payload raw_holdings_payload]),
    lunchflow_accounts: backfill.call(LunchflowAccount, %i[raw_payload raw_transactions_payload]),
    enable_banking_accounts: backfill.call(EnableBankingAccount, %i[raw_payload raw_transactions_payload])
  }

  # Machine-readable summary of the whole run.
  summary = {
    ok: true,
    dry_run: dry_run,
    batch_size: batch_size,
    results: results
  }
  puts summary.to_json
end
# Rewrites the given fields of every row of +model_class+ in their encrypted form.
#
# For each record the current values are read through +safe_read_field+, assigned
# to a throwaway instance of the model (so no validations or callbacks of the
# real record run and no other encrypted attribute is touched), and the
# resulting raw values are persisted with +update_columns+. In dry-run mode
# records are read and counted but never written.
#
# A failure while reading or writing one record is recorded and the run
# continues with the next record.
#
# @param model_class [Class] model whose rows are processed
# @param fields [Array<Symbol>] attribute names to rewrite
# @param batch_size [Integer] number of records loaded per batch
# @param dry_run [Boolean] when true, nothing is written
# @yield [record] optional filter; a falsy result skips the record
# @return [Hash] :processed, :updated, :failed_count and :failed_samples (at most 3 entries)
def backfill_model(model_class, fields, batch_size, dry_run, &filter_block)
  processed = 0
  updated = 0
  failed_count = 0
  failed_samples = []

  # Batch iteration always runs in primary-key order (a scoped order would be
  # ignored), and find_each loads each batch with a single query.
  model_class.find_each(batch_size: batch_size) do |record|
    processed += 1

    # Skip if filter block returns false
    next if filter_block && !filter_block.call(record)

    begin
      # Read every field exactly once. Doing it inside the rescue means a single
      # unreadable record is reported as a failure instead of aborting the run.
      plaintext_values = fields.each_with_object({}) do |field, values|
        value = safe_read_field(record, field)
        values[field] = value if value.present?
      end
      next if plaintext_values.empty? || dry_run

      # Use a temporary instance to encrypt values (avoids triggering
      # validations/callbacks that might read other encrypted fields)
      encryptor = model_class.new
      plaintext_values.each { |field, value| encryptor.send("#{field}=", value) }

      # Extract the encrypted values from the temporary instance
      encrypted_attrs = plaintext_values.keys.to_h do |field|
        [ field, encryptor.read_attribute_before_type_cast(field) ]
      end

      # Write directly to database, bypassing callbacks/validations
      record.update_columns(encrypted_attrs)
      updated += 1
    rescue => e
      # Only the count and a few samples are reported, so keep memory bounded.
      failed_count += 1
      failed_samples << { id: record.id, error: e.class.name, message: e.message } if failed_samples.size < 3
    end
  end

  {
    processed: processed,
    updated: updated,
    failed_count: failed_count,
    failed_samples: failed_samples
  }
end
# Returns the current value of +field+ on +record+, whether the stored data is
# already encrypted or still plaintext.
#
# The regular attribute reader is tried first. With encryption configured, a
# plaintext column makes that reader raise
# ActiveRecord::Encryption::Errors::Decryption; in that case the raw,
# un-typecast database value is returned instead. Any other error propagates.
def safe_read_field(record, field)
  begin
    value = record.send(field)
  rescue ActiveRecord::Encryption::Errors::Decryption
    value = record.read_attribute_before_type_cast(field)
  end
  value
end
# Backfills every Session row: re-encrypts +user_agent+ and fills in
# +ip_address_digest+ where it is still blank.
#
# The user agent is read through +safe_read_field+ and encrypted on a throwaway
# Session instance; the digest is the SHA-256 hex digest of the stored IP
# address. Changes are persisted with +update_columns+, so no validations or
# callbacks run. In dry-run mode sessions are only counted.
#
# A failure on one session is recorded and the run continues with the next one.
#
# @param batch_size [Integer] number of sessions loaded per batch
# @param dry_run [Boolean] when true, nothing is read for rewriting or written
# @return [Hash] :processed, :updated, :failed_count and :failed_samples (at most 3 entries)
def backfill_sessions(batch_size, dry_run)
  processed = 0
  updated = 0
  failed_count = 0
  failed_samples = []

  # Batch iteration always runs in primary-key order (a scoped order would be
  # ignored), and find_each loads each batch with a single query.
  Session.find_each(batch_size: batch_size) do |session|
    processed += 1
    next if dry_run

    begin
      changes = {}

      # Re-save user_agent to trigger encryption (use safe read for plaintext)
      user_agent_value = safe_read_field(session, :user_agent)
      if user_agent_value.present?
        # Use temporary instance to encrypt
        encryptor = Session.new
        encryptor.user_agent = user_agent_value
        changes[:user_agent] = encryptor.read_attribute_before_type_cast(:user_agent)
      end

      # Hash IP address into ip_address_digest if not already done
      if session.ip_address.present? && session.ip_address_digest.blank?
        changes[:ip_address_digest] = Digest::SHA256.hexdigest(session.ip_address.to_s)
      end

      next if changes.empty?

      session.update_columns(changes)
      updated += 1
    rescue => e
      # Only the count and a few samples are reported, so keep memory bounded.
      failed_count += 1
      failed_samples << { id: session.id, error: e.class.name, message: e.message } if failed_samples.size < 3
    end
  end

  {
    processed: processed,
    updated: updated,
    failed_count: failed_count,
    failed_samples: failed_samples
  }
end
end