diff --git a/app/models/assistant/configurable.rb b/app/models/assistant/configurable.rb index e0122e38e..94d8755f1 100644 --- a/app/models/assistant/configurable.rb +++ b/app/models/assistant/configurable.rb @@ -58,7 +58,8 @@ module Assistant::Configurable Assistant::Function::GetHoldings, Assistant::Function::GetBalanceSheet, Assistant::Function::GetIncomeStatement, - Assistant::Function::ImportBankStatement + Assistant::Function::ImportBankStatement, + Assistant::Function::SearchFamilyFiles ] end diff --git a/app/models/assistant/function/search_family_files.rb b/app/models/assistant/function/search_family_files.rb new file mode 100644 index 000000000..2c0e5bf37 --- /dev/null +++ b/app/models/assistant/function/search_family_files.rb @@ -0,0 +1,118 @@ +class Assistant::Function::SearchFamilyFiles < Assistant::Function + class << self + def name + "search_family_files" + end + + def description + <<~DESC + Search through documents that the family has uploaded to their financial document store. + + Use this when the user asks questions about their uploaded financial documents such as + tax returns, bank statements, contracts, insurance policies, investment reports, or any + other files they've imported. + + Returns relevant excerpts from matching documents along with the source filename and + a relevance score. + + Supported file types include: PDF, DOCX, XLSX, PPTX, TXT, CSV, JSON, XML, HTML, MD, + and common source code formats. + + Example: + + ``` + search_family_files({ + query: "What was the total income on my 2024 tax return?" + }) + ``` + DESC + end + end + + def strict_mode? 
+ false + end + + def params_schema + build_schema( + required: [ "query" ], + properties: { + query: { + type: "string", + description: "The search query to find relevant information in the family's uploaded documents" + }, + max_results: { + type: "integer", + description: "Maximum number of results to return (default: 10, max: 20)" + } + } + ) + end + + def call(params = {}) + query = params["query"] + max_results = (params["max_results"] || 10).to_i.clamp(1, 20) + + unless family.vector_store_id.present? + return { + success: false, + error: "no_documents", + message: "No documents have been uploaded to the family document store yet." + } + end + + adapter = VectorStore.adapter + + unless adapter + return { + success: false, + error: "provider_not_configured", + message: "No vector store is configured. Set VECTOR_STORE_PROVIDER or configure OpenAI." + } + end + + response = adapter.search( + store_id: family.vector_store_id, + query: query, + max_results: max_results + ) + + unless response.success? + return { + success: false, + error: "search_failed", + message: "Failed to search documents: #{response.error&.message}" + } + end + + results = response.data + + if results.empty? + return { + success: true, + results: [], + message: "No matching documents found for the query." 
+ } + end + + { + success: true, + query: query, + result_count: results.size, + results: results.map do |result| + { + content: result[:content], + filename: result[:filename], + score: result[:score] + } + end + } + rescue => e + Rails.logger.error("SearchFamilyFiles error: #{e.class.name} - #{e.message}") + { + success: false, + error: "search_failed", + message: "An error occurred while searching documents: #{e.message.truncate(200)}" + } + end +end diff --git a/app/models/family.rb b/app/models/family.rb index 691d1886d..6bc4ad471 100644 --- a/app/models/family.rb +++ b/app/models/family.rb @@ -1,7 +1,8 @@ class Family < ApplicationRecord - include IndexaCapitalConnectable + include Syncable, AutoTransferMatchable, Subscribeable, VectorSearchable + include PlaidConnectable, SimplefinConnectable, LunchflowConnectable, EnableBankingConnectable include CoinbaseConnectable, CoinstatsConnectable, SnaptradeConnectable, MercuryConnectable - include PlaidConnectable, SimplefinConnectable, LunchflowConnectable, EnableBankingConnectable, Syncable, AutoTransferMatchable, Subscribeable + include IndexaCapitalConnectable DATE_FORMATS = [ [ "MM-DD-YYYY", "%m-%d-%Y" ], diff --git a/app/models/family/vector_searchable.rb b/app/models/family/vector_searchable.rb new file mode 100644 index 000000000..4f22d47fe --- /dev/null +++ b/app/models/family/vector_searchable.rb @@ -0,0 +1,84 @@ +module Family::VectorSearchable + extend ActiveSupport::Concern + + included do + has_many :family_documents, dependent: :destroy + end + + def ensure_vector_store! + return vector_store_id if vector_store_id.present? + + adapter = vector_store_adapter + return nil unless adapter + + response = adapter.create_store(name: "Family #{id} Documents") + return nil unless response.success? 
+
+    if update(vector_store_id: response.data[:id])
+      vector_store_id
+    else
+      adapter.delete_store(store_id: response.data[:id]) rescue nil
+      nil
+    end
+  end
+
+  def search_documents(query, max_results: 10)
+    return [] unless vector_store_id.present?
+
+    adapter = vector_store_adapter
+    return [] unless adapter
+
+    response = adapter.search(
+      store_id: vector_store_id,
+      query: query,
+      max_results: max_results
+    )
+
+    response.success? ? response.data : []
+  end
+
+  def upload_document(file_content:, filename:)
+    adapter = vector_store_adapter
+    return nil unless adapter
+
+    store_id = ensure_vector_store!
+    return nil unless store_id
+
+    response = adapter.upload_file(
+      store_id: store_id,
+      file_content: file_content,
+      filename: filename
+    )
+
+    return nil unless response.success?
+
+    family_documents.create!(
+      filename: filename,
+      content_type: Marcel::MimeType.for(name: filename),
+      file_size: file_content.bytesize,
+      provider_file_id: response.data[:file_id],
+      status: "ready"
+    )
+  end
+
+  def remove_document(family_document)
+    # Documents that were never indexed (no provider_file_id) have nothing to
+    # delete remotely; for the rest the provider copy must be removed first.
+    if family_document.provider_file_id.present?
+      adapter = vector_store_adapter
+      return false unless adapter && vector_store_id.present?
+
+      response = adapter.remove_file(store_id: vector_store_id, file_id: family_document.provider_file_id)
+      return false unless response.success?
+    end
+
+    # Report the real outcome of the delete instead of assuming it succeeded.
+    family_document.destroy.present?
+  end
+
+  private
+
+    def vector_store_adapter
+      VectorStore.adapter
+    end
+end
diff --git a/app/models/family_document.rb b/app/models/family_document.rb
new file mode 100644
index 000000000..8320a2a39
--- /dev/null
+++ b/app/models/family_document.rb
@@ -0,0 +1,25 @@
+class FamilyDocument < ApplicationRecord
+  belongs_to :family
+
+  has_one_attached :file
+
+  SUPPORTED_EXTENSIONS = VectorStore::Base::SUPPORTED_EXTENSIONS
+
+  validates :filename, presence: true
+  validates :status, inclusion: { in: %w[pending processing ready error] }
+
+  scope :ready, -> { where(status: "ready") }
+
+  def mark_ready!
+    update!(status: "ready")
+  end
+
+  def mark_error!(error_message = nil)
+    update!(status: "error", metadata: (metadata || {}).merge("error" => error_message))
+  end
+
+  def supported_extension?
+    ext = File.extname(filename.to_s).downcase # to_s: File.extname(nil) raises TypeError
+    SUPPORTED_EXTENSIONS.include?(ext)
+  end
+end
diff --git a/app/models/vector_store.rb b/app/models/vector_store.rb
new file mode 100644
index 000000000..78d09ab7e
--- /dev/null
+++ b/app/models/vector_store.rb
@@ -0,0 +1,14 @@
+module VectorStore
+  Error = Class.new(StandardError)
+  ConfigurationError = Class.new(Error)
+
+  Response = Data.define(:success?, :data, :error)
+
+  def self.adapter
+    Registry.adapter
+  end
+
+  def self.configured?
+    Registry.configured?
+  end
+end
diff --git a/app/models/vector_store/base.rb b/app/models/vector_store/base.rb
new file mode 100644
index 000000000..bd44d289a
--- /dev/null
+++ b/app/models/vector_store/base.rb
@@ -0,0 +1,68 @@
+class VectorStore::Base
+  SUPPORTED_EXTENSIONS = %w[
+    .c .cpp .css .csv .docx .gif .go .html .java .jpeg .jpg .js .json
+    .md .pdf .php .png .pptx .py .rb .sh .tar .tex .ts .txt .xlsx .xml .zip
+  ].freeze
+
+  # Create a new vector store / collection / namespace
+  # @param name [String] human-readable name
+  # @return [VectorStore::Response] on success, data is { id: "store-identifier" }
+  def create_store(name:)
+    raise NotImplementedError
+  end
+
+  # Delete a vector store and all its files
+  # @param store_id [String]
+  def delete_store(store_id:)
+    raise NotImplementedError
+  end
+
+  # Upload and index a file
+  # @param store_id [String]
+  # @param file_content [String] raw file bytes
+  # @param filename [String] original filename with extension
+  # @return [VectorStore::Response] on success, data is { file_id: "file-identifier" }
+  def upload_file(store_id:, file_content:, filename:)
+    raise NotImplementedError
+  end
+
+  # Remove a previously uploaded file
+  # @param store_id [String]
+  # @param file_id [String]
+  def remove_file(store_id:, file_id:)
+    raise NotImplementedError
+  end
+
+  # Semantic search across indexed files
+  # @param 
store_id [String]
+  # @param query [String] natural-language search query
+  # @param max_results [Integer]
+  # @return [VectorStore::Response] on success, data is an Array of { content:, filename:, score:, file_id: }
+  def search(store_id:, query:, max_results: 10)
+    raise NotImplementedError
+  end
+
+  # Which file extensions this adapter can ingest
+  def supported_extensions
+    SUPPORTED_EXTENSIONS
+  end
+
+  private
+
+    def success(data)
+      VectorStore::Response.new(success?: true, data: data, error: nil)
+    end
+
+    def failure(error)
+      wrapped = error.is_a?(VectorStore::Error) ? error : VectorStore::Error.new(error.message)
+      VectorStore::Response.new(success?: false, data: nil, error: wrapped)
+    end
+
+    def with_response
+      data = yield
+      success(data)
+    rescue => e
+      Rails.logger.error("#{self.class.name} error: #{e.class} - #{e.message}")
+      failure(e)
+    end
+end
diff --git a/app/models/vector_store/openai.rb b/app/models/vector_store/openai.rb
new file mode 100644
index 000000000..43d487826
--- /dev/null
+++ b/app/models/vector_store/openai.rb
@@ -0,0 +1,89 @@
+# Adapter that delegates to OpenAI's hosted vector-store and file-search APIs.
+#
+# Requirements:
+#   - gem "ruby-openai" (already in Gemfile)
+#   - OPENAI_ACCESS_TOKEN env var or Setting.openai_access_token
+#
+# OpenAI manages chunking, embedding, and retrieval; we simply upload files
+# and issue search queries.
+class VectorStore::Openai < VectorStore::Base
+  def initialize(access_token:, uri_base: nil)
+    client_options = { access_token: access_token }
+    client_options[:uri_base] = uri_base if uri_base.present?
+ client_options[:request_timeout] = ENV.fetch("OPENAI_REQUEST_TIMEOUT", 60).to_i + + @client = ::OpenAI::Client.new(**client_options) + end + + def create_store(name:) + with_response do + response = client.vector_stores.create(parameters: { name: name }) + { id: response["id"] } + end + end + + def delete_store(store_id:) + with_response do + client.vector_stores.delete(id: store_id) + end + end + + def upload_file(store_id:, file_content:, filename:) + with_response do + tempfile = Tempfile.new([ File.basename(filename, ".*"), File.extname(filename) ]) + begin + tempfile.binmode + tempfile.write(file_content) + tempfile.rewind + + file_response = client.files.upload( + parameters: { file: tempfile, purpose: "assistants" } + ) + file_id = file_response["id"] + + begin + client.vector_store_files.create( + vector_store_id: store_id, + parameters: { file_id: file_id } + ) + rescue => e + client.files.delete(id: file_id) rescue nil + raise + end + + { file_id: file_id } + ensure + tempfile.close + tempfile.unlink + end + end + end + + def remove_file(store_id:, file_id:) + with_response do + client.vector_store_files.delete(vector_store_id: store_id, id: file_id) + end + end + + def search(store_id:, query:, max_results: 10) + with_response do + response = client.vector_stores.search( + id: store_id, + parameters: { query: query, max_num_results: max_results } + ) + + (response["data"] || []).map do |result| + { + content: Array(result["content"]).filter_map { |c| c["text"] }.join("\n"), + filename: result["filename"], + score: result["score"], + file_id: result["file_id"] + } + end + end + end + + private + + attr_reader :client +end diff --git a/app/models/vector_store/pgvector.rb b/app/models/vector_store/pgvector.rb new file mode 100644 index 000000000..5d4fe4a61 --- /dev/null +++ b/app/models/vector_store/pgvector.rb @@ -0,0 +1,89 @@ +# Adapter that stores embeddings locally in PostgreSQL using the pgvector extension. 
+# +# This keeps all data on your own infrastructure — no external vector-store +# service required. You still need an embedding provider (e.g. OpenAI, or a +# local model served via an OpenAI-compatible endpoint) to turn text into +# vectors before insertion and at query time. +# +# Requirements (not yet wired up): +# - PostgreSQL with the `vector` extension enabled +# - gem "neighbor" (for ActiveRecord integration) or raw SQL +# - An embedding model endpoint (EMBEDDING_MODEL_URL / EMBEDDING_MODEL_NAME) +# - A chunking strategy (see #chunk_file below) +# +# Schema sketch (for reference — migration not included): +# +# create_table :vector_store_chunks do |t| +# t.string :store_id, null: false # logical namespace +# t.string :file_id, null: false +# t.string :filename +# t.text :content # the original text chunk +# t.vector :embedding, limit: 1536 # adjust dimensions to your model +# t.jsonb :metadata, default: {} +# t.timestamps +# end +# add_index :vector_store_chunks, :store_id +# add_index :vector_store_chunks, :file_id +# +class VectorStore::Pgvector < VectorStore::Base + def create_store(name:) + with_response do + # A "store" is just a logical namespace (a UUID). + # No external resource to create. + # { id: SecureRandom.uuid } + raise VectorStore::Error, "Pgvector adapter is not yet implemented" + end + end + + def delete_store(store_id:) + with_response do + # TODO: DELETE FROM vector_store_chunks WHERE store_id = ? + raise VectorStore::Error, "Pgvector adapter is not yet implemented" + end + end + + def upload_file(store_id:, file_content:, filename:) + with_response do + # 1. chunk_file(file_content, filename) → array of text chunks + # 2. embed each chunk via the configured embedding model + # 3. 
INSERT INTO vector_store_chunks (store_id, file_id, filename, content, embedding) + raise VectorStore::Error, "Pgvector adapter is not yet implemented" + end + end + + def remove_file(store_id:, file_id:) + with_response do + # TODO: DELETE FROM vector_store_chunks WHERE store_id = ? AND file_id = ? + raise VectorStore::Error, "Pgvector adapter is not yet implemented" + end + end + + def search(store_id:, query:, max_results: 10) + with_response do + # 1. embed(query) → vector + # 2. SELECT content, filename, file_id, + # 1 - (embedding <=> query_vector) AS score + # FROM vector_store_chunks + # WHERE store_id = ? + # ORDER BY embedding <=> query_vector + # LIMIT max_results + raise VectorStore::Error, "Pgvector adapter is not yet implemented" + end + end + + private + + # Placeholder: split file content into overlapping text windows. + # A real implementation would handle PDFs, DOCX, etc. via + # libraries like `pdf-reader`, `docx`, or an extraction service. + def chunk_file(file_content, filename) + # TODO: implement format-aware chunking + [] + end + + # Placeholder: call an embedding API to turn text into a vector. + def embed(text) + # TODO: call EMBEDDING_MODEL_URL or OpenAI embeddings endpoint + raise VectorStore::Error, "Embedding model not configured" + end +end diff --git a/app/models/vector_store/qdrant.rb b/app/models/vector_store/qdrant.rb new file mode 100644 index 000000000..fe1e4608c --- /dev/null +++ b/app/models/vector_store/qdrant.rb @@ -0,0 +1,81 @@ +# Adapter for Qdrant — a dedicated open-source vector database. +# +# Qdrant can run locally (Docker), self-hosted, or as a managed cloud service. +# Like the Pgvector adapter you still supply your own embedding model; Qdrant +# handles storage, indexing, and fast ANN search. 
+# +# Requirements (not yet wired up): +# - A running Qdrant instance (QDRANT_URL, default http://localhost:6333) +# - Optional QDRANT_API_KEY for authenticated clusters +# - An embedding model endpoint (EMBEDDING_MODEL_URL / EMBEDDING_MODEL_NAME) +# - gem "qdrant-ruby" or raw Faraday HTTP calls +# +# Mapping: +# store → Qdrant collection +# file → set of points sharing a file_id payload field +# search → query vector + payload filter on store_id +# +class VectorStore::Qdrant < VectorStore::Base + def initialize(url: "http://localhost:6333", api_key: nil) + @url = url + @api_key = api_key + end + + def create_store(name:) + with_response do + # POST /collections/{collection_name} { vectors: { size: 1536, distance: "Cosine" } } + # collection_name could be a slugified version of `name` or a UUID. + raise VectorStore::Error, "Qdrant adapter is not yet implemented" + end + end + + def delete_store(store_id:) + with_response do + # DELETE /collections/{store_id} + raise VectorStore::Error, "Qdrant adapter is not yet implemented" + end + end + + def upload_file(store_id:, file_content:, filename:) + with_response do + # 1. chunk file → text chunks + # 2. embed each chunk + # 3. PUT /collections/{store_id}/points { points: [...] } + # each point: { id: uuid, vector: [...], payload: { file_id, filename, content } } + raise VectorStore::Error, "Qdrant adapter is not yet implemented" + end + end + + def remove_file(store_id:, file_id:) + with_response do + # POST /collections/{store_id}/points/delete + # { filter: { must: [{ key: "file_id", match: { value: file_id } }] } } + raise VectorStore::Error, "Qdrant adapter is not yet implemented" + end + end + + def search(store_id:, query:, max_results: 10) + with_response do + # 1. embed(query) → vector + # 2. POST /collections/{store_id}/points/search + # { vector: [...], limit: max_results, with_payload: true } + # 3. 
map results → [{ content:, filename:, score:, file_id: }]
+      raise VectorStore::Error, "Qdrant adapter is not yet implemented"
+    end
+  end
+
+  private
+
+    def connection
+      @connection ||= Faraday.new(url: @url) do |f|
+        f.request :json
+        f.response :json
+        f.adapter Faraday.default_adapter
+        f.headers["api-key"] = @api_key if @api_key.present?
+      end
+    end
+
+    def embed(text)
+      raise VectorStore::Error, "Embedding model not configured"
+    end
+end
diff --git a/app/models/vector_store/registry.rb b/app/models/vector_store/registry.rb
new file mode 100644
index 000000000..10c73d770
--- /dev/null
+++ b/app/models/vector_store/registry.rb
@@ -0,0 +1,70 @@
+class VectorStore::Registry
+  ADAPTERS = {
+    openai: "VectorStore::Openai",
+    pgvector: "VectorStore::Pgvector",
+    qdrant: "VectorStore::Qdrant"
+  }.freeze
+
+  class << self
+    # Returns the configured adapter instance.
+    # Reads from VECTOR_STORE_PROVIDER env var; only when it is unset does it
+    # fall back to :openai (if OpenAI credentials are present).
+    def adapter
+      name = adapter_name
+      return nil unless name
+
+      build_adapter(name)
+    end
+
+    def configured?
+      adapter.present?
+    end
+
+    def adapter_name
+      explicit = ENV["VECTOR_STORE_PROVIDER"].presence
+      # An explicit provider is authoritative: an unrecognized value resolves to
+      # nil (not configured) instead of silently routing documents to OpenAI.
+      return (ADAPTERS.key?(explicit.to_sym) ? explicit.to_sym : nil) if explicit
+      :openai if openai_access_token.present?
+    end
+
+    private
+
+      def build_adapter(name)
+        klass = ADAPTERS[name]&.safe_constantize
+        raise VectorStore::ConfigurationError, "Unknown vector store adapter: #{name}" unless klass
+
+        case name
+        when :openai then build_openai
+        when :pgvector then build_pgvector
+        when :qdrant then build_qdrant
+        else raise VectorStore::ConfigurationError, "No builder defined for adapter: #{name}"
+        end
+      end
+
+      def build_openai
+        token = openai_access_token
+        return nil unless token.present?
+ + VectorStore::Openai.new( + access_token: token, + uri_base: ENV["OPENAI_URI_BASE"].presence || Setting.openai_uri_base + ) + end + + def build_pgvector + VectorStore::Pgvector.new + end + + def build_qdrant + url = ENV.fetch("QDRANT_URL", "http://localhost:6333") + api_key = ENV["QDRANT_API_KEY"].presence + + VectorStore::Qdrant.new(url: url, api_key: api_key) + end + + def openai_access_token + ENV["OPENAI_ACCESS_TOKEN"].presence || Setting.openai_access_token + end + end +end diff --git a/db/migrate/20260211120001_add_vector_store_support.rb b/db/migrate/20260211120001_add_vector_store_support.rb new file mode 100644 index 000000000..b4a8355f0 --- /dev/null +++ b/db/migrate/20260211120001_add_vector_store_support.rb @@ -0,0 +1,19 @@ +class AddVectorStoreSupport < ActiveRecord::Migration[7.2] + def change + add_column :families, :vector_store_id, :string + + create_table :family_documents, id: :uuid, default: -> { "gen_random_uuid()" } do |t| + t.references :family, null: false, foreign_key: true, type: :uuid + t.string :filename, null: false + t.string :content_type + t.integer :file_size + t.string :provider_file_id + t.string :status, null: false, default: "pending" + t.jsonb :metadata, default: {} + t.timestamps + end + + add_index :family_documents, :status + add_index :family_documents, :provider_file_id + end +end diff --git a/db/schema.rb b/db/schema.rb index dd8b82423..785ba1fa3 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. 
-ActiveRecord::Schema[7.2].define(version: 2026_02_10_120000) do
+ActiveRecord::Schema[7.2].define(version: 2026_02_11_120001) do
   # These are extensions that must be enabled in order to support this database
   enable_extension "pgcrypto"
   enable_extension "plpgsql"
@@ -500,9 +500,25 @@ ActiveRecord::Schema[7.2].define(version: 2026_02_10_120000) do
     t.datetime "latest_sync_completed_at", default: -> { "CURRENT_TIMESTAMP" }
     t.boolean "recurring_transactions_disabled", default: false, null: false
     t.integer "month_start_day", default: 1, null: false
+    t.string "vector_store_id"
     t.check_constraint "month_start_day >= 1 AND month_start_day <= 28", name: "month_start_day_range"
   end
 
+  create_table "family_documents", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t|
+    t.uuid "family_id", null: false
+    t.string "filename", null: false
+    t.string "content_type"
+    t.integer "file_size"
+    t.string "provider_file_id"
+    t.string "status", default: "pending", null: false
+    t.jsonb "metadata", default: {}
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+    t.index ["family_id"], name: "index_family_documents_on_family_id"
+    t.index ["provider_file_id"], name: "index_family_documents_on_provider_file_id"
+    t.index ["status"], name: "index_family_documents_on_status"
+  end
+
   create_table "family_exports", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t|
     t.uuid "family_id", null: false
     t.string "status", default: "pending", null: false
@@ -1497,6 +1513,7 @@ ActiveRecord::Schema[7.2].define(version: 2026_02_10_120000) do
   add_foreign_key "eval_results", "eval_samples"
   add_foreign_key "eval_runs", "eval_datasets"
   add_foreign_key "eval_samples", "eval_datasets"
+  add_foreign_key "family_documents", "families"
   add_foreign_key "family_exports", "families"
   add_foreign_key "family_merchant_associations", "families"
   add_foreign_key "family_merchant_associations", "merchants"
diff --git a/docs/hosting/ai.md b/docs/hosting/ai.md
index 1106361a1..e25ddec37 100644
--- a/docs/hosting/ai.md
+++ b/docs/hosting/ai.md
@@ -633,6 +633,100 @@ The assistant uses OpenAI's function calling (tool use) to access user data:
 
 These are defined in `app/models/assistant/function/`.
 
+### Vector Store (Document Search)
+
+Sure's AI assistant can search documents that have been uploaded to a family's vault. Under the hood, documents are indexed in a **vector store** so the assistant can retrieve relevant passages when answering questions (Retrieval-Augmented Generation).
+
+#### How It Works
+
+1. When a user uploads a document to their family vault, it is automatically pushed to the configured vector store.
+2. When the assistant needs financial context from uploaded files, it calls the `search_family_files` function.
+3. The vector store returns the most relevant passages, which the assistant uses to answer the question.
+ +#### Supported Backends + +| Backend | Best For | Requirements | +|---------|----------|--------------| +| **OpenAI** (default) | Cloud deployments, zero setup | `OPENAI_ACCESS_TOKEN` | +| **Pgvector** | Self-hosted, full data privacy | PostgreSQL with `pgvector` extension | +| **Qdrant** | Self-hosted, dedicated vector DB | Running Qdrant instance | + +#### Configuration + +##### OpenAI (Default) + +No extra configuration is needed. If you already have `OPENAI_ACCESS_TOKEN` set for the AI assistant, document search works automatically. OpenAI manages chunking, embedding, and retrieval. + +```bash +# Already set for AI chat — document search uses the same token +OPENAI_ACCESS_TOKEN=sk-proj-... +``` + +##### Pgvector (Self-Hosted) + +Use PostgreSQL's pgvector extension for fully local document search: + +```bash +VECTOR_STORE_PROVIDER=pgvector +``` + +> **Note:** The pgvector adapter is currently a skeleton. A future release will add full support including embedding model configuration. + +##### Qdrant (Self-Hosted) + +Use a dedicated Qdrant vector database: + +```bash +VECTOR_STORE_PROVIDER=qdrant +QDRANT_URL=http://localhost:6333 # Default if not set +QDRANT_API_KEY=your-api-key # Optional, for authenticated instances +``` + +Docker Compose example: + +```yaml +services: + sure: + environment: + - VECTOR_STORE_PROVIDER=qdrant + - QDRANT_URL=http://qdrant:6333 + depends_on: + - qdrant + + qdrant: + image: qdrant/qdrant:latest + ports: + - "6333:6333" + volumes: + - qdrant_data:/qdrant/storage + +volumes: + qdrant_data: +``` + +> **Note:** The Qdrant adapter is currently a skeleton. A future release will add full support including collection management and embedding configuration. + +#### Verifying the Configuration + +You can check whether a vector store is properly configured from the Rails console: + +```ruby +VectorStore.configured? 
# => true / false
+VectorStore.adapter              # => #<VectorStore::Openai ...>
+VectorStore.adapter.class.name   # => "VectorStore::Openai"
+```
+
+#### Supported File Types
+
+The following file extensions are supported for document upload and search:
+
+`.c`, `.cpp`, `.css`, `.csv`, `.docx`, `.gif`, `.go`, `.html`, `.java`, `.jpeg`, `.jpg`, `.js`, `.json`, `.md`, `.pdf`, `.php`, `.png`, `.pptx`, `.py`, `.rb`, `.sh`, `.tar`, `.tex`, `.ts`, `.txt`, `.xlsx`, `.xml`, `.zip`
+
+#### Privacy Notes
+
+- **OpenAI backend:** Document content is sent to OpenAI's API for indexing and search. The same privacy considerations as the AI chat apply.
+- **Pgvector / Qdrant backends:** Documents and their embeddings are stored on your infrastructure. An embedding model is still required to index and query documents; if you use a hosted provider such as OpenAI for embeddings, document text is sent to that provider, so use a locally served model to keep everything on-premises.
+
 ### Multi-Model Setup
 
 Currently not supported out of the box, but you could:
diff --git a/test/fixtures/family_documents.yml b/test/fixtures/family_documents.yml
new file mode 100644
index 000000000..76e4b7fc2
--- /dev/null
+++ b/test/fixtures/family_documents.yml
@@ -0,0 +1,26 @@
+tax_return:
+  family: dylan_family
+  filename: 2024_tax_return.pdf
+  content_type: application/pdf
+  file_size: 102400
+  provider_file_id: file-abc123
+  status: ready
+  metadata: {}
+
+bank_statement:
+  family: dylan_family
+  filename: jan_2025_statement.pdf
+  content_type: application/pdf
+  file_size: 51200
+  provider_file_id: file-def456
+  status: ready
+  metadata: {}
+
+pending_doc:
+  family: dylan_family
+  filename: pending_upload.docx
+  content_type: application/vnd.openxmlformats-officedocument.wordprocessingml.document
+  file_size: 25600
+  provider_file_id:
+  status: pending
+  metadata: {}
diff --git a/test/models/assistant/function/search_family_files_test.rb b/test/models/assistant/function/search_family_files_test.rb
new file mode 100644
index 000000000..254596810
--- /dev/null
+++ b/test/models/assistant/function/search_family_files_test.rb
@@ -0,0 +1,129 @@
+require "test_helper"
+
+class Assistant::Function::SearchFamilyFilesTest < ActiveSupport::TestCase
+  setup do
+    @user = users(:family_admin)
+    @function = 
Assistant::Function::SearchFamilyFiles.new(@user) + end + + test "has correct name" do + assert_equal "search_family_files", @function.name + end + + test "has a description" do + assert_not_empty @function.description + end + + test "is not in strict mode" do + assert_not @function.strict_mode? + end + + test "params_schema requires query" do + schema = @function.params_schema + assert_includes schema[:required], "query" + assert schema[:properties].key?(:query) + end + + test "generates valid tool definition" do + definition = @function.to_definition + assert_equal "search_family_files", definition[:name] + assert_not_nil definition[:description] + assert_not_nil definition[:params_schema] + assert_equal false, definition[:strict] + end + + test "returns no_documents error when family has no vector store" do + @user.family.update!(vector_store_id: nil) + + result = @function.call("query" => "tax return") + + assert_equal false, result[:success] + assert_equal "no_documents", result[:error] + end + + test "returns provider_not_configured when no adapter is available" do + @user.family.update!(vector_store_id: "vs_test123") + VectorStore::Registry.stubs(:adapter).returns(nil) + + result = @function.call("query" => "tax return") + + assert_equal false, result[:success] + assert_equal "provider_not_configured", result[:error] + end + + test "returns search results on success" do + @user.family.update!(vector_store_id: "vs_test123") + + mock_adapter = mock("vector_store_adapter") + mock_adapter.stubs(:search).returns( + VectorStore::Response.new( + success?: true, + data: [ + { content: "Total income: $85,000", filename: "2024_tax_return.pdf", score: 0.95, file_id: "file-abc" }, + { content: "W-2 wages: $80,000", filename: "2024_tax_return.pdf", score: 0.87, file_id: "file-abc" } + ], + error: nil + ) + ) + + VectorStore::Registry.stubs(:adapter).returns(mock_adapter) + + result = @function.call("query" => "What was my total income?") + + assert_equal true, 
result[:success] + assert_equal 2, result[:result_count] + assert_equal "Total income: $85,000", result[:results].first[:content] + assert_equal "2024_tax_return.pdf", result[:results].first[:filename] + end + + test "returns empty results message when no matches found" do + @user.family.update!(vector_store_id: "vs_test123") + + mock_adapter = mock("vector_store_adapter") + mock_adapter.stubs(:search).returns( + VectorStore::Response.new(success?: true, data: [], error: nil) + ) + + VectorStore::Registry.stubs(:adapter).returns(mock_adapter) + + result = @function.call("query" => "nonexistent document") + + assert_equal true, result[:success] + assert_empty result[:results] + end + + test "handles search failure gracefully" do + @user.family.update!(vector_store_id: "vs_test123") + + mock_adapter = mock("vector_store_adapter") + mock_adapter.stubs(:search).returns( + VectorStore::Response.new( + success?: false, + data: nil, + error: VectorStore::Error.new("API rate limit exceeded") + ) + ) + + VectorStore::Registry.stubs(:adapter).returns(mock_adapter) + + result = @function.call("query" => "tax return") + + assert_equal false, result[:success] + assert_equal "search_failed", result[:error] + end + + test "caps max_results at 20" do + @user.family.update!(vector_store_id: "vs_test123") + + mock_adapter = mock("vector_store_adapter") + mock_adapter.expects(:search).with( + store_id: "vs_test123", + query: "test", + max_results: 20 + ).returns(VectorStore::Response.new(success?: true, data: [], error: nil)) + + VectorStore::Registry.stubs(:adapter).returns(mock_adapter) + + @function.call("query" => "test", "max_results" => 50) + end +end diff --git a/test/models/family_document_test.rb b/test/models/family_document_test.rb new file mode 100644 index 000000000..4be07ef41 --- /dev/null +++ b/test/models/family_document_test.rb @@ -0,0 +1,54 @@ +require "test_helper" + +class FamilyDocumentTest < ActiveSupport::TestCase + setup do + @family = families(:dylan_family) 
+ @document = family_documents(:tax_return) + end + + test "belongs to a family" do + assert_equal @family, @document.family + end + + test "validates filename presence" do + doc = FamilyDocument.new(family: @family, status: "pending") + assert_not doc.valid? + assert_includes doc.errors[:filename], "can't be blank" + end + + test "validates status inclusion" do + doc = FamilyDocument.new(family: @family, filename: "test.pdf", status: "invalid") + assert_not doc.valid? + assert_includes doc.errors[:status], "is not included in the list" + end + + test "ready scope returns only ready documents" do + ready_docs = @family.family_documents.ready + assert ready_docs.all? { |d| d.status == "ready" } + assert_not_includes ready_docs, family_documents(:pending_doc) + end + + test "mark_ready! updates status" do + doc = family_documents(:pending_doc) + doc.mark_ready! + assert_equal "ready", doc.reload.status + end + + test "mark_error! updates status and metadata" do + doc = family_documents(:pending_doc) + doc.mark_error!("Upload failed") + doc.reload + assert_equal "error", doc.status + assert_equal "Upload failed", doc.metadata["error"] + end + + test "supported_extension? returns true for supported types" do + doc = FamilyDocument.new(filename: "report.pdf") + assert doc.supported_extension? + end + + test "supported_extension? returns false for unsupported types" do + doc = FamilyDocument.new(filename: "video.mp4") + assert_not doc.supported_extension? 
+ end +end diff --git a/test/models/vector_store/base_test.rb b/test/models/vector_store/base_test.rb new file mode 100644 index 000000000..9fb755c56 --- /dev/null +++ b/test/models/vector_store/base_test.rb @@ -0,0 +1,42 @@ +require "test_helper" + +class VectorStore::BaseTest < ActiveSupport::TestCase + setup do + @adapter = VectorStore::Base.new + end + + test "create_store raises NotImplementedError" do + assert_raises(NotImplementedError) { @adapter.create_store(name: "test") } + end + + test "delete_store raises NotImplementedError" do + assert_raises(NotImplementedError) { @adapter.delete_store(store_id: "test") } + end + + test "upload_file raises NotImplementedError" do + assert_raises(NotImplementedError) { @adapter.upload_file(store_id: "s", file_content: "c", filename: "f") } + end + + test "remove_file raises NotImplementedError" do + assert_raises(NotImplementedError) { @adapter.remove_file(store_id: "s", file_id: "f") } + end + + test "search raises NotImplementedError" do + assert_raises(NotImplementedError) { @adapter.search(store_id: "s", query: "q") } + end + + test "supported_extensions includes common file types" do + exts = @adapter.supported_extensions + assert_includes exts, ".pdf" + assert_includes exts, ".docx" + assert_includes exts, ".xlsx" + assert_includes exts, ".csv" + assert_includes exts, ".json" + assert_includes exts, ".txt" + assert_includes exts, ".md" + end + + test "SUPPORTED_EXTENSIONS is frozen" do + assert VectorStore::Base::SUPPORTED_EXTENSIONS.frozen? 
+ end +end diff --git a/test/models/vector_store/openai_test.rb b/test/models/vector_store/openai_test.rb new file mode 100644 index 000000000..dc900c073 --- /dev/null +++ b/test/models/vector_store/openai_test.rb @@ -0,0 +1,132 @@ +require "test_helper" + +class VectorStore::OpenaiTest < ActiveSupport::TestCase + setup do + @adapter = VectorStore::Openai.new(access_token: "sk-test-key") + end + + test "create_store wraps response" do + mock_client = mock("openai_client") + mock_vs = mock("vector_stores") + mock_vs.expects(:create).with(parameters: { name: "Test Store" }).returns({ "id" => "vs_abc123" }) + mock_client.stubs(:vector_stores).returns(mock_vs) + + @adapter.instance_variable_set(:@client, mock_client) + + response = @adapter.create_store(name: "Test Store") + assert response.success? + assert_equal "vs_abc123", response.data[:id] + end + + test "delete_store wraps response" do + mock_client = mock("openai_client") + mock_vs = mock("vector_stores") + mock_vs.expects(:delete).with(id: "vs_abc123").returns(true) + mock_client.stubs(:vector_stores).returns(mock_vs) + + @adapter.instance_variable_set(:@client, mock_client) + + response = @adapter.delete_store(store_id: "vs_abc123") + assert response.success? + end + + test "upload_file uploads and attaches to store" do + mock_client = mock("openai_client") + mock_files = mock("files") + mock_files.expects(:upload).returns({ "id" => "file-xyz" }) + mock_vs_files = mock("vector_store_files") + mock_vs_files.expects(:create).with( + vector_store_id: "vs_abc123", + parameters: { file_id: "file-xyz" } + ).returns(true) + + mock_client.stubs(:files).returns(mock_files) + mock_client.stubs(:vector_store_files).returns(mock_vs_files) + + @adapter.instance_variable_set(:@client, mock_client) + + response = @adapter.upload_file( + store_id: "vs_abc123", + file_content: "Hello world", + filename: "test.txt" + ) + + assert response.success? 
+ assert_equal "file-xyz", response.data[:file_id] + end + + test "remove_file deletes from store" do + mock_client = mock("openai_client") + mock_vs_files = mock("vector_store_files") + mock_vs_files.expects(:delete).with( + vector_store_id: "vs_abc123", + id: "file-xyz" + ).returns(true) + mock_client.stubs(:vector_store_files).returns(mock_vs_files) + + @adapter.instance_variable_set(:@client, mock_client) + + response = @adapter.remove_file(store_id: "vs_abc123", file_id: "file-xyz") + assert response.success? + end + + test "search uses gem client and parses results" do + mock_client = mock("openai_client") + mock_vs = mock("vector_stores") + mock_vs.expects(:search).with( + id: "vs_abc123", + parameters: { query: "income", max_num_results: 5 } + ).returns({ + "data" => [ + { + "file_id" => "file-xyz", + "filename" => "tax_return.pdf", + "score" => 0.95, + "content" => [ { "type" => "text", "text" => "Total income: $85,000" } ] + } + ] + }) + mock_client.stubs(:vector_stores).returns(mock_vs) + + @adapter.instance_variable_set(:@client, mock_client) + + response = @adapter.search(store_id: "vs_abc123", query: "income", max_results: 5) + assert response.success? + assert_equal 1, response.data.size + assert_equal "Total income: $85,000", response.data.first[:content] + assert_equal "tax_return.pdf", response.data.first[:filename] + assert_equal 0.95, response.data.first[:score] + end + + test "search returns empty array when no results" do + mock_client = mock("openai_client") + mock_vs = mock("vector_stores") + mock_vs.expects(:search).returns({ "data" => [] }) + mock_client.stubs(:vector_stores).returns(mock_vs) + + @adapter.instance_variable_set(:@client, mock_client) + + response = @adapter.search(store_id: "vs_abc123", query: "nothing") + assert response.success? 
+ assert_empty response.data + end + + test "wraps errors in failure response" do + mock_client = mock("openai_client") + mock_vs = mock("vector_stores") + mock_vs.expects(:create).raises(StandardError, "API error") + mock_client.stubs(:vector_stores).returns(mock_vs) + + @adapter.instance_variable_set(:@client, mock_client) + + response = @adapter.create_store(name: "Broken Store") + assert_not response.success? + assert_equal "API error", response.error.message + end + + test "supported_extensions returns the default list" do + assert_includes @adapter.supported_extensions, ".pdf" + assert_includes @adapter.supported_extensions, ".docx" + assert_includes @adapter.supported_extensions, ".csv" + end +end diff --git a/test/models/vector_store/registry_test.rb b/test/models/vector_store/registry_test.rb new file mode 100644 index 000000000..b2d66779e --- /dev/null +++ b/test/models/vector_store/registry_test.rb @@ -0,0 +1,53 @@ +require "test_helper" + +class VectorStore::RegistryTest < ActiveSupport::TestCase + test "adapter_name defaults to openai when access token present" do + VectorStore::Registry.stubs(:openai_access_token).returns("sk-test") + ClimateControl.modify(VECTOR_STORE_PROVIDER: nil) do + assert_equal :openai, VectorStore::Registry.adapter_name + end + end + + test "adapter_name returns nil when no credentials configured" do + VectorStore::Registry.stubs(:openai_access_token).returns(nil) + ClimateControl.modify(VECTOR_STORE_PROVIDER: nil) do + assert_nil VectorStore::Registry.adapter_name + end + end + + test "adapter_name respects explicit VECTOR_STORE_PROVIDER" do + ClimateControl.modify(VECTOR_STORE_PROVIDER: "qdrant") do + assert_equal :qdrant, VectorStore::Registry.adapter_name + end + end + + test "adapter_name falls back to openai for unknown provider" do + VectorStore::Registry.stubs(:openai_access_token).returns("sk-test") + ClimateControl.modify(VECTOR_STORE_PROVIDER: "unknown_store") do + assert_equal :openai, 
VectorStore::Registry.adapter_name + end + end + + test "adapter returns VectorStore::Openai instance when openai configured" do + VectorStore::Registry.stubs(:openai_access_token).returns("sk-test") + ClimateControl.modify(VECTOR_STORE_PROVIDER: nil) do + adapter = VectorStore::Registry.adapter + assert_instance_of VectorStore::Openai, adapter + end + end + + test "adapter returns nil when nothing configured" do + VectorStore::Registry.stubs(:openai_access_token).returns(nil) + ClimateControl.modify(VECTOR_STORE_PROVIDER: nil) do + assert_nil VectorStore::Registry.adapter + end + end + + test "configured? delegates to adapter presence" do + VectorStore::Registry.stubs(:adapter).returns(nil) + assert_not VectorStore.configured? + + VectorStore::Registry.stubs(:adapter).returns(VectorStore::Openai.new(access_token: "sk-test")) + assert VectorStore.configured? + end +end