mirror of
https://github.com/we-promise/sure.git
synced 2026-04-10 15:54:48 +00:00
* Add conditional migration for vector_store_chunks table Creates the pgvector-backed chunks table when VECTOR_STORE_PROVIDER=pgvector. Enables the vector extension, adds store_id/file_id indexes, and uses vector(1024) column type for embeddings. * Add VectorStore::Embeddable concern for text extraction and embedding Shared concern providing extract_text (PDF via pdf-reader, plain-text as-is), paragraph-boundary chunking (~2000 chars, ~200 overlap), and embed/embed_batch via OpenAI-compatible /v1/embeddings endpoint using Faraday. Configurable via EMBEDDING_MODEL, EMBEDDING_URI_BASE, with fallback to OPENAI_* env vars. * Implement VectorStore::Pgvector adapter with raw SQL Replaces the stub with a full implementation using ActiveRecord::Base.connection with parameterized binds. Supports create_store, delete_store, upload_file (extract+chunk+embed+insert), remove_file, and cosine-similarity search via the <=> operator. * Add registry test for pgvector adapter selection * Configure pgvector in compose.example.ai.yml Switch db image to pgvector/pgvector:pg16, add VECTOR_STORE_PROVIDER, EMBEDDING_MODEL, and EMBEDDING_DIMENSIONS env vars, and include nomic-embed-text in Ollama's pre-loaded models. * Update pgvector docs from scaffolded to ready Document env vars, embedding model setup, pgvector Docker image requirement, and Ollama pull instructions. * Address PR review feedback - Migration: remove env guard, use pgvector_available? check so it runs on plain Postgres (CI) but creates the table on pgvector-capable servers. Add NOT NULL constraints on content/embedding/metadata, unique index on (store_id, file_id, chunk_index). - Pgvector adapter: wrap chunk inserts in a DB transaction to prevent partial file writes. Override supported_extensions to match formats that extract_text can actually parse. - Embeddable: add hard_split fallback for paragraphs exceeding CHUNK_SIZE to avoid overflowing embedding model token limits. 
* Bump schema version to include vector_store_chunks migration CI uses db:schema:load which checks the version — without this bump, the migration is detected as pending and tests fail to start. * Update 20260316120000_create_vector_store_chunks.rb --------- Co-authored-by: sokiee <sokysrm@gmail.com>
61 lines
2.2 KiB
Ruby
61 lines
2.2 KiB
Ruby
require "test_helper"

# Exercises adapter selection in VectorStore::Registry (adapter_name / adapter)
# under different VECTOR_STORE_PROVIDER values and OpenAI access-token states,
# plus the VectorStore.configured? predicate.
#
# Environment variables are scoped with ClimateControl.modify so each test sees
# only the provider value it sets; the OpenAI token is stubbed on the registry
# rather than read from the real environment.
class VectorStore::RegistryTest < ActiveSupport::TestCase
  test "adapter_name defaults to openai when access token present" do
    VectorStore::Registry.stubs(:openai_access_token).returns("sk-test")

    ClimateControl.modify(VECTOR_STORE_PROVIDER: nil) do
      assert_equal :openai, VectorStore::Registry.adapter_name
    end
  end

  test "adapter_name returns nil when no credentials configured" do
    VectorStore::Registry.stubs(:openai_access_token).returns(nil)

    ClimateControl.modify(VECTOR_STORE_PROVIDER: nil) do
      assert_nil VectorStore::Registry.adapter_name
    end
  end

  test "adapter_name respects explicit VECTOR_STORE_PROVIDER" do
    ClimateControl.modify(VECTOR_STORE_PROVIDER: "qdrant") do
      assert_equal :qdrant, VectorStore::Registry.adapter_name
    end
  end

  test "adapter_name falls back to openai for unknown provider" do
    # With a token available, an unrecognised provider name is expected to
    # resolve to the same result as having no provider set at all.
    VectorStore::Registry.stubs(:openai_access_token).returns("sk-test")

    ClimateControl.modify(VECTOR_STORE_PROVIDER: "unknown_store") do
      assert_equal :openai, VectorStore::Registry.adapter_name
    end
  end

  test "adapter returns VectorStore::Openai instance when openai configured" do
    VectorStore::Registry.stubs(:openai_access_token).returns("sk-test")

    ClimateControl.modify(VECTOR_STORE_PROVIDER: nil) do
      adapter = VectorStore::Registry.adapter

      assert_instance_of VectorStore::Openai, adapter
    end
  end

  test "adapter returns nil when nothing configured" do
    VectorStore::Registry.stubs(:openai_access_token).returns(nil)

    ClimateControl.modify(VECTOR_STORE_PROVIDER: nil) do
      assert_nil VectorStore::Registry.adapter
    end
  end

  test "adapter returns VectorStore::Pgvector instance when pgvector configured" do
    ClimateControl.modify(VECTOR_STORE_PROVIDER: "pgvector") do
      adapter = VectorStore::Registry.adapter

      assert_instance_of VectorStore::Pgvector, adapter
    end
  end

  test "configured? delegates to adapter presence" do
    # No adapter available -> not configured.
    VectorStore::Registry.stubs(:adapter).returns(nil)
    assert_not VectorStore.configured?

    # Re-stub with a real adapter instance -> configured.
    VectorStore::Registry.stubs(:adapter).returns(VectorStore::Openai.new(access_token: "sk-test"))
    assert VectorStore.configured?
  end
end