Create a New Collection
Create and Open a Collection
To create a new, empty collection in Zvec, you need to define the following:
- Schema — the structural blueprint of your data, specifying scalar fields and vector embeddings.
- Collection options (optional) — runtime settings that control how the collection behaves when opened (e.g., read-only mode).
Once defined, call create_and_open() to initialize a new collection at a specified path and return a Collection object ready for inserts and queries.
If a collection already exists at the specified path, create_and_open() will raise an error to prevent accidental overwrites.
import zvec
# Define a collection schema
collection_schema = zvec.CollectionSchema(
name="example_collection",
fields=[
zvec.FieldSchema(
name="string_field_example",
data_type=zvec.DataType.STRING,
nullable=True,
index_param=zvec.InvertIndexParam(enable_range_optimization=False),
),
],
vectors=[
zvec.VectorSchema(
name="dense_vector_example",
data_type=zvec.DataType.VECTOR_FP32,
dimension=768,
index_param=zvec.HnswIndexParam(metric_type=zvec.MetricType.COSINE),
),
],
)
# Create and open the collection
collection = zvec.create_and_open(
path="/path/to/my/collection",
schema=collection_schema,
option=zvec.CollectionOption(read_only=False, enable_mmap=True),
)

Real-World Example: 🛒 Product Search
This schema models a multi-modal product search system, combining visual, textual, and structured metadata for rich retrieval:
🗂️ Scalar Fields: For Filtering & Display
- `category` (array of strings, indexed): Enables queries like `category CONTAIN_ANY ("electronics", "headphones")` to find products that belong to either "electronics" or "headphones" (or both).
- `price` (integer, indexed with range optimization): Supports fast range queries such as `price > 100`.
- `in_stock` (boolean, indexed): Enables instant filtering by availability (e.g., "only show items in stock").
- `image_url` and `description` are stored but not indexed, since they're only used for display.
📐 Vector Embeddings: For Semantic Relevance
- Two dense vectors capture semantic meaning:
  - `image_vec`: 512-dimensional embeddings from product images (e.g., via a vision model).
  - `description_vec`: 768-dimensional embeddings from product descriptions (e.g., from a language model), stored with quantization.
- One sparse vector, `keywords_sparse`, for keyword matching, enabling hybrid sparse-dense search.
import zvec
# Scalar fields: product metadata; only the fields used in filters get an index
scalar_fields = [
    zvec.FieldSchema(
        name="image_url",
        data_type=zvec.DataType.STRING,  # Display only: no index is created
        nullable=True,  # May be null
    ),
    zvec.FieldSchema(
        name="description",
        data_type=zvec.DataType.STRING,  # Display only: no index is created
    ),
    zvec.FieldSchema(
        name="category",
        data_type=zvec.DataType.ARRAY_STRING,
        # Inverted index to serve array membership queries
        index_param=zvec.InvertIndexParam(),
    ),
    zvec.FieldSchema(
        name="price",
        data_type=zvec.DataType.INT32,
        # Range optimization turned on for queries such as price > 100
        index_param=zvec.InvertIndexParam(enable_range_optimization=True),
    ),
    zvec.FieldSchema(
        name="in_stock",
        data_type=zvec.DataType.BOOL,
        # Inverted index to serve boolean queries
        index_param=zvec.InvertIndexParam(),
    ),
]

# Vector fields: two dense embeddings plus one sparse keyword vector
vector_fields = [
    # Dense embedding computed from product images
    zvec.VectorSchema(
        name="image_vec",
        data_type=zvec.DataType.VECTOR_FP32,
        dimension=512,
        # HNSW index for similarity search, cosine distance metric
        index_param=zvec.HnswIndexParam(metric_type=zvec.MetricType.COSINE),
    ),
    # Dense embedding computed from product descriptions
    zvec.VectorSchema(
        name="description_vec",
        data_type=zvec.DataType.VECTOR_FP32,
        dimension=768,
        # Quantization enabled for faster similarity search
        index_param=zvec.HnswIndexParam(
            metric_type=zvec.MetricType.COSINE,
            quantize_type=zvec.QuantizeType.INT8,
        ),
    ),
    # Sparse vector built from product keywords
    zvec.VectorSchema(
        name="keywords_sparse",
        data_type=zvec.DataType.SPARSE_VECTOR_FP32,
        # HNSW index for similarity search, inner product metric
        index_param=zvec.HnswIndexParam(metric_type=zvec.MetricType.IP),
    ),
]

# Assemble the schema from the scalar and vector definitions
collection_schema = zvec.CollectionSchema(
    name="product_search",
    fields=scalar_fields,
    vectors=vector_fields,
)

# Create the collection on disk and open it
collection = zvec.create_and_open(
    path="path/to/collection",
    schema=collection_schema,
    option=zvec.CollectionOption(read_only=False, enable_mmap=True),
)