文章目錄
- 前言
- java實戰-Milvus 2.5.x版本向量庫-通過集合字段變更示例學習相關api demo
- 1. Milvus版本
- 2. 示例邏輯分析
- 3. 集合字段變更示例demo
- 4. 測試
前言
如果您覺得有用的話,記得給博主點個贊、評論、收藏,一鍵三連啊,寫作不易啊 ^_^。
而且聽說點贊的人每天的運氣都不會太差,實在白嫖的話,那歡迎常來啊!!!
java實戰-Milvus 2.5.x版本向量庫-通過集合字段變更示例學習相關api demo
注意:
關于demo中的Milvus 連接池與key的管理參考下面這篇文章。
java-Milvus 向量庫(2.5.x版本)-連接池(多key)與自定義端點監聽設計
1. Milvus版本
2. 示例邏輯分析
Milvus 不支持直接修改集合 schema(比如新增字段),所以你只能:
- 創建一個新集合(含你想新增的字段);
- 把舊集合的數據遷移過去;
- 繼續在新集合中做后續操作。
需要注意的是,數據遷移建議使用 queryIteratorV2,而不是 searchIteratorV2。
功能 | queryIteratorV2 | searchIteratorV2 |
---|---|---|
用途 | 查詢結構化數據(按條件篩選、返回字段) | 向量搜索,查找相似向量 |
是否依賴向量字段 | 否,不依賴 | 是,必須提供向量(FloatVector) |
用途場景 | 數據導出、分頁查看、遷移、統計等 | 向量相似度匹配(推薦、檢索) |
是否支持 filter | 支持 expr / filter | 支持 filter,但只能配合向量使用 |
返回內容 | 任意字段(非向量也可以) | topK 相似度結果(帶 score) |
3. 集合字段變更示例demo
package org.example.controller;import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import lombok.extern.slf4j.Slf4j;
import org.example.annotation.CommonLog;
import org.example.exception.model.ResponseResult;
import org.example.service.MilvusService;
import org.springframework.web.bind.annotation.CrossOrigin;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RestController;import javax.annotation.Resource;/**
* @author 楊鎮宇
* @date 2025/7/8 08:46
* @version 1.0
*/
@Api(value = " milvus 練習", tags = {" milvus 練習"})
@Slf4j
@RestController
@RequestMapping(value = "api/milvus")
public class MilvusController {

    /** Service that the endpoint below delegates to. */
    @Resource
    private MilvusService milvusService;

    /**
     * POST {@code api/milvus/updateMigrateData}: runs the collection field-change
     * (data migration) demo and reports completion.
     *
     * @return the result of {@code ResponseResult.ok(...)} carrying a fixed completion message
     */
    @ApiOperation(
            value = "集合字段變更(數據遷移)測試",
            notes = "集合字段變更(數據遷移)測試")
    @CrossOrigin(origins = "*")
    @CommonLog(
            methodName = "集合字段變更(數據遷移)測試",
            className = "MilvusController#updateMigrateData",
            url = "api/milvus/updateMigrateData")
    @RequestMapping(value = "/updateMigrateData", method = RequestMethod.POST)
    public ResponseResult updateMigrateData() {
        // Run the demo synchronously; the response is built only after it returns.
        milvusService.updateMigrateData();
        ResponseResult finished = ResponseResult.ok("測試完成");
        return finished;
    }
}
package org.example.service.impl;import com.google.gson.Gson;
import com.google.gson.JsonObject;
import io.milvus.orm.iterator.QueryIterator;
import io.milvus.orm.iterator.SearchIteratorV2;
import io.milvus.response.QueryResultsWrapper;
import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.common.ConsistencyLevel;
import io.milvus.v2.common.DataType;
import io.milvus.v2.common.IndexParam;
import io.milvus.v2.service.collection.request.*;
import io.milvus.v2.service.collection.response.DescribeCollectionResp;
import io.milvus.v2.service.vector.request.InsertReq;
import io.milvus.v2.service.vector.request.QueryIteratorReq;
import io.milvus.v2.service.vector.request.SearchIteratorReqV2;
import io.milvus.v2.service.vector.request.data.FloatVec;
import io.milvus.v2.service.vector.response.SearchResp;
import lombok.extern.slf4j.Slf4j;
import org.example.milvus.config.OldMilvusServiceClient;
import org.example.milvus.model.MilvusAdminClient;
import org.example.milvus.model.MilvusInsertClient;
import org.example.milvus.model.MilvusSearchClient;
import org.example.service.MilvusService;
import org.springframework.stereotype.Service;import javax.annotation.Resource;
import java.util.*;/**
* @author 楊鎮宇
* @date 2025/7/8 09:02
* @version 1.0
*/
@Slf4j
@Service
public class MilvusServiceImpl implements MilvusService {/*** key:search-module*/@Resourceprivate MilvusSearchClient searchClient;/*** key:insert-module*/@Resourceprivate MilvusInsertClient insertClient;/*** key:admin-module*/@Resourceprivate MilvusAdminClient adminClient;/*** milvusServiceClient舊客戶端*/@Resourceprivate OldMilvusServiceClient oldMilvusServiceClient;// 歸一化函數(單位向量)public List<Float> normalizeVector(List<Float> vector) {double norm = 0.0;for (Float v : vector) {norm += v * v;}norm = Math.sqrt(norm);List<Float> normalized = new ArrayList<>();for (Float v : vector) {normalized.add((float) (v / norm));}return normalized;}/*** java實戰-Milvus 向量庫 集合字段變更*/@Overridepublic void updateMigrateData() {MilvusClientV2 client = adminClient.getClient();int dim = 4;// 維度String collectionName = "java_test";String description = "測試表";CreateCollectionReq.CollectionSchema collectionSchema = client.createSchema();collectionSchema.addField(AddFieldReq.builder().fieldName("id").dataType(DataType.Int64).isPrimaryKey(Boolean.TRUE).autoID(Boolean.FALSE).description("id").build());collectionSchema.addField(AddFieldReq.builder().fieldName("vector").dataType(DataType.FloatVector).dimension(dim).description("向量字段").build());collectionSchema.addField(AddFieldReq.builder().fieldName("user").dataType(DataType.VarChar).maxLength(100).description("用戶").build());collectionSchema.addField(AddFieldReq.builder().fieldName("timestamp").dataType(DataType.Int64).description("時間").build());log.info("=============創建測試表提供測試===================");createTable(dim,collectionSchema,client,collectionName,description);log.info("--------------插入1200條數據用作測試--------------");MilvusClientV2 insert = insertClient.getClient();batchInsert(insert,collectionName);log.info("--------------查詢前10條數據驗證--------------");List<Float> queryVector = Arrays.asList(1.0f, 2.0f, 3.0f, 4.0f); // 示例向量query(client,queryVector,collectionName);// 新增字段AddFieldReq addBuild = 
AddFieldReq.builder().fieldName("age").dataType(DataType.VarChar).maxLength(100).description("年齡").isNullable(true) // ? 允許為空.build();collectionSchema.addField(addBuild);log.info("=============創建新表,進行數據遷移===================");String newCollection = migrateData(client, collectionName, collectionSchema, dim, description);log.info("--------------查詢前10條數據驗證--------------");query(client,queryVector,newCollection);log.info("刪除舊表");DropCollectionReq dropCollectionReq = DropCollectionReq.builder().collectionName(collectionName).build();client.dropCollection(dropCollectionReq);}public void query(MilvusClientV2 client,List<Float> queryVector,String collectionName){queryVector = normalizeVector(queryVector); // ? 添加歸一化SearchIteratorReqV2 searchReq = SearchIteratorReqV2.builder().collectionName(collectionName) // 替換為你的集合名.outputFields(Arrays.asList("id", "user", "timestamp","vector")) // 可加你想驗證的字段.batchSize(10L).vectorFieldName("vector").vectors(Collections.singletonList(new FloatVec(queryVector))).filter("id > 0") // 可選條件,確保有返回.topK(10) // 查詢前 10 個最相似的.metricType(IndexParam.MetricType.COSINE).consistencyLevel(ConsistencyLevel.BOUNDED).build();SearchIteratorV2 searchIterator = client.searchIteratorV2(searchReq);log.info("🔍 表【{}】:前 10 條查詢結果:",collectionName);while (true) {List<SearchResp.SearchResult> res = searchIterator.next();if (res.isEmpty()) {log.info("🔍 查詢結束");searchIterator.close();break;}for (SearchResp.SearchResult record : res) {Map<String, Object> entity = record.getEntity();log.info("🔍 >>>>> id:{},user:{},timestamp:{},vector:{}",entity.get("id"),entity.get("user"),entity.get("timestamp"),entity.get("vector"));}}}/*** 插入1200條數據* @param client*/public void batchInsert(MilvusClientV2 client,String collectionName){Gson gson = new Gson();List<JsonObject> dataList = new ArrayList<>();for (long i = 1; i <= 1200; i++) {JsonObject row = new JsonObject();// 構造向量:這里用隨機值或自定義值填充List<Float> vector = Arrays.asList(1.0f * i, 2.0f * i, 3.0f * i, 4.0f * i); // dim = 
4row.add("vector", gson.toJsonTree(normalizeVector(vector)));row.addProperty("id", i); // 手動指定主鍵row.addProperty("user", "user_" + i); // 示例字段row.addProperty("timestamp", System.currentTimeMillis()); // 注意 Int16 用 shortdataList.add(row);// 每 200 條插入一次,避免過大 payloadif (dataList.size() == 200 || i == 1200) {InsertReq insertReq = InsertReq.builder().collectionName(collectionName) // 替換為你的集合名.data(dataList).build();client.insert(insertReq);log.info("? 已插入 {} 條數據", i);dataList.clear();}}}/*** 新增字段,數據遷移*/public String migrateData(MilvusClientV2 client, String collectionName,CreateCollectionReq.CollectionSchema collectionSchema,int dim,String description){String newCollection = collectionName + "_new";// 獲取舊集合結構信息DescribeCollectionResp oldSchemaResp = client.describeCollection(DescribeCollectionReq.builder().collectionName(collectionName).build());List<String> oldFieldNames = oldSchemaResp.getFieldNames();// 創建新集合createTable(dim,collectionSchema,client,newCollection,description);// 獲取新集合結構信息DescribeCollectionResp newSchemaResp = client.describeCollection(DescribeCollectionReq.builder().collectionName(newCollection).build());List<String> newFieldNames = newSchemaResp.getFieldNames();Set<String> oldFieldSet = new HashSet<>(oldFieldNames);Gson gson = new Gson(); // 如果沒有引入 gson,請確保引入:com.google.code.gson:gsonlong offset = 0;long limit = 200;while (true) {QueryIteratorReq queryReq = QueryIteratorReq.builder().collectionName(collectionName).expr("id > 0").outputFields(oldFieldNames).batchSize(limit).offset(offset).limit(limit).consistencyLevel(ConsistencyLevel.BOUNDED).build();QueryIterator queryIterator = client.queryIterator(queryReq);List<QueryResultsWrapper.RowRecord> rows = queryIterator.next();queryIterator.close();if (rows.isEmpty()) {log.info("? 
數據遷移完成,總記錄數: {}", offset);break;}List<JsonObject> insertRows = new ArrayList<>();for (QueryResultsWrapper.RowRecord row : rows) {JsonObject json = new JsonObject();for (String field : newFieldNames) {if (oldFieldSet.contains(field)) {Object value = row.get(field);if (value instanceof Number) {json.addProperty(field, (Number) value);} else if (value instanceof String) {json.addProperty(field, (String) value);} else if (value instanceof List<?>) {json.add(field, gson.toJsonTree(value));} else {log.warn("? 未處理的字段類型: {} -> {}", field, value);}} else {// 🔧 新增字段統一設置為 null 或默認值json.addProperty(field, "");}}insertRows.add(json);}InsertReq insertReq = InsertReq.builder().collectionName(newCollection).data(insertRows).build();client.insert(insertReq);log.info("? 插入第 {} ~ {} 條數據", offset + 1, offset + rows.size());offset += rows.size();}log.info("? 集合 `{}` 遷移完成", newCollection);return newCollection;}/*** 創建集合 + 為所有向量字段建索引*/public static void createTable(int dim,CreateCollectionReq.CollectionSchema collectionSchema,MilvusClientV2 client, String collectionName, String description) {// 先判斷是否存在,存在先刪除HasCollectionReq name = HasCollectionReq.builder().collectionName(collectionName).build();if (client.hasCollection(name)) {log.info("Collection `{}` 已存在,準備刪除", collectionName);client.dropCollection(DropCollectionReq.builder().collectionName(collectionName)// 集合名稱.timeout(60000L)//進程的超時時長。指定時長到期后,進程將終止, 默認為60000L.build());log.info("Collection `{}` 刪除成功", collectionName);}IndexParam indexParam = IndexParam.builder().fieldName("vector").metricType(IndexParam.MetricType.COSINE) // 檢索時計算向量之間的相似度方式使用 COSINE(余弦相似度),COSINE 適用于歸一化向量的相似度匹配,等價于夾角越小越相似。.build();// 創建 CollectionSchema 和 CollectionCreateCollectionReq createCollectionReq = CreateCollectionReq.builder().collectionSchema(collectionSchema) // 留空表示此集合將使用默認設置創建。要設置具有自定義架構的集合,您需要創建一個CollectionSchema對象并在此處引用它。.autoID(false)// 當數據插入到該集合時,主字段是否自動遞增 ,TRUE自動遞增.collectionName(collectionName)//表名.description(description)//描述.dimension(dim) // 
保存向量嵌入的集合字段的維數.metricType("COSINE")//此集合使用的算法用于測量向量嵌入之間的相似性。 該值默認為IP。可能的值為L2、IP和COSINE。有關這些指標類型的詳細信息.indexParams(Collections.singletonList(indexParam)).build();// 創建集合client.createCollection(createCollectionReq);log.info("Collection `{}` 創建完成", collectionName);DescribeCollectionResp resp = client.describeCollection(DescribeCollectionReq.builder().collectionName(collectionName).build());List<CreateCollectionReq.FieldSchema> fieldSchemaList = resp.getCollectionSchema().getFieldSchemaList();log.info("-------------------------【{}】集合結構-----------------------",collectionName);for (CreateCollectionReq.FieldSchema fieldSchema:fieldSchemaList){String field = fieldSchema.getName();String fieldDescription = fieldSchema.getDescription();DataType dataType = fieldSchema.getDataType();log.info("字段名:{},類型:{},備注:{}",field,dataType,fieldDescription);}}}
4. 測試
測試執行:
效果: