Versions tested:
opensearch: 2.11.0
opensearch: 2.12.0
I am getting negative scores and duplicated results for hybrid search.
My configuration is as follows:
The index:
PUT /knn-sample-index
{
"settings": {
"index":{
"knn":"true"
}
},
"mappings": {
"properties": {
"textVector": {
"type": "knn_vector",
"dimension": 5,
"method": {
"engine": "faiss",
"space_type": "innerproduct",
"name": "hnsw",
"parameters": {
"ef_construction": 1024,
"m": 64
}
}
},
"imageVector": {
"type": "knn_vector",
"dimension": 5,
"method": {
"engine": "faiss",
"space_type": "innerproduct",
"name": "hnsw",
"parameters": {
"ef_construction": 1024,
"m": 64
}
}
},
"name": {
"type": "text"
}
}
}
}
The data (note that the vectors are L2-normalized):
# add data to index
PUT /knn-sample-index/_doc/1
{
"name": "Apple iPhone 13, 128GB, Pink - Unlocked (Renewed)",
"imageVector": [-0.5548, 0.3177, 0.4558, -0.5047, 0.3590],
"textVector": [-0.5313, 0.5175, 0.1438, -0.5471, -0.3605]
}
PUT /knn-sample-index/_doc/2
{
"name": "ASUS Chromebook Plus CX34 Laptop, 14 Display (1920x1080), Intel® Core i3-1215U Processor, 8GB RAM, 256GB UFS Storage, ChromeOS, White, CX3402CBA-DH386-WH",
"imageVector": [0.4884, 0.3328, 0.4026, 0.5094, 0.4786],
"textVector": [0.4003, 0.4308, 0.4791, 0.3798, 0.5295]
}
PUT /knn-sample-index/_doc/3
{
"name": "Sony 50 Inch 4K Ultra HD TV X85K Series: LED Smart Google TV with Dolby Vision HDR and Native 120HZ Refresh Rate KD50X85K- Latest Model, Black",
"imageVector": [0.4387, 0.4179, 0.4221, 0.5298, 0.4173]
}
PUT /knn-sample-index/_doc/4
{
"name": "Amazon Kindle Paperwhite (16 GB) – Now with a larger display, adjustable warm light, increased battery life, and faster page turns – Without Lockscreen Ads – Black",
"textVector": [0.4794, 0.4412, 0.4031, 0.4694, 0.4390]
}
PUT /knn-sample-index/_doc/5
{
"name": "SAMSUNG Galaxy S24+ Plus Cell Phone, 256GB AI Smartphone, Unlocked Android, 50MP Camera, Fastest Processor, Long Battery Life, US Version 2024 Cobalt Violet"
}
The query:
GET knn-sample-index/_search
{
"query": {
"hybrid": {
"queries": [
{
"match": {
"name": {
"query": "Hi world"
}
}
},
{
"knn": {
"imageVector": {
"vector": [1,1,1,1,1],
"k": 3
}
}
}
]
}
}
}
And the query output:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.073,
"hits": [
{
"_index": "knn-sample-index",
"_id": "1",
"_score": -9549511700,
"_source": {
"name": "Apple iPhone 13, 128GB, Pink - Unlocked (Renewed)",
"imageVector": [
-0.5548,
0.3177,
0.4558,
-0.5047,
0.359
],
"textVector": [
-0.5313,
0.5175,
0.1438,
-0.5471,
-0.3605
]
}
},
{
"_index": "knn-sample-index",
"_id": "1",
"_score": -4422440400,
"_source": {
"name": "Apple iPhone 13, 128GB, Pink - Unlocked (Renewed)",
"imageVector": [
-0.5548,
0.3177,
0.4558,
-0.5047,
0.359
],
"textVector": [
-0.5313,
0.5175,
0.1438,
-0.5471,
-0.3605
]
}
},
{
"_index": "knn-sample-index",
"_id": "1",
"_score": -4422440400,
"_source": {
"name": "Apple iPhone 13, 128GB, Pink - Unlocked (Renewed)",
"imageVector": [
-0.5548,
0.3177,
0.4558,
-0.5047,
0.359
],
"textVector": [
-0.5313,
0.5175,
0.1438,
-0.5471,
-0.3605
]
}
},
{
"_index": "knn-sample-index",
"_id": "1",
"_score": 1.073,
"_source": {
"name": "Apple iPhone 13, 128GB, Pink - Unlocked (Renewed)",
"imageVector": [
-0.5548,
0.3177,
0.4558,
-0.5047,
0.359
],
"textVector": [
-0.5313,
0.5175,
0.1438,
-0.5471,
-0.3605
]
}
},
{
"_index": "knn-sample-index",
"_id": "1",
"_score": -9549511700,
"_source": {
"name": "Apple iPhone 13, 128GB, Pink - Unlocked (Renewed)",
"imageVector": [
-0.5548,
0.3177,
0.4558,
-0.5047,
0.359
],
"textVector": [
-0.5313,
0.5175,
0.1438,
-0.5471,
-0.3605
]
}
}
]
}
}
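Notice that some scores are negative (e.g. -9549511700 and -4422440400) and that the same document (_id 1) is returned five times, even though hits.total.value is 1.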
If I take the hybrid search out and perform a simple knn query, it works fine.
I would appreciate any help.