Versions (relevant - OpenSearch/Dashboard/Server OS/Browser): OpenSearch 2.17 and 2.19
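Describe the issue: The normalization processor has no effect when a hybrid query is wrapped entirely inside a nested query. Instead of normalizing each sub-query's scores and combining them with the configured weighted arithmetic mean, the raw scores from the different candidate sets are simply added together, which is the same behavior as running the search without any search pipeline.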
Configuration:
PUT nested-index
{
"settings": {
"index.knn": true,
"number_of_shards": 1,
"number_of_replicas": 0
},
"mappings": {
"properties": {
"company_id": {
"type": "keyword"
},
"listings": {
"type": "nested",
"properties": {
"id": {
"type": "keyword"
},
"description": {
"type": "text",
"analyzer": "english",
"search_analyzer": "english"
},
"description_embedding": {
"type": "knn_vector",
"dimension": 5,
"method": {
"engine": "lucene",
"space_type": "cosinesimil",
"name": "hnsw",
"parameters": {}
}
}
}
}
}
}
}
PUT nested-index/_doc/1
{
"company_id": "company1",
"listings": [
{
"id" : "company1_listing1",
"description" : "This is an awesome company located in San Diego, CA.",
"description_embedding": [1,2,3,4,5]
},
{
"id" : "company1_listing2",
"description" : "This is a manufacturing facility in Michigan.",
"description_embedding": [6,7,8,9,10]
}
]
}
PUT nested-index/_doc/2
{
"company_id": "company2",
"listings": [
{
"id" : "company2_listing1",
"description" : "This company makes surf boards.",
"description_embedding": [1,3,5,7,9]
},
{
"id" : "company2_listing2",
"description" : "This company makes skate boards.",
"description_embedding": [2,4,6,8,10]
}
]
}
Relevant Logs or Screenshots:
BM25 Query and Results
GET /nested-index/_search
{
"size": 2,
"_source": {
"excludes": [
"listings"
]
},
"query": {
"nested": {
"path": "listings",
"score_mode": "max",
"inner_hits": {
"size": 2,
"_source": {
"includes": [
"listings.id"
]
}
},
"query": {
"match": {
"listings.description": "Skate board company manufacturer in Michigan"
}
}
}
}
}
{
"took": 603,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 2.7373,
"hits": [
{
"_index": "nested-index",
"_id": "1",
"_score": 2.7373,
"_source": {
"company_id": "company1"
},
"inner_hits": {
"listings": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 2.7373,
"hits": [
{
"_index": "nested-index",
"_id": "1",
"_nested": {
"field": "listings",
"offset": 1
},
"_score": 2.7373,
"_source": {
"id": "company1_listing2"
}
},
{
"_index": "nested-index",
"_id": "1",
"_nested": {
"field": "listings",
"offset": 0
},
"_score": 0.305255,
"_source": {
"id": "company1_listing1"
}
}
]
}
}
}
},
{
"_index": "nested-index",
"_id": "2",
"_score": 2.3093681,
"_source": {
"company_id": "company2"
},
"inner_hits": {
"listings": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 2.3093681,
"hits": [
{
"_index": "nested-index",
"_id": "2",
"_nested": {
"field": "listings",
"offset": 1
},
"_score": 2.3093681,
"_source": {
"id": "company2_listing2"
}
},
{
"_index": "nested-index",
"_id": "2",
"_nested": {
"field": "listings",
"offset": 0
},
"_score": 1.0757082,
"_source": {
"id": "company2_listing1"
}
}
]
}
}
}
}
]
}
}
KNN Query and Results
GET /nested-index/_search
{
"size": 2,
"_source": {
"excludes": [
"listings"
]
},
"query": {
"nested": {
"path": "listings",
"score_mode": "max",
"inner_hits": {
"size": 2,
"_source": {
"includes": [
"listings.id"
]
}
},
"query": {
"knn": {
"listings.description_embedding": {
"vector": [1,2,3,4,5],
"k": 2
}
}
}
}
}
}
{
"took": 232,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "nested-index",
"_id": "1",
"_score": 1,
"_source": {
"company_id": "company1"
},
"inner_hits": {
"listings": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "nested-index",
"_id": "1",
"_nested": {
"field": "listings",
"offset": 0
},
"_score": 1,
"_source": {
"id": "company1_listing1"
}
}
]
}
}
}
},
{
"_index": "nested-index",
"_id": "2",
"_score": 1,
"_source": {
"company_id": "company2"
},
"inner_hits": {
"listings": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1,
"hits": [
{
"_index": "nested-index",
"_id": "2",
"_nested": {
"field": "listings",
"offset": 1
},
"_score": 1,
"_source": {
"id": "company2_listing2"
}
}
]
}
}
}
}
]
}
}
Hybrid Query and Results (running the same query with search_pipeline=_none produces the same results)
PUT /_search/pipeline/normalizer
{
"description": "Post processor for hybrid search",
"phase_results_processors": [
{
"normalization-processor": {
"normalization": {
"technique": "min_max"
},
"combination": {
"technique": "arithmetic_mean",
"parameters": {
"weights": [
0.5,
0.5
]
}
}
}
}
]
}
GET /nested-index/_search?search_pipeline=normalizer
{
"size": 2,
"_source": {
"excludes": [
"listings"
]
},
"query": {
"nested": {
"path": "listings",
"score_mode": "max",
"inner_hits": {
"size": 2,
"_source": {
"includes": [
"listings.id"
]
}
},
"query": {
"hybrid": {
"queries": [
{
"match": {
"listings.description": "Skate board company manufacturer in Michigan"
}
},
{
"knn": {
"listings.description_embedding": {
"vector": [1,2,3,4,5],
"k": 2
}
}
}
]
}
}
}
}
}
{
"took": 47,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 3.3093681,
"hits": [
{
"_index": "nested-index",
"_id": "2",
"_score": 3.3093681,
"_source": {
"company_id": "company2"
},
"inner_hits": {
"listings": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 3.3093681,
"hits": [
{
"_index": "nested-index",
"_id": "2",
"_nested": {
"field": "listings",
"offset": 1
},
"_score": 3.3093681,
"_source": {
"id": "company2_listing2"
}
},
{
"_index": "nested-index",
"_id": "2",
"_nested": {
"field": "listings",
"offset": 0
},
"_score": 1.0757082,
"_source": {
"id": "company2_listing1"
}
}
]
}
}
}
},
{
"_index": "nested-index",
"_id": "1",
"_score": 2.7373,
"_source": {
"company_id": "company1"
},
"inner_hits": {
"listings": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 2.7373,
"hits": [
{
"_index": "nested-index",
"_id": "1",
"_nested": {
"field": "listings",
"offset": 1
},
"_score": 2.7373,
"_source": {
"id": "company1_listing2"
}
},
{
"_index": "nested-index",
"_id": "1",
"_nested": {
"field": "listings",
"offset": 0
},
"_score": 1.3052549,
"_source": {
"id": "company1_listing1"
}
}
]
}
}
}
}
]
}
}