It turns out my problem was caused by not specifying a normalization-processor in my search. If I alter my query to include a search pipeline, such as the following, the problem is fixed:
, "search_pipeline": {
    "phase_results_processors": [
      {
        "normalization-processor": {
          "normalization": {
            "technique": "min_max"
          },
          "combination": {
            "technique": "arithmetic_mean",
            "parameters": {
              "weights": [0.3, 0.7]
            }
          },
          "ignore_failure": false
        }
      }
    ]
  }
So if I change my DSL to the following, I no longer see the duplicate results:
{
  "query": {
    "hybrid": {
      "queries": [
        {
          "bool": {
            "must": [
              {
                "function_score": {
                  "query": {
                    "bool": {
                      "should": [
                        {
                          "simple_query_string": {
                            "query": "my search phrase",
                            "fields": [
                              "keywords^4",
                              "title^2",
                              "article.basic",
                              "article.intermediate",
                              "article.advanced"
                            ],
                            "quote_field_suffix": "exact",
                            "default_operator": "OR",
                            "minimum_should_match": "2<75%"
                          }
                        },
                        {
                          "nested": {
                            "path": "attachments",
                            "query": {
                              "simple_query_string": {
                                "query": "my search phrase",
                                "fields": ["attachments.attachment.content"],
                                "quote_field_suffix": "exact",
                                "default_operator": "OR",
                                "minimum_should_match": "2<75%"
                              }
                            },
                            "inner_hits": {
                              "_source": false,
                              "highlight": {
                                "type": "unified",
                                "fragmenter": "span",
                                "number_of_fragments": 1,
                                "encoder": "html",
                                "pre_tags": ["<span class=\"search-results-highlight\">"],
                                "post_tags": ["</span>"],
                                "fragment_size": 200,
                                "no_match_size": 0,
                                "order": "score",
                                "fields": {
                                  "attachments.*": {}
                                }
                              }
                            }
                          }
                        }
                      ]
                    }
                  },
                  "functions": [
                    {
                      "field_value_factor": {
                        "field": "rating",
                        "factor": 0.25,
                        "modifier": "log1p",
                        "missing": 0
                      }
                    },
                    {
                      "field_value_factor": {
                        "field": "usedcount",
                        "factor": 0.375,
                        "modifier": "log1p",
                        "missing": 0
                      }
                    },
                    {
                      "gauss": {
                        "modifieddate": {
                          "origin": "now",
                          "scale": "30d",
                          "offset": "5d",
                          "decay": 0.5
                        }
                      }
                    }
                  ],
                  "boost_mode": "sum",
                  "score_mode": "sum"
                }
              }
            ],
            "filter": [
              {
                "bool": {
                  "must": [
                    {
                      "bool": {
                        "should": [
                          {
                            "terms": {
                              "statusid": [3]
                            }
                          },
                          {
                            "bool": {
                              "must": [
                                {
                                  "term": {
                                    "ishot": true
                                  }
                                },
                                {
                                  "terms": {
                                    "statusid": [1, 2]
                                  }
                                }
                              ]
                            }
                          }
                        ]
                      }
                    },
                    {
                      "terms": {
                        "servicedeskid": [1]
                      }
                    },
                    {
                      "terms": {
                        "securitylevelid": [-1, 6, 7, 8]
                      }
                    }
                  ]
                }
              }
            ]
          }
        },
        {
          "bool": {
            "must": [
              {
                "function_score": {
                  "query": {
                    "bool": {
                      "should": [
                        {
                          "neural_sparse": {
                            "embeddings.title": {
                              "query_text": "my search phrase",
                              "model_id": "Zjo6tY4Bo7jGFiU7IP8Q"
                            }
                          }
                        },
                        {
                          "neural_sparse": {
                            "embeddings.article.basic": {
                              "query_text": "my search phrase",
                              "model_id": "Zjo6tY4Bo7jGFiU7IP8Q"
                            }
                          }
                        },
                        {
                          "neural_sparse": {
                            "embeddings.article.intermediate": {
                              "query_text": "my search phrase",
                              "model_id": "Zjo6tY4Bo7jGFiU7IP8Q"
                            }
                          }
                        },
                        {
                          "neural_sparse": {
                            "embeddings.article.advanced": {
                              "query_text": "my search phrase",
                              "model_id": "Zjo6tY4Bo7jGFiU7IP8Q"
                            }
                          }
                        }
                      ]
                    }
                  },
                  "functions": [
                    {
                      "field_value_factor": {
                        "field": "rating",
                        "factor": 0.25,
                        "modifier": "log1p",
                        "missing": 0
                      }
                    },
                    {
                      "field_value_factor": {
                        "field": "usedcount",
                        "factor": 0.375,
                        "modifier": "log1p",
                        "missing": 0
                      }
                    },
                    {
                      "gauss": {
                        "modifieddate": {
                          "origin": "now",
                          "scale": "30d",
                          "offset": "5d",
                          "decay": 0.5
                        }
                      }
                    }
                  ],
                  "boost_mode": "sum",
                  "score_mode": "sum"
                }
              }
            ],
            "filter": [
              {
                "bool": {
                  "must": [
                    {
                      "bool": {
                        "should": [
                          {
                            "terms": {
                              "statusid": [3]
                            }
                          },
                          {
                            "bool": {
                              "must": [
                                {
                                  "term": {
                                    "ishot": true
                                  }
                                },
                                {
                                  "terms": {
                                    "statusid": [1, 2]
                                  }
                                }
                              ]
                            }
                          }
                        ]
                      }
                    },
                    {
                      "terms": {
                        "servicedeskid": [1]
                      }
                    },
                    {
                      "terms": {
                        "securitylevelid": [-1, 6, 7, 8]
                      }
                    }
                  ]
                }
              }
            ]
          }
        }
      ]
    }
  },
  "search_pipeline": {
    "phase_results_processors": [
      {
        "normalization-processor": {
          "normalization": {
            "technique": "min_max"
          },
          "combination": {
            "technique": "arithmetic_mean",
            "parameters": {
              "weights": [0.3, 0.7]
            }
          },
          "ignore_failure": false
        }
      }
    ]
  },
  "from": 0,
  "size": 50,
  "explain": false,
  "highlight": {
    "type": "unified",
    "fragmenter": "span",
    "number_of_fragments": 1,
    "encoder": "html",
    "pre_tags": ["<span class=\"search-results-highlight\">"],
    "post_tags": ["</span>"],
    "fragment_size": 200,
    "no_match_size": 0,
    "order": "score",
    "fields": {
      "article.basic": {
        "no_match_size": 200
      },
      "article.intermediate": {},
      "article.advanced": {},
      "attachments.*": {}
    }
  },
  "sort": [
    {
      "_score": "desc"
    },
    {
      "title.sort": "asc"
    }
  ]
}
Now, obviously you can create a pipeline ahead of time, but for this use case I’m just defining a temporary pipeline to show the fix.