Versions (relevant - OpenSearch/Dashboard/Server OS/Browser):
opensearch-project/opensearch@2.3.1
Describe the issue:
Hi everyone,
I’m reaching out for help with using the sampler aggregation in OpenSearch.
I’m applying filters for the time range and ensuring that I only retrieve logs that contain a specific field.
I’ve also set the `terminate_after` flag to 200K documents, so I expect to get logs with that field and its values for larger queries.
However, when I run the sampler aggregation with a `shard_size` of 25K, I’m getting empty buckets in the results.
This seems odd since I should be sampling from the 200K documents where the field exists.
Can anyone explain why this might be happening? I’d really appreciate any insights!
Thanks!
This is my query:
{
"params": {
"index": "XXX*",
"body": {
"track_total_hits": true,
"version": true,
"_source": {
"includes": [
"type"
]
},
"size": 0,
"from": 0,
"query": {
"bool": {
"filter": [
{
"range": {
"@timestamp": {
"format": "strict_date_optional_time",
"gte": "now-1d",
"lte": "now"
}
}
},
{
"exists": {
"field": "type"
}
}
]
}
},
"aggs": {
"sample_agg": {
"sampler": {
"shard_size": 25000
},
"aggs": {
"filtered_sample": {
"filter": {
"bool": {
"must": []
}
},
"aggs": {
"total_count": {
"value_count": {
"field": "type"
}
},
"top_values": {
"terms": {
"field": "type",
"size": 50,
"shard_size": 125,
"order": {
"_count": "desc"
}
}
}
}
},
"unfiltered_sample": {
"terms": {
"field": "type",
"size": 50,
"shard_size": 125,
"order": {
"_count": "desc"
}
}
}
}
}
},
"timeout": "1500ms",
"terminate_after": 200000
}
}
}