I have two clusters:
PROM: OpenSearch + OpenSearch Dashboards 2.3.0
TEST: OpenSearch + OpenSearch Dashboards 2.7.0 (upgraded from 2.3.0; a mirror of PROM)
The cluster settings are the same.
I perform the same actions on the same index in “Dev Tools” on both clusters:
POST _reindex
{
"source": {
"index": "elk_12-test-1"
},
"dest": {
"index": "elk_12-test-2"
}
}
On PROM I see one task
GET /_tasks?detailed=true&actions=*reindex*
{
"nodes" : {
"uwuvGpUfSOOTLKVVMBB_0Q" : {
"name" : "node_name",
"transport_address" : "xxx.xxx.xxx.xxx:9300",
"host" : "xxx.xxx.xxx.xxx",
"ip" : "xxx.xxx.xxx.xxx:9300",
"roles" : [
"ingest",
"remote_cluster_client"
],
"attributes" : {
"zone" : "vlg",
"shard_indexing_pressure_enabled" : "true"
},
"tasks" : {
"uwuvGpUfSOOTLKVVMBB_0Q:256784238" : {
"node" : "uwuvGpUfSOOTLKVVMBB_0Q",
"id" : 256784238,
"type" : "transport",
"action" : "indices:data/write/reindex",
"status" : {
"total" : 4131710,
"updated" : 0,
"created" : 2703000,
"deleted" : 0,
"batches" : 2704,
"version_conflicts" : 0,
"noops" : 0,
"retries" : {
"bulk" : 0,
"search" : 0
},
"throttled_millis" : 0,
"requests_per_second" : -1.0,
"throttled_until_millis" : 0
},
"description" : "reindex from [elk_12-test-1] to [elk_12-test-2]",
"start_time_in_millis" : 1684114612633,
"running_time_in_nanos" : 1263598777066,
"cancellable" : true,
"cancelled" : false,
"headers" : { },
"resource_stats" : {
"total" : {
"cpu_time_in_nanos" : 0,
"memory_in_bytes" : 0
}
}
}
}
}
}
}
When the task completes, I see two identical indices:
GET _cat/indices/elk_12-test-*?v&s=index
health status index uuid pri rep docs.count docs.deleted store.size pri.store.size
green open elk_12-test-1 gm8j0speTg2eDlyszyoPGQ 1 1 4131710 0 3.3gb 1.7gb
green open elk_12-test-2 oRlt73zDQzGmbImHR8HxyA 1 1 4131710 0 3.3gb 1.7gb
In TEST, the same actions lead to the creation of four tasks
GET /_tasks?detailed=true&actions=*reindex*
{
"nodes": {
"JIy8hrGuSoWNE5KDeNCBnQ": {
"name": "vlg-node_name",
"transport_address": "xxx.xxx.xxx.xx1:9300",
"host": "xxx.xxx.xxx.xx1",
"ip": "xxx.xxx.xxx.xx1:9300",
"roles": [
"cluster_manager",
"data"
],
"attributes": {
"type": "hist",
"zone": "vlg",
"shard_indexing_pressure_enabled": "true"
},
"tasks": {
"JIy8hrGuSoWNE5KDeNCBnQ:47392246": {
"node": "JIy8hrGuSoWNE5KDeNCBnQ",
"id": 47392246,
"type": "transport",
"action": "indices:data/write/reindex",
"status": {
"total": 4131710,
"updated": 192000,
"created": 711000,
"deleted": 0,
"batches": 904,
"version_conflicts": 0,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled_millis": 0,
"requests_per_second": -1,
"throttled_until_millis": 0
},
"description": "reindex from [elk_12-test-1] to [elk_12-test-2]",
"start_time_in_millis": 1684120039895,
"running_time_in_nanos": 250233204932,
"cancellable": true,
"cancelled": false,
"headers": {
"X-Opaque-Id": "bb70ea0c-6423-4d67-a04b-d0cccbe2d9f4"
},
"resource_stats": {
"total": {
"cpu_time_in_nanos": 0,
"memory_in_bytes": 0
}
}
},
"JIy8hrGuSoWNE5KDeNCBnQ:47394749": {
"node": "JIy8hrGuSoWNE5KDeNCBnQ",
"id": 47394749,
"type": "transport",
"action": "indices:data/write/reindex",
"status": {
"total": 4131710,
"updated": 706000,
"created": 0,
"deleted": 0,
"batches": 707,
"version_conflicts": 0,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled_millis": 0,
"requests_per_second": -1,
"throttled_until_millis": 0
},
"description": "reindex from [elk_12-test-1] to [elk_12-test-2]",
"start_time_in_millis": 1684120099897,
"running_time_in_nanos": 190231243720,
"cancellable": true,
"cancelled": false,
"headers": {
"X-Opaque-Id": "bb70ea0c-6423-4d67-a04b-d0cccbe2d9f4"
},
"resource_stats": {
"total": {
"cpu_time_in_nanos": 0,
"memory_in_bytes": 0
}
}
}
}
},
"teEcyYF5RkOkLI-3O1kLPA": {
"name": "kvk-node_name",
"transport_address": "xxx.xxx.xxx.xx2:9300",
"host": "xxx.xxx.xxx.xx2",
"ip": "xxx.xxx.xxx.xx2:9300",
"roles": [
"cluster_manager",
"data"
],
"attributes": {
"zone": "kvk",
"shard_indexing_pressure_enabled": "true",
"type": "hist"
},
"tasks": {
"teEcyYF5RkOkLI-3O1kLPA:62329456": {
"node": "teEcyYF5RkOkLI-3O1kLPA",
"id": 62329456,
"type": "transport",
"action": "indices:data/write/reindex",
"status": {
"total": 4131710,
"updated": 248000,
"created": 0,
"deleted": 0,
"batches": 249,
"version_conflicts": 0,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled_millis": 0,
"requests_per_second": -1,
"throttled_until_millis": 0
},
"description": "reindex from [elk_12-test-1] to [elk_12-test-2]",
"start_time_in_millis": 1684120159884,
"running_time_in_nanos": 130226594041,
"cancellable": true,
"cancelled": false,
"headers": {
"X-Opaque-Id": "bb70ea0c-6423-4d67-a04b-d0cccbe2d9f4"
},
"resource_stats": {
"total": {
"cpu_time_in_nanos": 0,
"memory_in_bytes": 0
}
}
},
"teEcyYF5RkOkLI-3O1kLPA:62318811": {
"node": "teEcyYF5RkOkLI-3O1kLPA",
"id": 62318811,
"type": "transport",
"action": "indices:data/write/reindex",
"status": {
"total": 4131710,
"updated": 385000,
"created": 192000,
"deleted": 0,
"batches": 578,
"version_conflicts": 0,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled_millis": 0,
"requests_per_second": -1,
"throttled_until_millis": 0
},
"description": "reindex from [elk_12-test-1] to [elk_12-test-2]",
"start_time_in_millis": 1684119979799,
"running_time_in_nanos": 310311123109,
"cancellable": true,
"cancelled": false,
"headers": {
"X-Opaque-Id": "bb70ea0c-6423-4d67-a04b-d0cccbe2d9f4"
},
"resource_stats": {
"total": {
"cpu_time_in_nanos": 0,
"memory_in_bytes": 0
}
}
}
}
}
}
}
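Because all four tasks write to the same destination index, many documents are updated instead of created, and the overwritten versions then show up as deleted documents (docs.deleted):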
GET _cat/indices/elk_12-test-*?v&s=index
health status index uuid pri rep docs.count docs.deleted store.size pri.store.size
green open elk_12-test-1 BKdnoMW-SwmWvNm9O991QA 1 1 4131710 0 2.3gb 1.1gb
green open elk_12-test-2 A1wCcTqMRr-IpRlPO0gUGQ 1 1 4131710 1503420 3gb 1.6gb
If I send the same request directly to the REST API on TEST (with curl), this behavior is not observed: only one task is created, and the index is built without “garbage” and much faster.
Summary
request:
curl -XPOST -u username -H 'Content-Type: application/json' 'https://vlg-node_name:9200/_reindex' -d '{ "source": { "index": "elk_12-test-1"}, "dest": { "index": "elk_12-test-3" } }'
{"took":1920489,"timed_out":false,"total":4131710,"updated":0,"created":4131710,"deleted":0,"batches":4132,"version_conflicts":0,"noops":0,"retries":{"bulk":0,"search":0},"throttled_millis":0,"requests_per_second":-1.0,"throttled_until_millis":0,"failures":[]}
One task:
GET /_tasks?detailed=true&actions=*reindex*
{
"nodes": {
"JIy8hrGuSoWNE5KDeNCBnQ": {
"name": "vlg-node_name",
"transport_address": "xxx.xxx.xxx.xx1:9300",
"host": "xxx.xxx.xxx.xx1",
"ip": "xxx.xxx.xxx.xx1:9300",
"roles": [
"cluster_manager",
"data"
],
"attributes": {
"type": "hist",
"zone": "vlg",
"shard_indexing_pressure_enabled": "true"
},
"tasks": {
"JIy8hrGuSoWNE5KDeNCBnQ:47463338": {
"node": "JIy8hrGuSoWNE5KDeNCBnQ",
"id": 47463338,
"type": "transport",
"action": "indices:data/write/reindex",
"status": {
"total": 4131710,
"updated": 0,
"created": 454000,
"deleted": 0,
"batches": 455,
"version_conflicts": 0,
"noops": 0,
"retries": {
"bulk": 0,
"search": 0
},
"throttled_millis": 0,
"requests_per_second": -1,
"throttled_until_millis": 0
},
"description": "reindex from [elk_12-test-1] to [elk_12-test-3]",
"start_time_in_millis": 1684121585862,
"running_time_in_nanos": 208521588983,
"cancellable": true,
"cancelled": false,
"headers": {},
"resource_stats": {
"total": {
"cpu_time_in_nanos": 0,
"memory_in_bytes": 0
}
}
}
}
}
}
}
Indices:
GET _cat/indices/elk_12-test-*?v&s=index
green open elk_12-test-1 BKdnoMW-SwmWvNm9O991QA 1 1 4131710 0 2.3gb 1.1gb
green open elk_12-test-2 A1wCcTqMRr-IpRlPO0gUGQ 1 1 4131710 1503420 3gb 1.6gb
green open elk_12-test-3 iVsgJ_G4Tn6NPAanUPL8hw 1 1 4131710 0 2.3gb 1.1gb
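It looks like OpenSearch Dashboards 2.7 is sending the same request multiple times: the four tasks on TEST share the same X-Opaque-Id, and their start times are about 60 seconds apart, which suggests the request is being retried (for example, after a timeout) while the previous reindex is still running. This behavior was not observed in the old version (2.3.0).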