Describe the issue:
In the OpenSearch cluster, for a large number of indices the primary shards and their replica shards have different sizes, so the same query returns different results depending on which shard copy serves it.
Configuration:
The indices are configured with 12 primary shards and 1 replica.
[2025-01-06T11:24:43,236][ERROR][o.o.i.r.SegmentReplicationTargetService] [vm-gs-ols-hot-prod-039.prd.ols.gs.internal.vgcserv.com.cn] [shardId [prd-aks-corebe-webapp-logs-2025.01.03.02][5]] [replication id 54873] Replication failed, timing data: {INIT=0, GET_CHECKPOINT_INFO=1, FILE_DIFF=1356, REPLICATING=0, GET_FILES=1}
Jan 6, 2025 @ 11:25:12.783 org.opensearch.indices.replication.common.ReplicationFailedException: Segment Replication failed at org.opensearch.indices.replication.SegmentReplicationTargetService$3.onFailure(SegmentReplicationTargetService.java:590) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.core.action.ActionListener$1.onFailure(ActionListener.java:90) [opensearch-core-2.13.0.jar:2.13.0] at org.opensearch.core.action.ActionListener$1.onResponse(ActionListener.java:84) [opensearch-core-2.13.0.jar:2.13.0] at org.opensearch.common.util.concurrent.ListenableFuture$1.doRun(ListenableFuture.java:126) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:52) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.common.util.concurrent.OpenSearchExecutors$DirectExecutorService.execute(OpenSearchExecutors.java:343) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.common.util.concurrent.ListenableFuture.notifyListener(ListenableFuture.java:120) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.common.util.concurrent.ListenableFuture.lambda$done$0(ListenableFuture.java:112) [opensearch-2.13.0.jar:2.13.0] at java.base/java.util.ArrayList.forEach(ArrayList.java:1596) [?:?] 
at org.opensearch.common.util.concurrent.ListenableFuture.done(ListenableFuture.java:112) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.common.util.concurrent.BaseFuture.set(BaseFuture.java:160) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.common.util.concurrent.ListenableFuture.onResponse(ListenableFuture.java:141) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.action.StepListener.innerOnResponse(StepListener.java:79) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.core.action.NotifyOnceListener.onResponse(NotifyOnceListener.java:58) [opensearch-core-2.13.0.jar:2.13.0] at org.opensearch.core.action.ActionListener$4.onResponse(ActionListener.java:182) [opensearch-core-2.13.0.jar:2.13.0] at org.opensearch.core.action.ActionListener$6.onResponse(ActionListener.java:301) [opensearch-core-2.13.0.jar:2.13.0] at org.opensearch.action.support.RetryableAction$RetryingListener.onResponse(RetryableAction.java:183) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.action.ActionListenerResponseHandler.handleResponse(ActionListenerResponseHandler.java:70) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.security.transport.SecurityInterceptor$RestoringTransportResponseHandler.handleResponse(SecurityInterceptor.java:398) [opensearch-security-2.13.0.0.jar:2.13.0.0] at org.opensearch.transport.TransportService$ContextRestoreResponseHandler.handleResponse(TransportService.java:1505) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.transport.InboundHandler.doHandleResponse(InboundHandler.java:420) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.transport.InboundHandler.lambda$handleResponse$3(InboundHandler.java:414) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:854) [opensearch-2.13.0.jar:2.13.0] at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) [?:?] 
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) [?:?] at java.base/java.lang.Thread.run(Thread.java:1583) [?:?] Caused by: org.opensearch.indices.replication.common.ReplicationFailedException: java.nio.file.NoSuchFileException: /opt/ols/data/nodes/0/indices/pnQzYkN3QDi43-fgKOOLjQ/5/index/_138.si at org.opensearch.indices.replication.SegmentReplicationTarget.finalizeReplication(SegmentReplicationTarget.java:316) ~[opensearch-2.13.0.jar:2.13.0] at org.opensearch.indices.replication.SegmentReplicationTarget.lambda$startReplication$3(SegmentReplicationTarget.java:184) ~[opensearch-2.13.0.jar:2.13.0] at org.opensearch.core.action.ActionListener$1.onResponse(ActionListener.java:82) ~[opensearch-core-2.13.0.jar:2.13.0] … 23 more Caused by: java.nio.file.NoSuchFileException: /opt/ols/data/nodes/0/indices/pnQzYkN3QDi43-fgKOOLjQ/5/index/_138.si at java.base/sun.nio.fs.UnixException.translateToIOException(UnixException.java:92) ~[?:?] at java.base/sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:106) ~[?:?] at java.base/sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:111) ~[?:?] at java.base/sun.nio.fs.UnixFileSystemProvider.newFileChannel(UnixFileSystemProvider.java:224) ~[?:?] at java.base/java.nio.channels.FileChannel.open(FileChannel.java:309) ~[?:?] at java.base/java.nio.channels.FileChannel.open(FileChannel.java:369) ~[?:?] 
at org.apache.lucene.store.NIOFSDirectory.openInput(NIOFSDirectory.java:78) ~[lucene-core-9.10.0.jar:9.10.0 695c0ac84508438302cd346a812cfa2fdc5a10df - 2024-02-14 16:48:06] at org.opensearch.index.store.FsDirectoryFactory$HybridDirectory.openInput(FsDirectoryFactory.java:183) ~[opensearch-2.13.0.jar:2.13.0] at org.apache.lucene.store.FilterDirectory.openInput(FilterDirectory.java:101) ~[lucene-core-9.10.0.jar:9.10.0 695c0ac84508438302cd346a812cfa2fdc5a10df - 2024-02-14 16:48:06] at org.apache.lucene.store.FilterDirectory.openInput(FilterDirectory.java:101) ~[lucene-core-9.10.0.jar:9.10.0 695c0ac84508438302cd346a812cfa2fdc5a10df - 2024-02-14 16:48:06] at org.apache.lucene.store.Directory.openChecksumInput(Directory.java:156) ~[lucene-core-9.10.0.jar:9.10.0 695c0ac84508438302cd346a812cfa2fdc5a10df - 2024-02-14 16:48:06] at org.apache.lucene.codecs.lucene99.Lucene99SegmentInfoFormat.read(Lucene99SegmentInfoFormat.java:94) ~[lucene-core-9.10.0.jar:9.10.0 695c0ac84508438302cd346a812cfa2fdc5a10df - 2024-02-14 16:48:06] at org.apache.lucene.index.SegmentInfos.parseSegmentInfos(SegmentInfos.java:411) ~[lucene-core-9.10.0.jar:9.10.0 695c0ac84508438302cd346a812cfa2fdc5a10df - 2024-02-14 16:48:06] at org.apache.lucene.index.SegmentInfos.readCommit(SegmentInfos.java:368) ~[lucene-core-9.10.0.jar:9.10.0 695c0ac84508438302cd346a812cfa2fdc5a10df - 2024-02-14 16:48:06] at org.apache.lucene.index.SegmentInfos.readCommit(SegmentInfos.java:315) ~[lucene-core-9.10.0.jar:9.10.0 695c0ac84508438302cd346a812cfa2fdc5a10df - 2024-02-14 16:48:06] at org.opensearch.index.store.Store.buildSegmentInfos(Store.java:820) ~[opensearch-2.13.0.jar:2.13.0] at org.opensearch.indices.replication.SegmentReplicationTarget.finalizeReplication(SegmentReplicationTarget.java:285) ~[opensearch-2.13.0.jar:2.13.0] at org.opensearch.indices.replication.SegmentReplicationTarget.lambda$startReplication$3(SegmentReplicationTarget.java:184) ~[opensearch-2.13.0.jar:2.13.0] at 
org.opensearch.core.action.ActionListener$1.onResponse(ActionListener.java:82) ~[opensearch-core-2.13.0.jar:2.13.0] … 23 more Suppressed: org.apache.lucene.index.CorruptIndexException: checksum passed (3b2a4f7d). possibly transient resource issue, or a Lucene or JVM bug (resource=BufferedChecksumIndexInput(Snapshot of SegmentInfos)) at org.apache.lucene.codecs.CodecUtil.checkFooter(CodecUtil.java:501) ~[lucene-core-9.10.0.jar:9.10.0 695c0ac84508438302cd346a812cfa2fdc5a10df - 2024-02-14 16:48:06] at org.apache.lucene.index.SegmentInfos.readCommit(SegmentInfos.java:375) ~[lucene-core-9.10.0.jar:9.10.0 695c0ac84508438302cd346a812cfa2fdc5a10df - 2024-02-14 16:48:06] at org.apache.lucene.index.SegmentInfos.readCommit(SegmentInfos.java:315) ~[lucene-core-9.10.0.jar:9.10.0 695c0ac84508438302cd346a812cfa2fdc5a10df - 2024-02-14 16:48:06] at org.opensearch.index.store.Store.buildSegmentInfos(Store.java:820) ~[opensearch-2.13.0.jar:2.13.0] at org.opensearch.indices.replication.SegmentReplicationTarget.finalizeReplication(SegmentReplicationTarget.java:285) ~[opensearch-2.13.0.jar:2.13.0] at org.opensearch.indices.replication.SegmentReplicationTarget.lambda$startReplication$3(SegmentReplicationTarget.java:184) ~[opensearch-2.13.0.jar:2.13.0] at org.opensearch.core.action.ActionListener$1.onResponse(ActionListener.java:82) [opensearch-core-2.13.0.jar:2.13.0] at org.opensearch.common.util.concurrent.ListenableFuture$1.doRun(ListenableFuture.java:126) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:52) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.common.util.concurrent.OpenSearchExecutors$DirectExecutorService.execute(OpenSearchExecutors.java:343) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.common.util.concurrent.ListenableFuture.notifyListener(ListenableFuture.java:120) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.common.util.concurrent.ListenableFuture.lambda$done$0(ListenableFuture.java:112) 
[opensearch-2.13.0.jar:2.13.0] at java.base/java.util.ArrayList.forEach(ArrayList.java:1596) [?:?] at org.opensearch.common.util.concurrent.ListenableFuture.done(ListenableFuture.java:112) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.common.util.concurrent.BaseFuture.set(BaseFuture.java:160) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.common.util.concurrent.ListenableFuture.onResponse(ListenableFuture.java:141) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.action.StepListener.innerOnResponse(StepListener.java:79) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.core.action.NotifyOnceListener.onResponse(NotifyOnceListener.java:58) [opensearch-core-2.13.0.jar:2.13.0] at org.opensearch.core.action.ActionListener$4.onResponse(ActionListener.java:182) [opensearch-core-2.13.0.jar:2.13.0] at org.opensearch.core.action.ActionListener$6.onResponse(ActionListener.java:301) [opensearch-core-2.13.0.jar:2.13.0] at org.opensearch.action.support.RetryableAction$RetryingListener.onResponse(RetryableAction.java:183) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.action.ActionListenerResponseHandler.handleResponse(ActionListenerResponseHandler.java:70) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.security.transport.SecurityInterceptor$RestoringTransportResponseHandler.handleResponse(SecurityInterceptor.java:398) [opensearch-security-2.13.0.0.jar:2.13.0.0] at org.opensearch.transport.TransportService$ContextRestoreResponseHandler.handleResponse(TransportService.java:1505) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.transport.InboundHandler.doHandleResponse(InboundHandler.java:420) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.transport.InboundHandler.lambda$handleResponse$3(InboundHandler.java:414) [opensearch-2.13.0.jar:2.13.0] at org.opensearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:854) [opensearch-2.13.0.jar:2.13.0] at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144) 
[?:?] at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642) [?:?] at java.base/java.lang.Thread.run(Thread.java:1583) [?:?]