2024-10-14 18:13:12 +00:00
|
|
|
module.exports = client => {
|
|
|
|
const constant = require('./constant'),
|
|
|
|
schedule = require('node-schedule'),
|
2024-10-19 18:37:54 +00:00
|
|
|
{ pick } = require('stream-json/filters/Pick'),
|
|
|
|
{ parser } = require('stream-json'),
|
|
|
|
{ streamArray } = require('stream-json/streamers/StreamArray'),
|
|
|
|
{ chain } = require('stream-chain'),
|
2024-10-14 18:13:12 +00:00
|
|
|
size = 500,
|
|
|
|
deleteDup = async () => {
|
2024-10-19 18:37:54 +00:00
|
|
|
const count = { deleted: 0, total: 0, current: 0 }
|
|
|
|
let lastsort = undefined, last = undefined
|
|
|
|
await searchDup(count, lastsort, last)
|
|
|
|
},
|
|
|
|
searchDup = async (count, lastsort, last) => {
|
|
|
|
count.current = 0
|
|
|
|
const result = await client.search({
|
|
|
|
index: constant.index,
|
|
|
|
size: size,
|
|
|
|
body: {
|
|
|
|
query: {
|
|
|
|
match_all: {}
|
|
|
|
},
|
|
|
|
sort: [{
|
|
|
|
"instance": {
|
|
|
|
"order": "asc"
|
2024-10-14 18:13:12 +00:00
|
|
|
},
|
2024-10-19 18:37:54 +00:00
|
|
|
"last": {
|
|
|
|
"order": "desc",
|
|
|
|
"numeric_type": "date_nanos",
|
|
|
|
"format": "strict_date_optional_time_nanos"
|
|
|
|
}
|
|
|
|
}],
|
|
|
|
search_after: lastsort
|
|
|
|
}
|
|
|
|
}, { asStream: true, meta: false }),
|
|
|
|
pipeline = chain([
|
|
|
|
parser(),
|
|
|
|
pick({ filter: 'hits.hits' }),
|
|
|
|
streamArray(),
|
|
|
|
data => data.value
|
|
|
|
])
|
|
|
|
pipeline.on('data', async data => {
|
|
|
|
count.current++
|
|
|
|
if (last && last.instance === data._source.instance) {
|
|
|
|
await client.delete({ index: constant.index, id: data._id })
|
|
|
|
count.deleted++
|
|
|
|
console.log('deleted ' + data._id + ': ' + data._source.instance)
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
last = data._source
|
|
|
|
}
|
|
|
|
if (count.current === size) {
|
|
|
|
lastsort = data.sort
|
2024-10-14 18:13:12 +00:00
|
|
|
}
|
2024-10-19 18:37:54 +00:00
|
|
|
})
|
|
|
|
pipeline.on('end', async () => {
|
2024-10-20 05:36:05 +00:00
|
|
|
count.total += count.current
|
2024-10-19 18:37:54 +00:00
|
|
|
if (count.current === size) {
|
|
|
|
await searchDup(count, lastsort, last)
|
|
|
|
} else {
|
|
|
|
console.log('Index: ' + constant.index + ' - Total: ' + count.total + ' - Deleted: ' + count.deleted)
|
2024-10-14 18:13:12 +00:00
|
|
|
}
|
2024-10-19 18:37:54 +00:00
|
|
|
})
|
|
|
|
result.pipe(pipeline)
|
2024-10-14 18:13:12 +00:00
|
|
|
},
|
|
|
|
job = schedule.scheduleJob('0 ' + constant.taskdeletedup + ' * * *', async () => {
|
|
|
|
await deleteDup()
|
|
|
|
})
|
|
|
|
}
|