taskdeletedup with streams
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
ale 2024-10-19 20:37:54 +02:00
parent b8f927be2b
commit 339ab9c192

View File

@ -1,50 +1,67 @@
module.exports = client => { module.exports = client => {
const constant = require('./constant'), const constant = require('./constant'),
schedule = require('node-schedule'), schedule = require('node-schedule'),
{ pick } = require('stream-json/filters/Pick'),
{ parser } = require('stream-json'),
{ streamArray } = require('stream-json/streamers/StreamArray'),
{ chain } = require('stream-chain'),
size = 500, size = 500,
deleteDup = async () => { deleteDup = async () => {
const count = { deleted: 0, total: 0 } const count = { deleted: 0, total: 0, current: 0 }
let lastsort = undefined, result = undefined, last = undefined let lastsort = undefined, last = undefined
do { await searchDup(count, lastsort, last)
result = await client.search({ },
index: constant.index, searchDup = async (count, lastsort, last) => {
size: size, count.current = 0
body: { const result = await client.search({
query: { index: constant.index,
match_all: {} size: size,
body: {
query: {
match_all: {}
},
sort: [{
"instance": {
"order": "asc"
}, },
sort: [{ "last": {
"instance": { "order": "desc",
"order": "asc" "numeric_type": "date_nanos",
}, "format": "strict_date_optional_time_nanos"
"last": { }
"order": "desc", }],
"numeric_type": "date_nanos", search_after: lastsort
"format": "strict_date_optional_time_nanos"
}
}],
search_after: lastsort
}
})
for (const instance of result.hits.hits) {
if (last && instance._source && last.instance === instance._source.instance) {
await client.delete({ index: constant.index, id: instance._id })
count.deleted++
console.log('deleted ' + instance._id + ': ' + instance._source.instance)
}
else {
last = instance._source
}
if (instance._id === result.hits.hits[result.hits.hits.length - 1]._id) {
lastsort = instance.sort
}
} }
count.total += result.hits.hits.length }, { asStream: true, meta: false }),
if (result.hits.hits.length !== size) { pipeline = chain([
break parser(),
pick({ filter: 'hits.hits' }),
streamArray(),
data => data.value
])
pipeline.on('data', async data => {
count.current++
if (last && last.instance === data._source.instance) {
await client.delete({ index: constant.index, id: data._id })
count.deleted++
console.log('deleted ' + data._id + ': ' + data._source.instance)
} }
} while (result.hits && result.hits.hits && result.hits.hits.length > 0) else {
console.log('Index: ' + constant.index + ' - Total: ' + count.total + ' - Deleted: ' + count.deleted) last = data._source
}
if (count.current === size) {
lastsort = data.sort
}
})
pipeline.on('end', async () => {
count.total += size
if (count.current === size) {
await searchDup(count, lastsort, last)
} else {
console.log('Index: ' + constant.index + ' - Total: ' + count.total + ' - Deleted: ' + count.deleted)
}
})
result.pipe(pipeline)
}, },
job = schedule.scheduleJob('0 ' + constant.taskdeletedup + ' * * *', async () => { job = schedule.scheduleJob('0 ' + constant.taskdeletedup + ' * * *', async () => {
await deleteDup() await deleteDup()