fediblock-instance/lib/taskdeletedup.js

70 lines
2.7 KiB
JavaScript
Raw Normal View History

2024-10-14 18:13:12 +00:00
module.exports = client => {
const constant = require('./constant'),
schedule = require('node-schedule'),
2024-10-19 18:37:54 +00:00
{ pick } = require('stream-json/filters/Pick'),
{ parser } = require('stream-json'),
{ streamArray } = require('stream-json/streamers/StreamArray'),
{ chain } = require('stream-chain'),
2024-10-14 18:13:12 +00:00
size = 500,
deleteDup = async () => {
2024-10-19 18:37:54 +00:00
const count = { deleted: 0, total: 0, current: 0 }
let lastsort = undefined, last = undefined
await searchDup(count, lastsort, last)
},
searchDup = async (count, lastsort, last) => {
count.current = 0
const result = await client.search({
index: constant.index,
size: size,
body: {
query: {
match_all: {}
},
sort: [{
"instance": {
"order": "asc"
2024-10-14 18:13:12 +00:00
},
2024-10-19 18:37:54 +00:00
"last": {
"order": "desc",
"numeric_type": "date_nanos",
"format": "strict_date_optional_time_nanos"
}
}],
search_after: lastsort
}
}, { asStream: true, meta: false }),
pipeline = chain([
parser(),
pick({ filter: 'hits.hits' }),
streamArray(),
data => data.value
])
pipeline.on('data', async data => {
count.current++
if (last && last.instance === data._source.instance) {
await client.delete({ index: constant.index, id: data._id })
count.deleted++
console.log('deleted ' + data._id + ': ' + data._source.instance)
}
else {
last = data._source
}
if (count.current === size) {
lastsort = data.sort
2024-10-14 18:13:12 +00:00
}
2024-10-19 18:37:54 +00:00
})
pipeline.on('end', async () => {
count.total += size
if (count.current === size) {
await searchDup(count, lastsort, last)
} else {
console.log('Index: ' + constant.index + ' - Total: ' + count.total + ' - Deleted: ' + count.deleted)
2024-10-14 18:13:12 +00:00
}
2024-10-19 18:37:54 +00:00
})
result.pipe(pipeline)
2024-10-14 18:13:12 +00:00
},
job = schedule.scheduleJob('0 ' + constant.taskdeletedup + ' * * *', async () => {
await deleteDup()
})
}