From f85d17604b53c50a71f48e7a1431a2a8a7bb4358 Mon Sep 17 00:00:00 2001 From: ale Date: Mon, 14 Oct 2024 00:05:40 +0200 Subject: [PATCH] stream-json --- lib/apiswagger.js | 42 +++++++++++++++++++++++++++++++----------- package.json | 1 + public/index.html | 2 +- public/main.js | 5 ++--- 4 files changed, 35 insertions(+), 15 deletions(-) diff --git a/lib/apiswagger.js b/lib/apiswagger.js index dd94694..afb36b7 100644 --- a/lib/apiswagger.js +++ b/lib/apiswagger.js @@ -3,6 +3,10 @@ const nodeinfo = require('activitypub-express/pub/nodeinfo') module.exports = (app, client) => { const constant = require('./constant'), zlib = require('zlib'), + { pick } = require('stream-json/filters/Pick'), + { parser } = require('stream-json'), + { streamArray } = require('stream-json/streamers/StreamArray'), + { chain } = require('stream-chain'), clean = str => { return str.replace(/[/\\^$+?()`'¡¿¨!"·%&=;,\|\[\]{}]+/gmi, '') } @@ -472,15 +476,24 @@ module.exports = (app, client) => { } } } - }) - const instances = result.hits?.hits?.length > 0 ? result.hits.hits : [], - instancescomment = instances.map(i => ({ - instance: i._source.instance, comment: i._source.blocks.find(block => block.domain === clean(req.params.instance)).comment - })) + }, { asStream: true, meta: false }), + instancescomment = [], + block_count = 0, + pipeline = chain([ + parser(), + pick({ filter: 'hits.hits' }), + streamArray(), + data => { + block_count++ + instancescomment.push({ + instance: data.value._source.instance, comment: data.value._source.blocks.find(block => block.domain === clean(req.params.instance)).comment + }) + } + ]) + result.pipe(pipeline) res.json({ - block_count: instances.length, - instances: instancescomment, - took: result.took + block_count, + instances: instancescomment }) } else { res.status(404).end() @@ -501,15 +514,22 @@ module.exports = (app, client) => { app.use('/api/download_index', async (req, res) => { try { res.setHeader('Content-Type', 'application/gzip') - res.setHeader('Content-disposition', 'attachment; filename=fediblock-index.json.gz') + res.setHeader('Content-disposition', 'attachment; filename=fediblock-index.jsonl.gz') const result = await client.search({ index: constant.index, size: 9999, query: { match_all: {} } - }, { asStream: true, meta: false }) - result.pipe(zlib.createGzip()).pipe(res) + }, { asStream: true, meta: false }), + pipeline = chain([ + parser(), + pick({ filter: 'hits.hits' }), + streamArray(), + data => JSON.stringify(data.value._source) + '\n', + zlib.createGzip() + ]) + result.pipe(pipeline).pipe(res) } catch (e) { console.error(e) res.status(404).end() diff --git a/package.json b/package.json index aa1501c..d89f769 100644 --- a/package.json +++ b/package.json @@ -25,6 +25,7 @@ "node-schedule": "^2.1.1", "parcel": "^2.12.0", "rotating-file-stream": "^3.2.5", + "stream-json": "^1.8.0", "swagger-jsdoc": "^6.2.8", "swagger-ui-express": "^5.0.1" } diff --git a/public/index.html b/public/index.html index b6c63fb..9469cbf 100644 --- a/public/index.html +++ b/public/index.html @@ -74,7 +74,7 @@ diff --git a/public/main.js b/public/main.js index 9f96996..e834074 100644 --- a/public/main.js +++ b/public/main.js @@ -39,7 +39,6 @@ document.addEventListener('DOMContentLoaded', function () { if (res && res.block_count >= 0) { document.getElementById('blockcount').innerText = res.block_count document.getElementById('blockinstance').innerText = 'ing ' + content - document.getElementById('blocktook').innerText = res.took var list = document.getElementById('blocklist'), download = document.getElementById('download') download.removeAttribute('href') @@ -115,7 +114,7 @@ document.addEventListener('DOMContentLoaded', function () { fetch('/api/download_index', { signal: ac.signal }).then(async function (result) { var res = await result.blob(), a = document.createElement('a') - a.download = 'fediblock-index.json.gz' + a.download = 'fediblock-index.jsonl.gz' a.href = URL.createObjectURL(res) a.type = 'application/gzip' a.target = '_blank' @@ -213,7 +212,7 @@ document.addEventListener('DOMContentLoaded', function () { document.getElementById('blockinstance').innerHTML = 'ed by ' + (res.api ? '' + res.instance + '' : res.instance) + (res.nodeinfo ? ' ' : '') + '
Last update: ' + (new Date(res.last)).toLocaleString() - document.getElementById('blocktook').innerText = res.took + document.getElementById('blocktook').innerText = '(search in ' + res.took + 'ms)' if (csv.split('\n').length > 2) { download.href = window.URL.createObjectURL(new Blob([csv], { type: 'text/csv' })) download.download = 'fediblock-' + res.instance + '.csv'