Compare commits

3 commits · `42bc5a15d0...elasticsea`

| Author | SHA1 | Date |
|---|---|---|
|  | b91d19dc0b |  |
|  | da89037125 |  |
|  | 20f0503134 |  |
app/page.tsx (43 changed lines)

```diff
@@ -1,7 +1,7 @@
 'use client';
 
-import { useState, useEffect, useCallback } from 'react';
-import { useSearchParams, useRouter } from 'next/navigation';
+import { useState, useEffect, useCallback, Suspense } from 'react';
+import { useSearchParams } from 'next/navigation';
 import { Search, Copy, Check, Hash, Key, AlertCircle, Loader2, Database, Link } from 'lucide-react';
 
 interface SearchResult {
@@ -46,9 +46,8 @@ function formatNumber(num: number): string {
   return num.toLocaleString();
 }
 
-export default function Home() {
+function HasherContent() {
   const searchParams = useSearchParams();
-  const router = useRouter();
   const [query, setQuery] = useState('');
   const [result, setResult] = useState<SearchResult | null>(null);
   const [loading, setLoading] = useState(false);
@@ -56,8 +55,9 @@ export default function Home() {
   const [copiedField, setCopiedField] = useState<string | null>(null);
   const [stats, setStats] = useState<IndexStats | null>(null);
   const [copiedLink, setCopiedLink] = useState(false);
+  const [initialLoadDone, setInitialLoadDone] = useState(false);
 
-  const performSearch = useCallback(async (searchQuery: string) => {
+  const performSearch = useCallback(async (searchQuery: string, updateUrl: boolean = true) => {
     if (!searchQuery.trim()) return;
 
     setLoading(true);
@@ -78,25 +78,30 @@ export default function Home() {
       const data = await response.json();
       setResult(data);
 
-      // Update URL with search query
+      // Update URL with search query (using history API to avoid re-triggering effects)
+      if (updateUrl) {
       const newUrl = new URL(window.location.href);
       newUrl.searchParams.set('q', searchQuery.trim());
-      router.replace(newUrl.pathname + newUrl.search, { scroll: false });
+      window.history.replaceState(null, '', newUrl.pathname + newUrl.search);
+      }
     } catch (_err) {
       setError('Failed to perform search. Please check your connection.');
     } finally {
       setLoading(false);
     }
-  }, [router]);
+  }, []);
 
-  // Load query from URL on mount
+  // Load query from URL on mount (only once)
   useEffect(() => {
+    if (initialLoadDone) return;
+
     const urlQuery = searchParams.get('q');
     if (urlQuery) {
       setQuery(urlQuery);
-      performSearch(urlQuery);
+      performSearch(urlQuery, false);
     }
-  }, [searchParams, performSearch]);
+    setInitialLoadDone(true);
+  }, [searchParams, performSearch, initialLoadDone]);
 
   useEffect(() => {
     const fetchStats = async () => {
@@ -361,3 +366,19 @@ export default function Home() {
   );
 }
+
+function LoadingFallback() {
+  return (
+    <div className="min-h-screen bg-gradient-to-br from-blue-50 via-white to-purple-50 flex items-center justify-center">
+      <Loader2 className="w-12 h-12 text-blue-600 animate-spin" />
+    </div>
+  );
+}
+
+export default function Home() {
+  return (
+    <Suspense fallback={<LoadingFallback />}>
+      <HasherContent />
+    </Suspense>
+  );
+}
```
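The split of the old `Home` component into `HasherContent` plus a `Suspense`-wrapping `Home` follows the Next.js App Router rule that `useSearchParams()` may only be called beneath a `<Suspense>` boundary; without one, `next build` fails while statically prerendering the page. A minimal sketch of the same pattern (component names and fallback markup here are illustrative, not taken from the diff):

```tsx
'use client';

import { Suspense } from 'react';
import { useSearchParams } from 'next/navigation';

// Reads the query string, so it must render inside a <Suspense> boundary.
function QueryReader() {
  const searchParams = useSearchParams();
  return <p>Current query: {searchParams.get('q') ?? '(none)'}</p>;
}

// The exported page only provides the boundary, mirroring the
// HasherContent/Home split in the diff above.
export default function Page() {
  return (
    <Suspense fallback={<p>Loading…</p>}>
      <QueryReader />
    </Suspense>
  );
}
```

Swapping `router.replace` for `window.history.replaceState` keeps the URL update from re-firing the `searchParams` effect, which is also what the new `initialLoadDone` guard protects against.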
Second changed file:

```diff
@@ -225,38 +225,41 @@ async function findDuplicatesForField(
   return duplicates;
 }
 
-async function removeDuplicates(parsedArgs: ParsedArgs) {
-  const client = new Client({ node: ELASTICSEARCH_NODE });
-  const fields = parsedArgs.field
-    ? [parsedArgs.field]
-    : ['plaintext', 'md5', 'sha1', 'sha256', 'sha512'];
-
-  console.log(`🔍 Hasher Duplicate Remover`);
+/**
+ * Phase 1: Initialize and connect to Elasticsearch
+ */
+async function phase1_InitAndConnect() {
+  console.log(`🔍 Hasher Duplicate Remover - Phase 1: Initialization`);
   console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
   console.log(`Elasticsearch: ${ELASTICSEARCH_NODE}`);
   console.log(`Index: ${INDEX_NAME}`);
-  console.log(`Mode: ${parsedArgs.dryRun ? '🔎 DRY RUN (no changes)' : '⚠️ EXECUTE (will delete)'}`);
-  console.log(`Batch size: ${parsedArgs.batchSize}`);
-  console.log(`Fields to check: ${fields.join(', ')}`);
   console.log('');
 
-  try {
-    // Test connection
+  const client = new Client({ node: ELASTICSEARCH_NODE });
+
   console.log('🔗 Connecting to Elasticsearch...');
   await client.cluster.health({});
   console.log('✅ Connected successfully\n');
 
   // Get index stats
   const countResponse = await client.count({ index: INDEX_NAME });
   console.log(`📊 Total documents in index: ${countResponse.count}\n`);
 
-    const allDuplicates: DuplicateGroup[] = [];
-    const seenDeleteIds = new Set<string>();
+  return { client, totalDocuments: countResponse.count };
+}
 
-    // Find duplicates for each field
-    for (const field of fields) {
-      console.log(`🔍 Checking duplicates for field: ${field}...`);
-      const fieldDuplicates = await findDuplicatesForField(client, field, parsedArgs.batchSize);
+/**
+ * Phase 2: Find duplicates for a specific field
+ */
+async function phase2_FindDuplicatesForField(
+  client: Client,
+  field: string,
+  batchSize: number,
+  seenDeleteIds: Set<string>
+): Promise<{ duplicates: DuplicateGroup[], totalFound: number }> {
+  console.log(`\n🔍 Phase 2: Checking duplicates for field: ${field}...`);
+
+  const fieldDuplicates = await findDuplicatesForField(client, field, batchSize);
+  const duplicates: DuplicateGroup[] = [];
 
   // Filter out already seen delete IDs to avoid counting the same document multiple times
   for (const dup of fieldDuplicates) {
```
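The shared `seenDeleteIds` set threaded through phase 2 matters because one document can be a duplicate under several fields (for example, by both `md5` and `sha1`); the filter keeps any `_id` from being scheduled for deletion twice. A sketch of that filtering step, with `DuplicateGroup` pared down to the one member used here (the script's real interface has more):

```ts
// Pared-down stand-in for the script's DuplicateGroup interface.
interface DuplicateGroup {
  deleteIds: string[];
}

// Keep only IDs that no earlier field has already scheduled for deletion.
function filterUnseen(groups: DuplicateGroup[], seen: Set<string>): DuplicateGroup[] {
  const fresh: DuplicateGroup[] = [];
  for (const group of groups) {
    const newIds = group.deleteIds.filter(id => !seen.has(id));
    if (newIds.length > 0) {
      newIds.forEach(id => seen.add(id));
      fresh.push({ ...group, deleteIds: newIds });
    }
  }
  return fresh;
}
```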
```diff
@@ -264,11 +267,124 @@ async function removeDuplicates(parsedArgs: ParsedArgs) {
     if (newDeleteIds.length > 0) {
       dup.deleteIds = newDeleteIds;
       newDeleteIds.forEach(id => seenDeleteIds.add(id));
-      allDuplicates.push(dup);
+      duplicates.push(dup);
     }
   }
 
   console.log(`   Found ${fieldDuplicates.length} duplicate groups for ${field}`);
+  console.log(`   New unique documents to delete: ${duplicates.reduce((sum, dup) => sum + dup.deleteIds.length, 0)}`);
+
+  // Force garbage collection if available
+  if (global.gc) {
+    global.gc();
+    console.log(`   ♻️ Memory freed after processing ${field}`);
+  }
+
+  return { duplicates, totalFound: fieldDuplicates.length };
+}
+
+/**
+ * Phase 3: Process deletion for a batch of duplicates
+ */
+async function phase3_DeleteBatch(
+  client: Client,
+  deleteIds: string[],
+  batchSize: number,
+  startIndex: number
+): Promise<{ deleted: number, errors: number }> {
+  const batch = deleteIds.slice(startIndex, startIndex + batchSize);
+  let deleted = 0;
+  let errors = 0;
+
+  try {
+    const bulkOperations = batch.flatMap(id => [
+      { delete: { _index: INDEX_NAME, _id: id } }
+    ]);
+
+    const bulkResponse = await client.bulk({
+      operations: bulkOperations,
+      refresh: false
+    });
+
+    if (bulkResponse.errors) {
+      const errorCount = bulkResponse.items.filter((item: any) => item.delete?.error).length;
+      errors += errorCount;
+      deleted += batch.length - errorCount;
+    } else {
+      deleted += batch.length;
+    }
+  } catch (error) {
+    console.error(`\n❌ Error deleting batch:`, error);
+    errors += batch.length;
+  }
+
+  // Force garbage collection if available
+  if (global.gc) {
+    global.gc();
+  }
+
+  return { deleted, errors };
+}
+
+/**
+ * Phase 4: Finalize and report results
+ */
+async function phase4_Finalize(
+  client: Client,
+  totalDeleted: number,
+  totalErrors: number,
+  initialDocumentCount: number
+) {
+  console.log('\n\n🔄 Phase 4: Refreshing index...');
+  await client.indices.refresh({ index: INDEX_NAME });
+
+  const newCountResponse = await client.count({ index: INDEX_NAME });
+
+  console.log('\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━');
+  console.log('✅ Duplicate removal complete!');
+  console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
+  console.log(`Documents deleted: ${totalDeleted}`);
+  console.log(`Errors: ${totalErrors}`);
+  console.log(`Previous document count: ${initialDocumentCount}`);
+  console.log(`New document count: ${newCountResponse.count}`);
+  console.log('');
+}
+
+async function removeDuplicates(parsedArgs: ParsedArgs) {
+  const fields = parsedArgs.field
+    ? [parsedArgs.field]
+    : ['plaintext', 'md5', 'sha1', 'sha256', 'sha512'];
+
+  console.log(`Mode: ${parsedArgs.dryRun ? '🔎 DRY RUN (no changes)' : '⚠️ EXECUTE (will delete)'}`);
+  console.log(`Batch size: ${parsedArgs.batchSize}`);
+  console.log(`Fields to check: ${fields.join(', ')}`);
+  console.log('');
+
+  try {
+    // === PHASE 1: Initialize ===
+    const { client, totalDocuments } = await phase1_InitAndConnect();
+
+    // Force garbage collection after phase 1
+    if (global.gc) {
+      global.gc();
+      console.log('♻️ Memory freed after initialization\n');
+    }
+
+    // === PHASE 2: Find duplicates field by field ===
+    const allDuplicates: DuplicateGroup[] = [];
+    const seenDeleteIds = new Set<string>();
+
+    for (const field of fields) {
+      const { duplicates } = await phase2_FindDuplicatesForField(
+        client,
+        field,
+        parsedArgs.batchSize,
+        seenDeleteIds
+      );
+      allDuplicates.push(...duplicates);
+
+      // Clear field duplicates to free memory
+      duplicates.length = 0;
+    }
+
+    const totalToDelete = allDuplicates.reduce((sum, dup) => sum + dup.deleteIds.length, 0);
```
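Note that the `if (global.gc)` guards scattered through these phases only ever fire when Node is started with V8's `--expose-gc` flag; otherwise `global.gc` is `undefined` and the manual collections are silently skipped. A small sketch of the guard (the script filename is a placeholder):

```ts
// global.gc is only defined when the process is launched with, e.g.:
//   node --expose-gc remove-duplicates.js
if (typeof global.gc === 'function') {
  global.gc(); // request a full collection between heavy phases
} else {
  console.warn('Start Node with --expose-gc to enable manual GC between phases.');
}
```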
```diff
@@ -310,57 +426,40 @@ async function removeDuplicates(parsedArgs: ParsedArgs) {
       return;
     }
 
-    // Execute deletion
-    console.log(`\n🗑️ Removing ${totalToDelete} duplicate documents...\n`);
+    // === PHASE 3: Execute deletion in batches ===
+    console.log(`\n🗑️ Phase 3: Removing ${totalToDelete} duplicate documents...\n`);
 
-    let deleted = 0;
-    let errors = 0;
+    let totalDeleted = 0;
+    let totalErrors = 0;
     const deleteIds = allDuplicates.flatMap(dup => dup.deleteIds);
 
-    // Delete in batches
+    // Clear allDuplicates to free memory
+    allDuplicates.length = 0;
+
+    // Delete in batches with memory management
     for (let i = 0; i < deleteIds.length; i += parsedArgs.batchSize) {
-      const batch = deleteIds.slice(i, i + parsedArgs.batchSize);
+      const { deleted, errors } = await phase3_DeleteBatch(
+        client,
+        deleteIds,
+        parsedArgs.batchSize,
+        i
+      );
 
-      try {
-        const bulkOperations = batch.flatMap(id => [
-          { delete: { _index: INDEX_NAME, _id: id } }
-        ]);
+      totalDeleted += deleted;
+      totalErrors += errors;
 
-        const bulkResponse = await client.bulk({
-          operations: bulkOperations,
-          refresh: false
-        });
-
-        if (bulkResponse.errors) {
-          const errorCount = bulkResponse.items.filter((item: any) => item.delete?.error).length;
-          errors += errorCount;
-          deleted += batch.length - errorCount;
-        } else {
-          deleted += batch.length;
-        }
+      process.stdout.write(
+        `\r⏳ Progress: ${Math.min(i + parsedArgs.batchSize, deleteIds.length)}/${deleteIds.length} - ` +
+        `Deleted: ${totalDeleted}, Errors: ${totalErrors}`
+      );
+    }
 
-        process.stdout.write(`\r⏳ Progress: ${Math.min(i + parsedArgs.batchSize, deleteIds.length)}/${deleteIds.length} - Deleted: ${deleted}, Errors: ${errors}`);
-      } catch (error) {
-        console.error(`\n❌ Error deleting batch:`, error);
-        errors += batch.length;
-      }
-    }
+    // Clear deleteIds to free memory
+    deleteIds.length = 0;
+    seenDeleteIds.clear();
 
-    // Refresh index
-    console.log('\n\n🔄 Refreshing index...');
-    await client.indices.refresh({ index: INDEX_NAME });
-
-    // Get new count
-    const newCountResponse = await client.count({ index: INDEX_NAME });
-
-    console.log('\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━');
-    console.log('✅ Duplicate removal complete!');
-    console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
-    console.log(`Documents deleted: ${deleted}`);
-    console.log(`Errors: ${errors}`);
-    console.log(`Previous document count: ${countResponse.count}`);
-    console.log(`New document count: ${newCountResponse.count}`);
-    console.log('');
+    // === PHASE 4: Finalize ===
+    await phase4_Finalize(client, totalDeleted, totalErrors, totalDocuments);
 
   } catch (error) {
     console.error('\n❌ Error:', error instanceof Error ? error.message : error);
```
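`phase3_DeleteBatch` leans on the Elasticsearch bulk API with `refresh: false`, so no refresh is triggered per batch and the deletions only become searchable after the single `indices.refresh` in phase 4. A minimal sketch of that pattern with the `@elastic/elasticsearch` 8.x client (node URL, index name, and IDs are illustrative):

```ts
import { Client } from '@elastic/elasticsearch';

const client = new Client({ node: 'http://localhost:9200' }); // illustrative node URL

// Delete a batch of documents by ID without triggering a refresh per request.
async function deleteBatch(index: string, ids: string[]): Promise<number> {
  const response = await client.bulk({
    // Delete actions carry only metadata, no document body.
    operations: ids.map(id => ({ delete: { _index: index, _id: id } })),
    refresh: false, // defer visibility until one explicit refresh at the end
  });
  const failed = response.items.filter(item => item.delete?.error).length;
  return ids.length - failed;
}

async function run() {
  const deleted = await deleteBatch('hasher', ['id-1', 'id-2']); // illustrative IDs
  // One refresh after all batches makes the deletions searchable.
  await client.indices.refresh({ index: 'hasher' });
  console.log(`Deleted ${deleted} documents`);
}
```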