Comparar commits

...

2 Commits

Autor SHA1 Mensaje Fecha
ale
9c0c30e846 show stats
Signed-off-by: ale <ale@manalejandro.com>
2025-12-07 01:30:51 +01:00
ale
179e192e82 script --no-check
Signed-off-by: ale <ale@manalejandro.com>
2025-12-07 01:28:37 +01:00
Se han modificado 2 ficheros con 121 adiciones y 53 borrados

Ver fichero

@@ -1,7 +1,7 @@
'use client';
import { useState } from 'react';
import { Search, Copy, Check, Hash, Key, AlertCircle, Loader2 } from 'lucide-react';
import { useState, useEffect } from 'react';
import { Search, Copy, Check, Hash, Key, AlertCircle, Loader2, Database } from 'lucide-react';
interface SearchResult {
found: boolean;
@@ -30,12 +30,48 @@ interface SearchResult {
message?: string;
}
/** Index statistics pulled from the /api/health response (data.index.stats). */
interface IndexStats {
// Total number of indexed documents; rendered via formatNumber() as a "hashes" count.
documentCount: number;
// Index size in bytes; rendered via formatBytes().
indexSize: number;
}
/**
 * Formats a byte count as a human-readable string (e.g. 1536 -> "1.5 KB").
 *
 * @param bytes - Non-negative byte count.
 * @returns The value scaled to the largest fitting unit, up to 2 decimals.
 */
function formatBytes(bytes: number): string {
  if (bytes === 0) return '0 B';
  const k = 1024;
  const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
  // Clamp the exponent so values >= 1024 TB don't index past the last unit
  // (the original produced "... undefined" for such inputs).
  const i = Math.min(sizes.length - 1, Math.floor(Math.log(bytes) / Math.log(k)));
  return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
}
/** Renders a number with grouping separators in the host's default locale. */
function formatNumber(num: number): string {
  // Per ECMA-402, Number.prototype.toLocaleString() with no arguments is
  // equivalent to formatting with a default-constructed Intl.NumberFormat.
  return new Intl.NumberFormat().format(num);
}
export default function Home() {
const [query, setQuery] = useState('');
const [result, setResult] = useState<SearchResult | null>(null);
const [loading, setLoading] = useState(false);
const [error, setError] = useState('');
const [copiedField, setCopiedField] = useState<string | null>(null);
const [stats, setStats] = useState<IndexStats | null>(null);
useEffect(() => {
const fetchStats = async () => {
try {
const response = await fetch('/api/health');
if (response.ok) {
const data = await response.json();
if (data.index?.stats) {
setStats(data.index.stats);
}
}
} catch (_err) {
// Silently fail - stats are not critical
}
};
fetchStats();
}, [result]); // Refresh stats after each search result
const handleSearch = async (e: React.FormEvent) => {
e.preventDefault();
@@ -110,6 +146,18 @@ export default function Home() {
<p className="text-sm text-gray-500 mt-2">
Supports MD5, SHA1, SHA256, SHA512, and Bcrypt
</p>
{stats && (
<div className="flex items-center justify-center gap-4 mt-4 text-sm text-gray-500">
<div className="flex items-center gap-1.5">
<Database className="w-4 h-4" />
<span><strong>{formatNumber(stats.documentCount)}</strong> hashes</span>
</div>
<span className="text-gray-300">•</span>
<div>
<span><strong>{formatBytes(stats.indexSize)}</strong> indexed</span>
</div>
</div>
)}
</div>
{/* Search Form */}

Ver fichero

@@ -14,6 +14,7 @@
* --batch-size=<number> Number of items to process in each batch (default: 100)
* --resume Resume from last saved state (default: true)
* --no-resume Start from beginning, ignore saved state
* --no-check Skip duplicate checking (faster, but may create duplicates)
* --state-file=<path> Custom state file path (default: .indexer-state-<filename>.json)
* --help, -h Show this help message
*/
@@ -54,6 +55,7 @@ interface ParsedArgs {
filePath: string | null;
batchSize: number;
resume: boolean;
checkDuplicates: boolean;
stateFile: string | null;
showHelp: boolean;
}
@@ -63,6 +65,7 @@ function parseArgs(args: string[]): ParsedArgs {
filePath: null,
batchSize: DEFAULT_BATCH_SIZE,
resume: true,
checkDuplicates: true,
stateFile: null,
showHelp: false
};
@@ -76,6 +79,8 @@ function parseArgs(args: string[]): ParsedArgs {
result.resume = true;
} else if (arg === '--no-resume') {
result.resume = false;
} else if (arg === '--no-check') {
result.checkDuplicates = false;
} else if (arg.startsWith('--batch-size=')) {
const value = arg.split('=')[1];
const parsed = parseInt(value, 10);
@@ -180,6 +185,7 @@ Options:
--batch-size <number> Alternative syntax for batch size
--resume Resume from last saved state (default)
--no-resume Start from beginning, ignore saved state
--no-check Skip duplicate checking (faster, but may create duplicates)
--state-file=<path> Custom state file path
--help, -h Show this help message
@@ -191,17 +197,23 @@ Examples:
npx tsx scripts/index-file.ts wordlist.txt --batch-size=500
npx tsx scripts/index-file.ts wordlist.txt --batch-size 500
npx tsx scripts/index-file.ts wordlist.txt --no-resume
npm run index-file -- wordlist.txt --batch-size=500
npx tsx scripts/index-file.ts wordlist.txt --no-check
npm run index-file -- wordlist.txt --batch-size=500 --no-check
State Management:
The script automatically saves progress to a state file. If interrupted,
it will resume from where it left off on the next run. Use --no-resume
to start fresh.
Duplicate Checking:
By default, the script checks if each plaintext or hash already exists
in the index before inserting. Use --no-check to skip this verification
for faster indexing (useful when you're sure there are no duplicates).
`);
process.exit(0);
}
async function indexFile(filePath: string, batchSize: number, shouldResume: boolean, customStateFile: string | null) {
async function indexFile(filePath: string, batchSize: number, shouldResume: boolean, checkDuplicates: boolean, customStateFile: string | null) {
const client = new Client({ node: ELASTICSEARCH_NODE });
const absolutePath = resolve(filePath);
const stateFile = customStateFile || getDefaultStateFile(absolutePath);
@@ -244,6 +256,7 @@ async function indexFile(filePath: string, batchSize: number, shouldResume: bool
console.log(`Index: ${INDEX_NAME}`);
console.log(`File: ${filePath}`);
console.log(`Batch size: ${batchSize}`);
console.log(`Check duplicates: ${checkDuplicates ? 'yes' : 'no (--no-check)'}`);
console.log(`State file: ${stateFile}`);
if (resumingFrom > 0) {
console.log(`Resuming from: line ${resumingFrom}`);
@@ -307,58 +320,64 @@ async function indexFile(filePath: string, batchSize: number, shouldResume: bool
}))
);
// Check which items already exist (by plaintext or any hash)
const md5List = batchWithHashes.map((item: any) => item.hashes.md5);
const sha1List = batchWithHashes.map((item: any) => item.hashes.sha1);
const sha256List = batchWithHashes.map((item: any) => item.hashes.sha256);
const sha512List = batchWithHashes.map((item: any) => item.hashes.sha512);
if (checkDuplicates) {
// Check which items already exist (by plaintext or any hash)
const md5List = batchWithHashes.map((item: any) => item.hashes.md5);
const sha1List = batchWithHashes.map((item: any) => item.hashes.sha1);
const sha256List = batchWithHashes.map((item: any) => item.hashes.sha256);
const sha512List = batchWithHashes.map((item: any) => item.hashes.sha512);
const existingCheck = await client.search({
index: INDEX_NAME,
size: batchSize * 5,
query: {
bool: {
should: [
{ terms: { 'plaintext.keyword': batch } },
{ terms: { md5: md5List } },
{ terms: { sha1: sha1List } },
{ terms: { sha256: sha256List } },
{ terms: { sha512: sha512List } },
],
minimum_should_match: 1
const existingCheck = await client.search({
index: INDEX_NAME,
size: batchSize * 5,
query: {
bool: {
should: [
{ terms: { 'plaintext.keyword': batch } },
{ terms: { md5: md5List } },
{ terms: { sha1: sha1List } },
{ terms: { sha256: sha256List } },
{ terms: { sha512: sha512List } },
],
minimum_should_match: 1
}
},
_source: ['plaintext', 'md5', 'sha1', 'sha256', 'sha512']
});
// Create a set of existing hashes for quick lookup
const existingHashes = new Set<string>();
existingCheck.hits.hits.forEach((hit: any) => {
const src = hit._source;
existingHashes.add(src.plaintext);
existingHashes.add(src.md5);
existingHashes.add(src.sha1);
existingHashes.add(src.sha256);
existingHashes.add(src.sha512);
});
// Prepare bulk operations only for items that don't have any duplicate hash
for (const item of batchWithHashes) {
const isDuplicate =
existingHashes.has(item.plaintext) ||
existingHashes.has(item.hashes.md5) ||
existingHashes.has(item.hashes.sha1) ||
existingHashes.has(item.hashes.sha256) ||
existingHashes.has(item.hashes.sha512);
if (!isDuplicate) {
bulkOperations.push({ index: { _index: INDEX_NAME } });
bulkOperations.push(item.hashes);
} else {
state.skipped++;
sessionSkipped++;
}
},
_source: ['plaintext', 'md5', 'sha1', 'sha256', 'sha512']
});
// Create a set of existing hashes for quick lookup
const existingHashes = new Set<string>();
existingCheck.hits.hits.forEach((hit: any) => {
const src = hit._source;
existingHashes.add(src.plaintext);
existingHashes.add(src.md5);
existingHashes.add(src.sha1);
existingHashes.add(src.sha256);
existingHashes.add(src.sha512);
});
// Prepare bulk operations only for items that don't have any duplicate hash
let batchSkipped = 0;
for (const item of batchWithHashes) {
const isDuplicate =
existingHashes.has(item.plaintext) ||
existingHashes.has(item.hashes.md5) ||
existingHashes.has(item.hashes.sha1) ||
existingHashes.has(item.hashes.sha256) ||
existingHashes.has(item.hashes.sha512);
if (!isDuplicate) {
}
} else {
// No duplicate checking - index everything
for (const item of batchWithHashes) {
bulkOperations.push({ index: { _index: INDEX_NAME } });
bulkOperations.push(item.hashes);
} else {
batchSkipped++;
state.skipped++;
sessionSkipped++;
}
}
@@ -498,9 +517,10 @@ console.log(`\n🔧 Configuration:`);
console.log(` File: ${filePath}`);
console.log(` Batch size: ${parsedArgs.batchSize}`);
console.log(` Resume: ${parsedArgs.resume}`);
console.log(` Check duplicates: ${parsedArgs.checkDuplicates}`);
if (parsedArgs.stateFile) {
console.log(` State file: ${parsedArgs.stateFile}`);
}
console.log('');
indexFile(filePath, parsedArgs.batchSize, parsedArgs.resume, parsedArgs.stateFile).catch(console.error);
indexFile(filePath, parsedArgs.batchSize, parsedArgs.resume, parsedArgs.checkDuplicates, parsedArgs.stateFile).catch(console.error);