Comparar commits

...

2 Commits

Autor SHA1 Mensaje Fecha
ale
9c0c30e846 show stats
Signed-off-by: ale <ale@manalejandro.com>
2025-12-07 01:30:51 +01:00
ale
179e192e82 script --no-check
Signed-off-by: ale <ale@manalejandro.com>
2025-12-07 01:28:37 +01:00
Se han modificado 2 ficheros con 121 adiciones y 53 borrados

Ver fichero

@@ -1,7 +1,7 @@
'use client';
import { useState } from 'react';
import { Search, Copy, Check, Hash, Key, AlertCircle, Loader2 } from 'lucide-react';
import { useState, useEffect } from 'react';
import { Search, Copy, Check, Hash, Key, AlertCircle, Loader2, Database } from 'lucide-react';
interface SearchResult {
found: boolean;
@@ -30,12 +30,48 @@ interface SearchResult {
message?: string;
}
/** Index statistics pulled from the /api/health response (data.index.stats). */
interface IndexStats {
// Total number of indexed documents; rendered via formatNumber() as a "hashes" count.
documentCount: number;
// Index size in bytes; rendered via formatBytes().
indexSize: number;
}
/**
 * Formats a byte count as a human-readable string (e.g. 1536 -> "1.5 KB").
 *
 * @param bytes - Non-negative byte count.
 * @returns The value scaled to the largest fitting unit, up to 2 decimals.
 */
function formatBytes(bytes: number): string {
  if (bytes === 0) return '0 B';
  const k = 1024;
  const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
  // Clamp the exponent so values >= 1024 TB don't index past the last unit
  // (the original produced "... undefined" for such inputs).
  const i = Math.min(sizes.length - 1, Math.floor(Math.log(bytes) / Math.log(k)));
  return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
}
/** Renders a number with grouping separators in the host's default locale. */
function formatNumber(num: number): string {
  // Per ECMA-402, Number.prototype.toLocaleString() with no arguments is
  // equivalent to formatting with a default-constructed Intl.NumberFormat.
  return new Intl.NumberFormat().format(num);
}
export default function Home() {
const [query, setQuery] = useState('');
const [result, setResult] = useState<SearchResult | null>(null);
const [loading, setLoading] = useState(false);
const [error, setError] = useState('');
const [copiedField, setCopiedField] = useState<string | null>(null);
const [stats, setStats] = useState<IndexStats | null>(null);
useEffect(() => {
const fetchStats = async () => {
try {
const response = await fetch('/api/health');
if (response.ok) {
const data = await response.json();
if (data.index?.stats) {
setStats(data.index.stats);
}
}
} catch (_err) {
// Silently fail - stats are not critical
}
};
fetchStats();
}, [result]); // Refresh stats after each search result
const handleSearch = async (e: React.FormEvent) => {
e.preventDefault();
@@ -110,6 +146,18 @@ export default function Home() {
<p className="text-sm text-gray-500 mt-2">
Supports MD5, SHA1, SHA256, SHA512, and Bcrypt
</p>
{stats && (
<div className="flex items-center justify-center gap-4 mt-4 text-sm text-gray-500">
<div className="flex items-center gap-1.5">
<Database className="w-4 h-4" />
<span><strong>{formatNumber(stats.documentCount)}</strong> hashes</span>
</div>
<span className="text-gray-300">•</span>
<div>
<span><strong>{formatBytes(stats.indexSize)}</strong> indexed</span>
</div>
</div>
)}
</div>
{/* Search Form */}

Ver fichero

@@ -14,6 +14,7 @@
* --batch-size=<number> Number of items to process in each batch (default: 100)
* --resume Resume from last saved state (default: true)
* --no-resume Start from beginning, ignore saved state
* --no-check Skip duplicate checking (faster, but may create duplicates)
* --state-file=<path> Custom state file path (default: .indexer-state-<filename>.json)
* --help, -h Show this help message
*/
@@ -54,6 +55,7 @@ interface ParsedArgs {
filePath: string | null;
batchSize: number;
resume: boolean;
checkDuplicates: boolean;
stateFile: string | null;
showHelp: boolean;
}
@@ -63,6 +65,7 @@ function parseArgs(args: string[]): ParsedArgs {
filePath: null,
batchSize: DEFAULT_BATCH_SIZE,
resume: true,
checkDuplicates: true,
stateFile: null,
showHelp: false
};
@@ -76,6 +79,8 @@ function parseArgs(args: string[]): ParsedArgs {
result.resume = true;
} else if (arg === '--no-resume') {
result.resume = false;
} else if (arg === '--no-check') {
result.checkDuplicates = false;
} else if (arg.startsWith('--batch-size=')) {
const value = arg.split('=')[1];
const parsed = parseInt(value, 10);
@@ -180,6 +185,7 @@ Options:
--batch-size <number> Alternative syntax for batch size
--resume Resume from last saved state (default)
--no-resume Start from beginning, ignore saved state
--no-check Skip duplicate checking (faster, but may create duplicates)
--state-file=<path> Custom state file path
--help, -h Show this help message
@@ -191,17 +197,23 @@ Examples:
npx tsx scripts/index-file.ts wordlist.txt --batch-size=500
npx tsx scripts/index-file.ts wordlist.txt --batch-size 500
npx tsx scripts/index-file.ts wordlist.txt --no-resume
npm run index-file -- wordlist.txt --batch-size=500
npx tsx scripts/index-file.ts wordlist.txt --no-check
npm run index-file -- wordlist.txt --batch-size=500 --no-check
State Management:
The script automatically saves progress to a state file. If interrupted,
it will resume from where it left off on the next run. Use --no-resume
to start fresh.
Duplicate Checking:
By default, the script checks if each plaintext or hash already exists
in the index before inserting. Use --no-check to skip this verification
for faster indexing (useful when you're sure there are no duplicates).
`);
process.exit(0);
}
async function indexFile(filePath: string, batchSize: number, shouldResume: boolean, customStateFile: string | null) {
async function indexFile(filePath: string, batchSize: number, shouldResume: boolean, checkDuplicates: boolean, customStateFile: string | null) {
const client = new Client({ node: ELASTICSEARCH_NODE });
const absolutePath = resolve(filePath);
const stateFile = customStateFile || getDefaultStateFile(absolutePath);
@@ -244,6 +256,7 @@ async function indexFile(filePath: string, batchSize: number, shouldResume: bool
console.log(`Index: ${INDEX_NAME}`);
console.log(`File: ${filePath}`);
console.log(`Batch size: ${batchSize}`);
console.log(`Check duplicates: ${checkDuplicates ? 'yes' : 'no (--no-check)'}`);
console.log(`State file: ${stateFile}`);
if (resumingFrom > 0) {
console.log(`Resuming from: line ${resumingFrom}`);
@@ -307,58 +320,64 @@ async function indexFile(filePath: string, batchSize: number, shouldResume: bool
}))
);
// Check which items already exist (by plaintext or any hash)
const md5List = batchWithHashes.map((item: any) => item.hashes.md5);
const sha1List = batchWithHashes.map((item: any) => item.hashes.sha1);
const sha256List = batchWithHashes.map((item: any) => item.hashes.sha256);
const sha512List = batchWithHashes.map((item: any) => item.hashes.sha512);
if (checkDuplicates) {
// Check which items already exist (by plaintext or any hash)
const md5List = batchWithHashes.map((item: any) => item.hashes.md5);
const sha1List = batchWithHashes.map((item: any) => item.hashes.sha1);
const sha256List = batchWithHashes.map((item: any) => item.hashes.sha256);
const sha512List = batchWithHashes.map((item: any) => item.hashes.sha512);
const existingCheck = await client.search({
index: INDEX_NAME,
size: batchSize * 5,
query: {
bool: {
should: [
{ terms: { 'plaintext.keyword': batch } },
{ terms: { md5: md5List } },
{ terms: { sha1: sha1List } },
{ terms: { sha256: sha256List } },
{ terms: { sha512: sha512List } },
],
minimum_should_match: 1
const existingCheck = await client.search({
index: INDEX_NAME,
size: batchSize * 5,
query: {
bool: {
should: [
{ terms: { 'plaintext.keyword': batch } },
{ terms: { md5: md5List } },
{ terms: { sha1: sha1List } },
{ terms: { sha256: sha256List } },
{ terms: { sha512: sha512List } },
],
minimum_should_match: 1
}
},
_source: ['plaintext', 'md5', 'sha1', 'sha256', 'sha512']
});
// Create a set of existing hashes for quick lookup
const existingHashes = new Set<string>();
existingCheck.hits.hits.forEach((hit: any) => {
const src = hit._source;
existingHashes.add(src.plaintext);
existingHashes.add(src.md5);
existingHashes.add(src.sha1);
existingHashes.add(src.sha256);
existingHashes.add(src.sha512);
});
// Prepare bulk operations only for items that don't have any duplicate hash
for (const item of batchWithHashes) {
const isDuplicate =
existingHashes.has(item.plaintext) ||
existingHashes.has(item.hashes.md5) ||
existingHashes.has(item.hashes.sha1) ||
existingHashes.has(item.hashes.sha256) ||
existingHashes.has(item.hashes.sha512);
if (!isDuplicate) {
bulkOperations.push({ index: { _index: INDEX_NAME } });
bulkOperations.push(item.hashes);
} else {
state.skipped++;
sessionSkipped++;
}
},
_source: ['plaintext', 'md5', 'sha1', 'sha256', 'sha512']
});
// Create a set of existing hashes for quick lookup
const existingHashes = new Set<string>();
existingCheck.hits.hits.forEach((hit: any) => {
const src = hit._source;
existingHashes.add(src.plaintext);
existingHashes.add(src.md5);
existingHashes.add(src.sha1);
existingHashes.add(src.sha256);
existingHashes.add(src.sha512);
});
// Prepare bulk operations only for items that don't have any duplicate hash
let batchSkipped = 0;
for (const item of batchWithHashes) {
const isDuplicate =
existingHashes.has(item.plaintext) ||
existingHashes.has(item.hashes.md5) ||
existingHashes.has(item.hashes.sha1) ||
existingHashes.has(item.hashes.sha256) ||
existingHashes.has(item.hashes.sha512);
if (!isDuplicate) {
}
} else {
// No duplicate checking - index everything
for (const item of batchWithHashes) {
bulkOperations.push({ index: { _index: INDEX_NAME } });
bulkOperations.push(item.hashes);
} else {
batchSkipped++;
state.skipped++;
sessionSkipped++;
}
}
@@ -498,9 +517,10 @@ console.log(`\n🔧 Configuration:`);
console.log(` File: ${filePath}`);
console.log(` Batch size: ${parsedArgs.batchSize}`);
console.log(` Resume: ${parsedArgs.resume}`);
console.log(` Check duplicates: ${parsedArgs.checkDuplicates}`);
if (parsedArgs.stateFile) {
console.log(` State file: ${parsedArgs.stateFile}`);
}
console.log('');
indexFile(filePath, parsedArgs.batchSize, parsedArgs.resume, parsedArgs.stateFile).catch(console.error);
indexFile(filePath, parsedArgs.batchSize, parsedArgs.resume, parsedArgs.checkDuplicates, parsedArgs.stateFile).catch(console.error);