Comparar commits

..

8 Commits

Autor SHA1 Mensaje Fecha
ale
b91d19dc0b fix memory remove dup
Signed-off-by: ale <ale@manalejandro.com>
2025-12-21 22:36:31 +01:00
ale
da89037125 out useRouter
Signed-off-by: ale <ale@manalejandro.com>
2025-12-11 00:46:59 +01:00
ale
20f0503134 fix share link
Signed-off-by: ale <ale@manalejandro.com>
2025-12-08 23:11:25 +01:00
ale
42bc5a15d0 sanitize nosql
Signed-off-by: ale <ale@manalejandro.com>
2025-12-08 23:08:38 +01:00
ale
2de78b7461 share link
Signed-off-by: ale <ale@manalejandro.com>
2025-12-08 23:08:24 +01:00
ale
8fa586731a out bcrypt
Signed-off-by: ale <ale@manalejandro.com>
2025-12-08 21:06:35 +01:00
ale
ad7a1cf0a7 scroll api
Signed-off-by: ale <ale@manalejandro.com>
2025-12-08 20:58:02 +01:00
ale
459cdcd9bc remove-duplicates
Signed-off-by: ale <ale@manalejandro.com>
2025-12-08 20:56:04 +01:00
Se han modificado 14 ficheros con 711 adiciones y 115 borrados

1
API.md
Ver fichero

@@ -179,7 +179,6 @@ The API automatically detects hash types based on length and format:
| SHA1 | 40 | `^[a-f0-9]{40}$` | | SHA1 | 40 | `^[a-f0-9]{40}$` |
| SHA256 | 64 | `^[a-f0-9]{64}$` | | SHA256 | 64 | `^[a-f0-9]{64}$` |
| SHA512 | 128 | `^[a-f0-9]{128}$` | | SHA512 | 128 | `^[a-f0-9]{128}$` |
| Bcrypt | 60 | `^\$2[abxy]\$` |
Hashes are case-insensitive. Hashes are case-insensitive.

Ver fichero

@@ -10,12 +10,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added ### Added
#### Core Features #### Core Features
- Hash search functionality for MD5, SHA1, SHA256, SHA512, and Bcrypt - Hash search functionality for MD5, SHA1, SHA256, and SHA512
- Hash generation from plaintext input - Hash generation from plaintext input
- Automatic detection of hash types based on length and pattern - Automatic detection of hash types based on length and pattern
- Real-time hash generation with instant results - Real-time hash generation with instant results
- Copy to clipboard functionality for all hash values - Copy to clipboard functionality for all hash values
- Bcrypt verification support
#### Backend #### Backend
- Elasticsearch integration with configurable endpoint - Elasticsearch integration with configurable endpoint

Ver fichero

@@ -13,7 +13,7 @@
## ✨ Key Features ## ✨ Key Features
### 🔍 Hash Search ### 🔍 Hash Search
- Search for MD5, SHA1, SHA256, SHA512, and Bcrypt hashes - Search for MD5, SHA1, SHA256, and SHA512 hashes
- Automatic hash type detection - Automatic hash type detection
- Case-insensitive matching - Case-insensitive matching
- Real-time results - Real-time results
@@ -174,7 +174,6 @@ export ELASTICSEARCH_NODE=http://localhost:9200
| SHA1 | 40 | `^[a-f0-9]{40}$` | | SHA1 | 40 | `^[a-f0-9]{40}$` |
| SHA256 | 64 | `^[a-f0-9]{64}$` | | SHA256 | 64 | `^[a-f0-9]{64}$` |
| SHA512 | 128 | `^[a-f0-9]{128}$` | | SHA512 | 128 | `^[a-f0-9]{128}$` |
| Bcrypt | 60 | `^\$2[abxy]\$` |
--- ---
@@ -245,7 +244,6 @@ export ELASTICSEARCH_NODE=http://localhost:9200
## 📈 Future Enhancements ## 📈 Future Enhancements
### Planned Features ### Planned Features
- Bcrypt hash validation
- Argon2 hash support - Argon2 hash support
- Search history - Search history
- Batch lookup - Batch lookup

Ver fichero

@@ -25,7 +25,6 @@ npm run index-file -- --help # Show help
| SHA1 | 40 | `5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8` | | SHA1 | 40 | `5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8` |
| SHA256 | 64 | `5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8` | | SHA256 | 64 | `5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8` |
| SHA512 | 128 | `b109f3bbbc244eb82441917ed06d618b9008dd09b3befd1b5e07394c706a8bb9...` | | SHA512 | 128 | `b109f3bbbc244eb82441917ed06d618b9008dd09b3befd1b5e07394c706a8bb9...` |
| Bcrypt | 60 | `$2b$10$N9qo8uLOickgx2ZMRZoMye...` |
## 🔌 API Quick Reference ## 🔌 API Quick Reference

Ver fichero

@@ -8,7 +8,7 @@ A modern, high-performance hash search and generation tool powered by Elasticsea
## ✨ Features ## ✨ Features
- 🔍 **Hash Lookup**: Search for MD5, SHA1, SHA256, SHA512, and Bcrypt hashes - 🔍 **Hash Lookup**: Search for MD5, SHA1, SHA256, and SHA512 hashes
- 🔑 **Hash Generation**: Generate multiple hash types from plaintext - 🔑 **Hash Generation**: Generate multiple hash types from plaintext
- 💾 **Auto-Indexing**: Automatically stores searched plaintext and hashes - 💾 **Auto-Indexing**: Automatically stores searched plaintext and hashes
- 📊 **Elasticsearch Backend**: Scalable storage with 10 shards for performance - 📊 **Elasticsearch Backend**: Scalable storage with 10 shards for performance
@@ -274,7 +274,6 @@ npm run lint
| SHA1 | 40 | `^[a-f0-9]{40}$` | | SHA1 | 40 | `^[a-f0-9]{40}$` |
| SHA256 | 64 | `^[a-f0-9]{64}$` | | SHA256 | 64 | `^[a-f0-9]{64}$` |
| SHA512 | 128 | `^[a-f0-9]{128}$` | | SHA512 | 128 | `^[a-f0-9]{128}$` |
| Bcrypt | 60 | `^\$2[abxy]\$` |
## 🚀 Performance ## 🚀 Performance

Ver fichero

@@ -8,17 +8,104 @@ interface HashDocument {
sha1: string; sha1: string;
sha256: string; sha256: string;
sha512: string; sha512: string;
bcrypt: string;
created_at?: string; created_at?: string;
} }
// Maximum allowed query length
const MAX_QUERY_LENGTH = 1000;

// Characters that could be used in NoSQL/Elasticsearch injection attacks
const DANGEROUS_PATTERNS = [
  /[{}\[\]]/g,          // JSON structure characters
  /\$[a-zA-Z]/g,        // MongoDB-style operators
  /\\u[0-9a-fA-F]{4}/g, // Unicode escapes
  /<script/gi,          // XSS attempts
  /javascript:/gi,      // XSS attempts
];

/**
 * Sanitize input to prevent NoSQL injection attacks.
 *
 * Takes the first whitespace-delimited token of the input, strips null
 * bytes, removes all dangerous patterns, and caps the result at
 * MAX_QUERY_LENGTH characters.
 *
 * @param input - Raw user-supplied query string.
 * @returns The sanitized token, possibly empty if nothing safe remains.
 */
function sanitizeInput(input: string): string {
  // Trim and keep the first whitespace-delimited token only
  let sanitized = input.trim().split(/\s+/)[0] ?? '';

  // Remove null bytes before any pattern matching
  sanitized = sanitized.replace(/\0/g, '');

  // Strip dangerous patterns repeatedly until a fixpoint is reached.
  // A single pass is bypassable by nesting: removing "<script" from
  // "<scr<scriptipt" reassembles a fresh "<script".
  let previous: string;
  do {
    previous = sanitized;
    for (const pattern of DANGEROUS_PATTERNS) {
      sanitized = sanitized.replace(pattern, '');
    }
  } while (sanitized !== previous);

  // Enforce the length cap last so earlier removals cannot re-expose
  // content beyond the limit
  if (sanitized.length > MAX_QUERY_LENGTH) {
    sanitized = sanitized.substring(0, MAX_QUERY_LENGTH);
  }

  return sanitized;
}
/**
 * Validate that the input is safe for use in Elasticsearch queries.
 * Rejects empty strings, strings longer than MAX_QUERY_LENGTH, and
 * strings containing non-whitespace control characters.
 */
function isValidInput(input: string): boolean {
  // ASCII control characters, excluding tab (\x09), LF (\x0A) and CR (\x0D)
  const controlChars = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/;

  if (!input) {
    return false;
  }
  if (input.length > MAX_QUERY_LENGTH) {
    return false;
  }
  return !controlChars.test(input);
}
export async function POST(request: NextRequest) { export async function POST(request: NextRequest) {
try { try {
const { query } = await request.json(); const body = await request.json();
// Validate request body structure
if (!body || typeof body !== 'object') {
return NextResponse.json(
{ error: 'Invalid request body' },
{ status: 400 }
);
}
const { query } = body;
// Validate query type
if (!query || typeof query !== 'string') { if (!query || typeof query !== 'string') {
return NextResponse.json( return NextResponse.json(
{ error: 'Query parameter is required' }, { error: 'Query parameter is required and must be a string' },
{ status: 400 }
);
}
// Validate input before processing
if (!isValidInput(query)) {
return NextResponse.json(
{ error: 'Invalid query: contains forbidden characters or is too long' },
{ status: 400 }
);
}
// Sanitize input
const cleanQuery = sanitizeInput(query);
if (!cleanQuery) {
return NextResponse.json(
{ error: 'Invalid query: only whitespace or invalid characters provided' },
{ status: 400 } { status: 400 }
); );
} }
@@ -26,15 +113,6 @@ export async function POST(request: NextRequest) {
// Ensure index exists // Ensure index exists
await initializeIndex(); await initializeIndex();
const cleanQuery = query.trim().split(/\s+/)[0];
if (!cleanQuery) {
return NextResponse.json(
{ error: 'Invalid query: only whitespace provided' },
{ status: 400 }
);
}
const cleanQueryLower = cleanQuery.toLowerCase(); const cleanQueryLower = cleanQuery.toLowerCase();
const hashType = detectHashType(cleanQueryLower); const hashType = detectHashType(cleanQueryLower);
@@ -44,7 +122,7 @@ export async function POST(request: NextRequest) {
index: INDEX_NAME, index: INDEX_NAME,
query: { query: {
term: { term: {
[hashType]: hashType === 'bcrypt' ? cleanQuery : cleanQueryLower [hashType]: cleanQueryLower
} }
} }
}); });
@@ -66,7 +144,6 @@ export async function POST(request: NextRequest) {
sha1: source.sha1, sha1: source.sha1,
sha256: source.sha256, sha256: source.sha256,
sha512: source.sha512, sha512: source.sha512,
bcrypt: source.bcrypt,
} }
}; };
}) })
@@ -101,11 +178,10 @@ export async function POST(request: NextRequest) {
sha1: existingDoc.sha1, sha1: existingDoc.sha1,
sha256: existingDoc.sha256, sha256: existingDoc.sha256,
sha512: existingDoc.sha512, sha512: existingDoc.sha512,
bcrypt: existingDoc.bcrypt,
}; };
} else { } else {
// Plaintext not found, generate hashes and check if any hash already exists // Plaintext not found, generate hashes and check if any hash already exists
hashes = await generateHashes(cleanQuery); hashes = generateHashes(cleanQuery);
const hashExistsResponse = await esClient.search<HashDocument>({ const hashExistsResponse = await esClient.search<HashDocument>({
index: INDEX_NAME, index: INDEX_NAME,
@@ -147,7 +223,6 @@ export async function POST(request: NextRequest) {
sha1: hashes.sha1, sha1: hashes.sha1,
sha256: hashes.sha256, sha256: hashes.sha256,
sha512: hashes.sha512, sha512: hashes.sha512,
bcrypt: hashes.bcrypt,
} }
}); });
} }

Ver fichero

@@ -14,8 +14,8 @@ const geistMono = Geist_Mono({
export const metadata: Metadata = { export const metadata: Metadata = {
title: "Hasher - Hash Search & Generator", title: "Hasher - Hash Search & Generator",
description: "Search for hashes or generate them from plaintext. Supports MD5, SHA1, SHA256, SHA512, and Bcrypt. Powered by Elasticsearch.", description: "Search for hashes or generate them from plaintext. Supports MD5, SHA1, SHA256, and SHA512. Powered by Elasticsearch.",
keywords: ["hash", "md5", "sha1", "sha256", "sha512", "bcrypt", "hash generator", "hash search", "elasticsearch"], keywords: ["hash", "md5", "sha1", "sha256", "sha512", "hash generator", "hash search", "elasticsearch"],
authors: [{ name: "Hasher" }], authors: [{ name: "Hasher" }],
creator: "Hasher", creator: "Hasher",
publisher: "Hasher", publisher: "Hasher",
@@ -28,7 +28,7 @@ export const metadata: Metadata = {
openGraph: { openGraph: {
type: "website", type: "website",
title: "Hasher - Hash Search & Generator", title: "Hasher - Hash Search & Generator",
description: "Search for hashes or generate them from plaintext. Supports MD5, SHA1, SHA256, SHA512, and Bcrypt.", description: "Search for hashes or generate them from plaintext. Supports MD5, SHA1, SHA256, and SHA512.",
siteName: "Hasher", siteName: "Hasher",
images: [ images: [
{ {
@@ -42,7 +42,7 @@ export const metadata: Metadata = {
twitter: { twitter: {
card: "summary", card: "summary",
title: "Hasher - Hash Search & Generator", title: "Hasher - Hash Search & Generator",
description: "Search for hashes or generate them from plaintext. Supports MD5, SHA1, SHA256, SHA512, and Bcrypt.", description: "Search for hashes or generate them from plaintext. Supports MD5, SHA1, SHA256, and SHA512.",
images: ["/logo.png"], images: ["/logo.png"],
}, },
viewport: { viewport: {

Ver fichero

@@ -1,7 +1,8 @@
'use client'; 'use client';
import { useState, useEffect } from 'react'; import { useState, useEffect, useCallback, Suspense } from 'react';
import { Search, Copy, Check, Hash, Key, AlertCircle, Loader2, Database } from 'lucide-react'; import { useSearchParams } from 'next/navigation';
import { Search, Copy, Check, Hash, Key, AlertCircle, Loader2, Database, Link } from 'lucide-react';
interface SearchResult { interface SearchResult {
found: boolean; found: boolean;
@@ -15,7 +16,6 @@ interface SearchResult {
sha1: string; sha1: string;
sha256: string; sha256: string;
sha512: string; sha512: string;
bcrypt: string;
}; };
results?: Array<{ results?: Array<{
plaintext: string; plaintext: string;
@@ -24,7 +24,6 @@ interface SearchResult {
sha1: string; sha1: string;
sha256: string; sha256: string;
sha512: string; sha512: string;
bcrypt: string;
}; };
}>; }>;
message?: string; message?: string;
@@ -47,13 +46,62 @@ function formatNumber(num: number): string {
return num.toLocaleString(); return num.toLocaleString();
} }
export default function Home() { function HasherContent() {
const searchParams = useSearchParams();
const [query, setQuery] = useState(''); const [query, setQuery] = useState('');
const [result, setResult] = useState<SearchResult | null>(null); const [result, setResult] = useState<SearchResult | null>(null);
const [loading, setLoading] = useState(false); const [loading, setLoading] = useState(false);
const [error, setError] = useState(''); const [error, setError] = useState('');
const [copiedField, setCopiedField] = useState<string | null>(null); const [copiedField, setCopiedField] = useState<string | null>(null);
const [stats, setStats] = useState<IndexStats | null>(null); const [stats, setStats] = useState<IndexStats | null>(null);
const [copiedLink, setCopiedLink] = useState(false);
const [initialLoadDone, setInitialLoadDone] = useState(false);
const performSearch = useCallback(async (searchQuery: string, updateUrl: boolean = true) => {
if (!searchQuery.trim()) return;
setLoading(true);
setError('');
setResult(null);
try {
const response = await fetch('/api/search', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ query: searchQuery.trim() })
});
if (!response.ok) {
throw new Error('Search failed');
}
const data = await response.json();
setResult(data);
// Update URL with search query (using history API to avoid re-triggering effects)
if (updateUrl) {
const newUrl = new URL(window.location.href);
newUrl.searchParams.set('q', searchQuery.trim());
window.history.replaceState(null, '', newUrl.pathname + newUrl.search);
}
} catch (_err) {
setError('Failed to perform search. Please check your connection.');
} finally {
setLoading(false);
}
}, []);
// Load query from URL on mount (only once)
useEffect(() => {
if (initialLoadDone) return;
const urlQuery = searchParams.get('q');
if (urlQuery) {
setQuery(urlQuery);
performSearch(urlQuery, false);
}
setInitialLoadDone(true);
}, [searchParams, performSearch, initialLoadDone]);
useEffect(() => { useEffect(() => {
const fetchStats = async () => { const fetchStats = async () => {
@@ -75,30 +123,7 @@ export default function Home() {
const handleSearch = async (e: React.FormEvent) => { const handleSearch = async (e: React.FormEvent) => {
e.preventDefault(); e.preventDefault();
if (!query.trim()) return; performSearch(query);
setLoading(true);
setError('');
setResult(null);
try {
const response = await fetch('/api/search', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ query: query.trim() })
});
if (!response.ok) {
throw new Error('Search failed');
}
const data = await response.json();
setResult(data);
} catch (_err) {
setError('Failed to perform search. Please check your connection.');
} finally {
setLoading(false);
}
}; };
const copyToClipboard = (text: string, field: string) => { const copyToClipboard = (text: string, field: string) => {
@@ -107,6 +132,14 @@ export default function Home() {
setTimeout(() => setCopiedField(null), 2000); setTimeout(() => setCopiedField(null), 2000);
}; };
const copyShareLink = () => {
const url = new URL(window.location.href);
url.searchParams.set('q', query.trim());
navigator.clipboard.writeText(url.toString());
setCopiedLink(true);
setTimeout(() => setCopiedLink(false), 2000);
};
const HashDisplay = ({ label, value, field }: { label: string; value: string; field: string }) => ( const HashDisplay = ({ label, value, field }: { label: string; value: string; field: string }) => (
<div className="bg-gray-50 rounded-lg p-4 border border-gray-200"> <div className="bg-gray-50 rounded-lg p-4 border border-gray-200">
<div className="flex items-center justify-between mb-2"> <div className="flex items-center justify-between mb-2">
@@ -144,7 +177,7 @@ export default function Home() {
Search for hashes or generate them from plaintext Search for hashes or generate them from plaintext
</p> </p>
<p className="text-sm text-gray-500 mt-2"> <p className="text-sm text-gray-500 mt-2">
Supports MD5, SHA1, SHA256, SHA512, and Bcrypt Supports MD5, SHA1, SHA256, and SHA512
</p> </p>
{stats && ( {stats && (
<div className="flex items-center justify-center gap-4 mt-4 text-sm text-gray-500"> <div className="flex items-center justify-center gap-4 mt-4 text-sm text-gray-500">
@@ -168,12 +201,27 @@ export default function Home() {
value={query} value={query}
onChange={(e) => setQuery(e.target.value)} onChange={(e) => setQuery(e.target.value)}
placeholder="Enter a hash or plaintext..." placeholder="Enter a hash or plaintext..."
className="w-full px-6 py-4 pr-14 text-lg rounded-2xl border-2 border-gray-200 focus:border-blue-500 focus:ring-4 focus:ring-blue-100 outline-none transition-all shadow-sm" className="w-full px-6 py-4 pr-28 text-lg rounded-2xl border-2 border-gray-200 focus:border-blue-500 focus:ring-4 focus:ring-blue-100 outline-none transition-all shadow-sm"
/> />
<div className="absolute right-2 top-1/2 -translate-y-1/2 flex gap-1">
{query.trim() && (
<button
type="button"
onClick={copyShareLink}
className="bg-gray-100 text-gray-600 p-3 rounded-xl hover:bg-gray-200 transition-all"
title="Copy share link"
>
{copiedLink ? (
<Check className="w-6 h-6 text-green-600" />
) : (
<Link className="w-6 h-6" />
)}
</button>
)}
<button <button
type="submit" type="submit"
disabled={loading || !query.trim()} disabled={loading || !query.trim()}
className="absolute right-2 top-1/2 -translate-y-1/2 bg-gradient-to-r from-blue-600 to-purple-600 text-white p-3 rounded-xl hover:shadow-lg disabled:opacity-50 disabled:cursor-not-allowed transition-all" className="bg-gradient-to-r from-blue-600 to-purple-600 text-white p-3 rounded-xl hover:shadow-lg disabled:opacity-50 disabled:cursor-not-allowed transition-all"
> >
{loading ? ( {loading ? (
<Loader2 className="w-6 h-6 animate-spin" /> <Loader2 className="w-6 h-6 animate-spin" />
@@ -182,6 +230,7 @@ export default function Home() {
)} )}
</button> </button>
</div> </div>
</div>
</form> </form>
{/* Error Message */} {/* Error Message */}
@@ -214,7 +263,6 @@ export default function Home() {
<HashDisplay label="SHA1" value={result.hashes!.sha1} field="sha1-gen" /> <HashDisplay label="SHA1" value={result.hashes!.sha1} field="sha1-gen" />
<HashDisplay label="SHA256" value={result.hashes!.sha256} field="sha256-gen" /> <HashDisplay label="SHA256" value={result.hashes!.sha256} field="sha256-gen" />
<HashDisplay label="SHA512" value={result.hashes!.sha512} field="sha512-gen" /> <HashDisplay label="SHA512" value={result.hashes!.sha512} field="sha512-gen" />
<HashDisplay label="Bcrypt" value={result.hashes!.bcrypt} field="bcrypt-gen" />
</div> </div>
{result.wasGenerated && ( {result.wasGenerated && (
<div className="mt-6 bg-blue-50 border border-blue-200 rounded-xl p-4"> <div className="mt-6 bg-blue-50 border border-blue-200 rounded-xl p-4">
@@ -260,7 +308,6 @@ export default function Home() {
<HashDisplay label="SHA1" value={item.hashes.sha1} field={`sha1-${idx}`} /> <HashDisplay label="SHA1" value={item.hashes.sha1} field={`sha1-${idx}`} />
<HashDisplay label="SHA256" value={item.hashes.sha256} field={`sha256-${idx}`} /> <HashDisplay label="SHA256" value={item.hashes.sha256} field={`sha256-${idx}`} />
<HashDisplay label="SHA512" value={item.hashes.sha512} field={`sha512-${idx}`} /> <HashDisplay label="SHA512" value={item.hashes.sha512} field={`sha512-${idx}`} />
<HashDisplay label="Bcrypt" value={item.hashes.bcrypt} field={`bcrypt-${idx}`} />
</div> </div>
</div> </div>
))} ))}
@@ -304,7 +351,7 @@ export default function Home() {
</div> </div>
<h3 className="text-xl font-bold text-gray-900 mb-2">Generate Hashes</h3> <h3 className="text-xl font-bold text-gray-900 mb-2">Generate Hashes</h3>
<p className="text-gray-600"> <p className="text-gray-600">
Enter any plaintext to instantly generate MD5, SHA1, SHA256, SHA512, and Bcrypt hashes. Results are saved automatically. Enter any plaintext to instantly generate MD5, SHA1, SHA256, and SHA512 hashes. Results are saved automatically.
</p> </p>
</div> </div>
</div> </div>
@@ -319,3 +366,19 @@ export default function Home() {
); );
} }
function LoadingFallback() {
return (
<div className="min-h-screen bg-gradient-to-br from-blue-50 via-white to-purple-50 flex items-center justify-center">
<Loader2 className="w-12 h-12 text-blue-600 animate-spin" />
</div>
);
}
export default function Home() {
return (
<Suspense fallback={<LoadingFallback />}>
<HasherContent />
</Suspense>
);
}

Ver fichero

@@ -46,9 +46,6 @@ export const INDEX_MAPPING = {
sha512: { sha512: {
type: 'keyword' as const type: 'keyword' as const
}, },
bcrypt: {
type: 'keyword' as const
},
created_at: { created_at: {
type: 'date' as const type: 'date' as const
} }

Ver fichero

@@ -1,5 +1,4 @@
import crypto from 'crypto'; import crypto from 'crypto';
import bcrypt from 'bcrypt';
export interface HashResult { export interface HashResult {
plaintext: string; plaintext: string;
@@ -7,22 +6,18 @@ export interface HashResult {
sha1: string; sha1: string;
sha256: string; sha256: string;
sha512: string; sha512: string;
bcrypt: string;
} }
/** /**
* Generate all common hashes for a given plaintext * Generate all common hashes for a given plaintext
*/ */
export async function generateHashes(plaintext: string): Promise<HashResult> { export function generateHashes(plaintext: string): HashResult {
const bcryptHash = await bcrypt.hash(plaintext, 10);
return { return {
plaintext, plaintext,
md5: crypto.createHash('md5').update(plaintext).digest('hex'), md5: crypto.createHash('md5').update(plaintext).digest('hex'),
sha1: crypto.createHash('sha1').update(plaintext).digest('hex'), sha1: crypto.createHash('sha1').update(plaintext).digest('hex'),
sha256: crypto.createHash('sha256').update(plaintext).digest('hex'), sha256: crypto.createHash('sha256').update(plaintext).digest('hex'),
sha512: crypto.createHash('sha512').update(plaintext).digest('hex'), sha512: crypto.createHash('sha512').update(plaintext).digest('hex'),
bcrypt: bcryptHash,
}; };
} }
@@ -52,11 +47,6 @@ export function detectHashType(hash: string): string | null {
return 'sha512'; return 'sha512';
} }
// BCrypt: starts with $2a$, $2b$, $2x$, or $2y$
if (/^\$2[abxy]\$/.test(cleanHash)) {
return 'bcrypt';
}
return null; return null;
} }
@@ -66,14 +56,3 @@ export function detectHashType(hash: string): string | null {
export function isHash(input: string): boolean { export function isHash(input: string): boolean {
return detectHashType(input) !== null; return detectHashType(input) !== null;
} }
/**
* Verify a plaintext against a bcrypt hash
*/
export async function verifyBcrypt(plaintext: string, hash: string): Promise<boolean> {
try {
return await bcrypt.compare(plaintext, hash);
} catch (_error) {
return false;
}
}

Ver fichero

@@ -34,12 +34,11 @@
"build": "next build", "build": "next build",
"start": "next start", "start": "next start",
"lint": "eslint", "lint": "eslint",
"index-file": "tsx scripts/index-file.ts" "index-file": "tsx scripts/index-file.ts",
"remove-duplicates": "tsx scripts/remove-duplicates.ts"
}, },
"dependencies": { "dependencies": {
"@elastic/elasticsearch": "^9.2.0", "@elastic/elasticsearch": "^9.2.0",
"@types/bcrypt": "^6.0.0",
"bcrypt": "^6.0.0",
"lucide-react": "^0.555.0", "lucide-react": "^0.555.0",
"next": "15.4.8", "next": "15.4.8",
"react": "19.1.2", "react": "19.1.2",

Ver fichero

@@ -1,7 +1,7 @@
{ {
"name": "Hasher - Hash Search & Generator", "name": "Hasher - Hash Search & Generator",
"short_name": "Hasher", "short_name": "Hasher",
"description": "Search for hashes or generate them from plaintext. Supports MD5, SHA1, SHA256, SHA512, and Bcrypt.", "description": "Search for hashes or generate them from plaintext. Supports MD5, SHA1, SHA256, and SHA512.",
"start_url": "/", "start_url": "/",
"display": "standalone", "display": "standalone",
"background_color": "#ffffff", "background_color": "#ffffff",

Ver fichero

@@ -35,7 +35,6 @@ interface HashDocument {
sha1: string; sha1: string;
sha256: string; sha256: string;
sha512: string; sha512: string;
bcrypt: string;
created_at: string; created_at: string;
} }
@@ -157,17 +156,13 @@ function deleteState(stateFile: string): void {
} }
} }
async function generateHashes(plaintext: string): Promise<HashDocument> { function generateHashes(plaintext: string): HashDocument {
const bcrypt = await import('bcrypt');
const bcryptHash = await bcrypt.default.hash(plaintext, 10);
return { return {
plaintext, plaintext,
md5: crypto.createHash('md5').update(plaintext).digest('hex'), md5: crypto.createHash('md5').update(plaintext).digest('hex'),
sha1: crypto.createHash('sha1').update(plaintext).digest('hex'), sha1: crypto.createHash('sha1').update(plaintext).digest('hex'),
sha256: crypto.createHash('sha256').update(plaintext).digest('hex'), sha256: crypto.createHash('sha256').update(plaintext).digest('hex'),
sha512: crypto.createHash('sha512').update(plaintext).digest('hex'), sha512: crypto.createHash('sha512').update(plaintext).digest('hex'),
bcrypt: bcryptHash,
created_at: new Date().toISOString() created_at: new Date().toISOString()
}; };
} }
@@ -313,12 +308,10 @@ async function indexFile(filePath: string, batchSize: number, shouldResume: bool
const bulkOperations: any[] = []; const bulkOperations: any[] = [];
// Generate hashes for all items in batch first // Generate hashes for all items in batch first
const batchWithHashes = await Promise.all( const batchWithHashes = batch.map((plaintext: string) => ({
batch.map(async (plaintext: string) => ({
plaintext, plaintext,
hashes: await generateHashes(plaintext) hashes: generateHashes(plaintext)
})) }));
);
if (checkDuplicates) { if (checkDuplicates) {
// Check which items already exist (by plaintext or any hash) // Check which items already exist (by plaintext or any hash)

496
scripts/remove-duplicates.ts Archivo normal
Ver fichero

@@ -0,0 +1,496 @@
#!/usr/bin/env node
/**
* Hasher Duplicate Remover Script
*
* This script finds and removes duplicate entries from the Elasticsearch index.
* It identifies duplicates by checking plaintext, md5, sha1, sha256, and sha512 fields.
*
* Usage:
* npx tsx scripts/remove-duplicates.ts [options]
* npm run remove-duplicates [-- options]
*
* Options:
* --dry-run Show duplicates without removing them (default)
* --execute Actually remove the duplicates
* --batch-size=<number> Number of items to process in each batch (default: 1000)
* --field=<field> Check duplicates only on this field (plaintext, md5, sha1, sha256, sha512)
* --help, -h Show this help message
*/
import { Client } from '@elastic/elasticsearch';
// Elasticsearch endpoint; overridable via the ELASTICSEARCH_NODE env var
const ELASTICSEARCH_NODE = process.env.ELASTICSEARCH_NODE || 'http://localhost:9200';
// Name of the index holding the hash documents
const INDEX_NAME = 'hasher';
// Default number of items processed per batch when --batch-size is absent
const DEFAULT_BATCH_SIZE = 1000;

// Parsed command-line options for the duplicate remover
interface ParsedArgs {
  dryRun: boolean;
  batchSize: number;
  field: string | null;
  showHelp: boolean;
}

// A set of documents sharing one duplicated field value: the oldest is
// kept, the rest are slated for deletion
interface DuplicateGroup {
  value: string;
  field: string;
  documentIds: string[];
  keepId: string;
  deleteIds: string[];
}

/**
 * Parse command-line arguments into a ParsedArgs structure.
 * Supports both "--opt=value" and "--opt value" forms for valued options.
 * Unknown arguments and invalid batch sizes are silently ignored.
 */
function parseArgs(args: string[]): ParsedArgs {
  const options: ParsedArgs = {
    dryRun: true,
    batchSize: DEFAULT_BATCH_SIZE,
    field: null,
    showHelp: false
  };

  let cursor = 0;

  // Returns the value for "--name=value" inline form, or the following
  // argument for the "--name value" form (advancing the cursor), or null.
  const takeValue = (arg: string, name: string): string | null => {
    if (arg.startsWith(`${name}=`)) {
      return arg.slice(name.length + 1);
    }
    const next = args[cursor + 1];
    if (arg === name && next && !next.startsWith('-')) {
      cursor++;
      return next;
    }
    return null;
  };

  for (cursor = 0; cursor < args.length; cursor++) {
    const arg = args[cursor];
    if (arg === '--help' || arg === '-h') {
      options.showHelp = true;
    } else if (arg === '--dry-run') {
      options.dryRun = true;
    } else if (arg === '--execute') {
      options.dryRun = false;
    } else if (arg === '--batch-size' || arg.startsWith('--batch-size=')) {
      const raw = takeValue(arg, '--batch-size');
      const size = raw === null ? NaN : parseInt(raw, 10);
      // Only accept strictly positive integers; otherwise keep the default
      if (!isNaN(size) && size > 0) {
        options.batchSize = size;
      }
    } else if (arg === '--field' || arg.startsWith('--field=')) {
      const value = takeValue(arg, '--field');
      if (value !== null) {
        options.field = value;
      }
    }
  }

  return options;
}
/**
 * Print the usage/help text to stdout and terminate the process.
 * Note: this function never returns — it calls process.exit(0).
 */
function showHelp() {
  console.log(`
Hasher Duplicate Remover Script
Usage:
npx tsx scripts/remove-duplicates.ts [options]
npm run remove-duplicates [-- options]
Options:
--dry-run Show duplicates without removing them (default)
--execute Actually remove the duplicates
--batch-size=<number> Number of items to process in each batch (default: 1000)
--field=<field> Check duplicates only on this field
Valid fields: plaintext, md5, sha1, sha256, sha512
--help, -h Show this help message
Environment Variables:
ELASTICSEARCH_NODE Elasticsearch node URL (default: http://localhost:9200)
Examples:
npx tsx scripts/remove-duplicates.ts # Dry run, show all duplicates
npx tsx scripts/remove-duplicates.ts --execute # Remove all duplicates
npx tsx scripts/remove-duplicates.ts --field=md5 # Check only md5 duplicates
npx tsx scripts/remove-duplicates.ts --execute --field=plaintext
Notes:
- The script keeps the OLDEST document (by created_at) and removes newer duplicates
- Always run with --dry-run first to review what will be deleted
- Duplicates are checked across all hash fields by default
`);
  process.exit(0);
}
/**
 * Find all groups of documents whose given field has a duplicated value.
 *
 * Uses a composite terms aggregation (paginated via after_key) combined
 * with a bucket_selector to surface only values occurring more than once,
 * then resolves each duplicated value to its document IDs via the scroll
 * API, sorted by created_at ascending so the oldest document is kept.
 *
 * @param client - Connected Elasticsearch client.
 * @param field - Document field to check ('plaintext', 'md5', ...).
 * @param batchSize - Composite aggregation page size.
 * @returns Duplicate groups with keepId (oldest) and deleteIds (the rest).
 */
async function findDuplicatesForField(
  client: Client,
  field: string,
  batchSize: number
): Promise<DuplicateGroup[]> {
  const duplicates: DuplicateGroup[] = [];
  // Use aggregation to find duplicate values.
  // plaintext is a text field, so aggregate on its keyword sub-field;
  // the hash fields are already keyword-mapped
  const fieldToAggregate = field === 'plaintext' ? 'plaintext.keyword' : field;
  // Use composite aggregation to handle large number of duplicates
  let afterKey: any = undefined;
  let hasMore = true;
  console.log(` Scanning for duplicates...`);
  while (hasMore) {
    const aggQuery: any = {
      index: INDEX_NAME,
      size: 0, // no hits needed, aggregation buckets only
      aggs: {
        duplicates: {
          composite: {
            size: batchSize,
            sources: [
              { value: { terms: { field: fieldToAggregate } } }
            ],
            // Resume pagination from the previous page's after_key
            ...(afterKey && { after: afterKey })
          },
          aggs: {
            // Keep only buckets with more than one document (duplicates)
            doc_count_filter: {
              bucket_selector: {
                buckets_path: { count: '_count' },
                script: 'params.count > 1'
              }
            }
          }
        }
      }
    };
    const response = await client.search(aggQuery);
    const compositeAgg = response.aggregations?.duplicates as any;
    const buckets = compositeAgg?.buckets || [];
    for (const bucket of buckets) {
      // Redundant with the bucket_selector, but cheap defense in depth
      if (bucket.doc_count > 1) {
        const value = bucket.key.value;
        // Use scroll API for large result sets
        const documentIds: string[] = [];
        let scrollResponse = await client.search({
          index: INDEX_NAME,
          scroll: '1m',
          size: 1000,
          query: {
            term: {
              [fieldToAggregate]: value
            }
          },
          // Oldest first, so documentIds[0] is the document to keep
          sort: [
            { created_at: { order: 'asc' } }
          ],
          _source: false // only IDs are needed
        });
        while (scrollResponse.hits.hits.length > 0) {
          documentIds.push(...scrollResponse.hits.hits.map((hit: any) => hit._id));
          if (!scrollResponse._scroll_id) break;
          scrollResponse = await client.scroll({
            scroll_id: scrollResponse._scroll_id,
            scroll: '1m'
          });
        }
        // Clear scroll context on the server; failure here is non-fatal
        if (scrollResponse._scroll_id) {
          await client.clearScroll({ scroll_id: scrollResponse._scroll_id }).catch(() => {});
        }
        if (documentIds.length > 1) {
          duplicates.push({
            value: String(value),
            field,
            documentIds,
            keepId: documentIds[0], // Keep the oldest
            deleteIds: documentIds.slice(1) // Delete the rest
          });
        }
      }
    }
    // Check if there are more results: a full page plus an after_key
    // means another composite page may exist
    afterKey = compositeAgg?.after_key;
    hasMore = buckets.length === batchSize && afterKey;
    if (hasMore) {
      process.stdout.write(`\r Found ${duplicates.length} duplicate groups so far...`);
    }
  }
  return duplicates;
}
/**
 * Phase 1: Initialize and connect to Elasticsearch.
 *
 * Creates a client for ELASTICSEARCH_NODE, verifies connectivity via a
 * cluster health call, and reports the current document count of the
 * target index. Throws if the cluster is unreachable.
 *
 * @returns The connected client and the index's total document count.
 */
async function phase1_InitAndConnect() {
  console.log(`🔍 Hasher Duplicate Remover - Phase 1: Initialization`);
  console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
  console.log(`Elasticsearch: ${ELASTICSEARCH_NODE}`);
  console.log(`Index: ${INDEX_NAME}`);
  console.log('');
  const client = new Client({ node: ELASTICSEARCH_NODE });
  console.log('🔗 Connecting to Elasticsearch...');
  // Throws if the node is unreachable, aborting the script early
  await client.cluster.health({});
  console.log('✅ Connected successfully\n');
  const countResponse = await client.count({ index: INDEX_NAME });
  console.log(`📊 Total documents in index: ${countResponse.count}\n`);
  return { client, totalDocuments: countResponse.count };
}
/**
 * Phase 2: Find duplicates for a specific field
 *
 * Runs the per-field duplicate scan, then drops any delete IDs that an
 * earlier field already claimed, so a document duplicated across several
 * fields is scheduled for deletion exactly once.
 *
 * @param client        - connected Elasticsearch client
 * @param field         - document field to deduplicate on
 * @param batchSize     - aggregation page size forwarded to the scan
 * @param seenDeleteIds - IDs already scheduled; mutated with new IDs
 * @returns the filtered duplicate groups plus the raw group count found
 */
async function phase2_FindDuplicatesForField(
  client: Client,
  field: string,
  batchSize: number,
  seenDeleteIds: Set<string>
): Promise<{ duplicates: DuplicateGroup[], totalFound: number }> {
  console.log(`\n🔍 Phase 2: Checking duplicates for field: ${field}...`);
  const rawGroups = await findDuplicatesForField(client, field, batchSize);
  const uniqueGroups: DuplicateGroup[] = [];
  for (const group of rawGroups) {
    const unseen = group.deleteIds.filter(id => !seenDeleteIds.has(id));
    if (unseen.length === 0) continue;
    for (const id of unseen) seenDeleteIds.add(id);
    group.deleteIds = unseen;
    uniqueGroups.push(group);
  }
  const pendingDeletes = uniqueGroups.reduce((sum, g) => sum + g.deleteIds.length, 0);
  console.log(`   Found ${rawGroups.length} duplicate groups for ${field}`);
  console.log(`   New unique documents to delete: ${pendingDeletes}`);
  // Force garbage collection if available (requires node --expose-gc)
  if (global.gc) {
    global.gc();
    console.log(`   ♻️ Memory freed after processing ${field}`);
  }
  return { duplicates: uniqueGroups, totalFound: rawGroups.length };
}
/**
 * Phase 3: Process deletion for a batch of duplicates
 *
 * Deletes the slice deleteIds[startIndex .. startIndex + batchSize) via the
 * bulk API with refresh disabled for throughput (the index is refreshed once
 * in phase 4).
 *
 * @param client     - connected Elasticsearch client
 * @param deleteIds  - full list of document IDs scheduled for deletion
 * @param batchSize  - maximum number of IDs to delete in this call
 * @param startIndex - offset into deleteIds where this batch begins
 * @returns per-batch counts of deleted documents and failed deletions
 */
async function phase3_DeleteBatch(
  client: Client,
  deleteIds: string[],
  batchSize: number,
  startIndex: number
): Promise<{ deleted: number, errors: number }> {
  const batch = deleteIds.slice(startIndex, startIndex + batchSize);
  // Guard: the bulk API rejects an empty operations list, so skip the
  // round-trip entirely when there is nothing left to delete.
  if (batch.length === 0) {
    return { deleted: 0, errors: 0 };
  }
  let deleted = 0;
  let errors = 0;
  try {
    // One delete action per document; delete actions carry no payload line.
    const bulkOperations = batch.map(id => (
      { delete: { _index: INDEX_NAME, _id: id } }
    ));
    const bulkResponse = await client.bulk({
      operations: bulkOperations,
      refresh: false
    });
    if (bulkResponse.errors) {
      // Partial failure: count per-item errors, credit the rest as deleted.
      const errorCount = bulkResponse.items.filter((item: any) => item.delete?.error).length;
      errors += errorCount;
      deleted += batch.length - errorCount;
    } else {
      deleted += batch.length;
    }
  } catch (error) {
    // Transport-level failure: the whole batch counts as failed.
    console.error(`\n❌ Error deleting batch:`, error);
    errors += batch.length;
  }
  // Force garbage collection if available (requires node --expose-gc)
  if (global.gc) {
    global.gc();
  }
  return { deleted, errors };
}
/**
 * Phase 4: Finalize and report results
 *
 * Refreshes the index so the bulk deletions become visible, then prints a
 * summary comparing the document count before and after the run.
 *
 * @param client               - connected Elasticsearch client
 * @param totalDeleted         - documents deleted across all batches
 * @param totalErrors          - failed deletions across all batches
 * @param initialDocumentCount - document count captured in phase 1
 */
async function phase4_Finalize(
  client: Client,
  totalDeleted: number,
  totalErrors: number,
  initialDocumentCount: number
) {
  console.log('\n\n🔄 Phase 4: Refreshing index...');
  await client.indices.refresh({ index: INDEX_NAME });
  const { count: remainingDocuments } = await client.count({ index: INDEX_NAME });
  const separator = '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━';
  console.log(`\n${separator}`);
  console.log('✅ Duplicate removal complete!');
  console.log(separator);
  console.log(`Documents deleted: ${totalDeleted}`);
  console.log(`Errors: ${totalErrors}`);
  console.log(`Previous document count: ${initialDocumentCount}`);
  console.log(`New document count: ${remainingDocuments}`);
  console.log('');
}
/**
 * Orchestrates the full duplicate-removal pipeline:
 *   Phase 1 — connect and capture the initial document count,
 *   Phase 2 — collect duplicate groups per field (dedup across fields),
 *   Phase 3 — delete surplus documents in batches (skipped on dry-run),
 *   Phase 4 — refresh the index and report before/after counts.
 *
 * @param parsedArgs - CLI options (field filter, batch size, dry-run flag)
 *
 * Exits the process with code 1 on any unrecoverable error.
 */
async function removeDuplicates(parsedArgs: ParsedArgs) {
  // With no explicit --field, every deduplicatable field is scanned.
  const fields = parsedArgs.field
    ? [parsedArgs.field]
    : ['plaintext', 'md5', 'sha1', 'sha256', 'sha512'];
  console.log(`Mode: ${parsedArgs.dryRun ? '🔎 DRY RUN (no changes)' : '⚠️ EXECUTE (will delete)'}`);
  console.log(`Batch size: ${parsedArgs.batchSize}`);
  console.log(`Fields to check: ${fields.join(', ')}`);
  console.log('');
  try {
    // === PHASE 1: Initialize ===
    const { client, totalDocuments } = await phase1_InitAndConnect();
    // Force garbage collection after phase 1 (requires node --expose-gc)
    if (global.gc) {
      global.gc();
      console.log('♻️ Memory freed after initialization\n');
    }
    // === PHASE 2: Find duplicates field by field ===
    // seenDeleteIds is shared across fields so a document duplicated on
    // several fields is only scheduled for deletion once.
    const allDuplicates: DuplicateGroup[] = [];
    const seenDeleteIds = new Set<string>();
    for (const field of fields) {
      const { duplicates } = await phase2_FindDuplicatesForField(
        client,
        field,
        parsedArgs.batchSize,
        seenDeleteIds
      );
      allDuplicates.push(...duplicates);
      // Truncate the per-field array; the group objects it held stay
      // alive through allDuplicates, only the array slots are released.
      duplicates.length = 0;
    }
    const totalToDelete = allDuplicates.reduce((sum, dup) => sum + dup.deleteIds.length, 0);
    console.log(`\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
    console.log(`📋 Summary:`);
    console.log(`   Duplicate groups found: ${allDuplicates.length}`);
    console.log(`   Documents to delete: ${totalToDelete}`);
    console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n`);
    if (allDuplicates.length === 0) {
      console.log('✨ No duplicates found! Index is clean.\n');
      return;
    }
    // Show sample of duplicates (first 10 groups, values truncated to 50 chars)
    console.log(`📝 Sample duplicates (showing first 10):\n`);
    const samplesToShow = allDuplicates.slice(0, 10);
    for (const dup of samplesToShow) {
      const truncatedValue = dup.value.length > 50
        ? dup.value.substring(0, 50) + '...'
        : dup.value;
      console.log(`   Field: ${dup.field}`);
      console.log(`   Value: ${truncatedValue}`);
      console.log(`   Keep: ${dup.keepId}`);
      console.log(`   Delete: ${dup.deleteIds.length} document(s)`);
      console.log('');
    }
    if (allDuplicates.length > 10) {
      console.log(`   ... and ${allDuplicates.length - 10} more duplicate groups\n`);
    }
    // Dry-run stops here: report what would be deleted, change nothing.
    if (parsedArgs.dryRun) {
      console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
      console.log(`🔎 DRY RUN - No changes made`);
      console.log(`   Run with --execute to remove ${totalToDelete} duplicate documents`);
      console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n`);
      return;
    }
    // === PHASE 3: Execute deletion in batches ===
    console.log(`\n🗑 Phase 3: Removing ${totalToDelete} duplicate documents...\n`);
    let totalDeleted = 0;
    let totalErrors = 0;
    // Flatten to a single ID list so batching is a simple index walk.
    const deleteIds = allDuplicates.flatMap(dup => dup.deleteIds);
    // Clear allDuplicates to free memory
    allDuplicates.length = 0;
    // Delete in batches with memory management
    for (let i = 0; i < deleteIds.length; i += parsedArgs.batchSize) {
      const { deleted, errors } = await phase3_DeleteBatch(
        client,
        deleteIds,
        parsedArgs.batchSize,
        i
      );
      totalDeleted += deleted;
      totalErrors += errors;
      // \r keeps the progress line updating in place.
      process.stdout.write(
        `\r⏳ Progress: ${Math.min(i + parsedArgs.batchSize, deleteIds.length)}/${deleteIds.length} - ` +
        `Deleted: ${totalDeleted}, Errors: ${totalErrors}`
      );
    }
    // Clear deleteIds to free memory
    deleteIds.length = 0;
    seenDeleteIds.clear();
    // === PHASE 4: Finalize ===
    await phase4_Finalize(client, totalDeleted, totalErrors, totalDocuments);
  } catch (error) {
    console.error('\n❌ Error:', error instanceof Error ? error.message : error);
    process.exit(1);
  }
}
// ── CLI entry point ─────────────────────────────────────────────────────
// Parse command line arguments
const args = process.argv.slice(2);
const parsedArgs = parseArgs(args);
if (parsedArgs.showHelp) {
  showHelp();
}
// Reject an unknown --field value before doing any work.
const validFields = ['plaintext', 'md5', 'sha1', 'sha256', 'sha512'];
const requestedField = parsedArgs.field;
if (requestedField && !validFields.includes(requestedField)) {
  console.error(`❌ Invalid field: ${requestedField}`);
  console.error(`   Valid fields: ${validFields.join(', ')}`);
  process.exit(1);
}
// Echo the effective configuration, then hand off to the pipeline.
console.log(`\n🔧 Configuration:`);
console.log(`   Mode: ${parsedArgs.dryRun ? 'dry-run' : 'execute'}`);
console.log(`   Batch size: ${parsedArgs.batchSize}`);
console.log(parsedArgs.field
  ? `   Field: ${parsedArgs.field}`
  : `   Fields: all (plaintext, md5, sha1, sha256, sha512)`);
console.log('');
removeDuplicates(parsedArgs).catch(console.error);