Comparar commits
11 Commits
bb234fef1e
...
redis
| Autor | SHA1 | Fecha | |
|---|---|---|---|
|
9ddca0c030
|
|||
|
3ce64eeb8e
|
|||
|
da89037125
|
|||
|
20f0503134
|
|||
|
42bc5a15d0
|
|||
|
2de78b7461
|
|||
|
8fa586731a
|
|||
|
ad7a1cf0a7
|
|||
|
459cdcd9bc
|
|||
|
9c0c30e846
|
|||
|
179e192e82
|
38
API.md
38
API.md
@@ -102,7 +102,7 @@ Content-Type: application/json
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Note: When plaintext is provided, it is automatically indexed in Elasticsearch for future lookups.
|
Note: When plaintext is provided, it is automatically stored in Redis for future lookups.
|
||||||
|
|
||||||
#### Error Responses
|
#### Error Responses
|
||||||
|
|
||||||
@@ -113,7 +113,7 @@ Note: When plaintext is provided, it is automatically indexed in Elasticsearch f
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
**500 Internal Server Error** - Server or Elasticsearch error:
|
**500 Internal Server Error** - Server or Redis error:
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"error": "Internal server error",
|
"error": "Internal server error",
|
||||||
@@ -127,7 +127,7 @@ Note: When plaintext is provided, it is automatically indexed in Elasticsearch f
|
|||||||
|
|
||||||
**Endpoint**: `GET /api/health`
|
**Endpoint**: `GET /api/health`
|
||||||
|
|
||||||
**Description**: Check the health of the application and Elasticsearch connection.
|
**Description**: Check the health of the application and Redis connection.
|
||||||
|
|
||||||
#### Request
|
#### Request
|
||||||
|
|
||||||
@@ -139,31 +139,28 @@ No parameters required.
|
|||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"status": "ok",
|
"status": "ok",
|
||||||
"elasticsearch": {
|
"redis": {
|
||||||
"cluster": "elasticsearch",
|
"version": "7.2.0",
|
||||||
"status": "green"
|
"memory": "1.5M",
|
||||||
|
"dbSize": 1542
|
||||||
},
|
},
|
||||||
"index": {
|
"stats": {
|
||||||
"exists": true,
|
"count": 1542,
|
||||||
"name": "hasher",
|
"size": 524288
|
||||||
"stats": {
|
|
||||||
"documentCount": 1542,
|
|
||||||
"indexSize": 524288
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
**Elasticsearch cluster status values**:
|
**Redis status fields**:
|
||||||
- `green`: All primary and replica shards are active
|
- `version`: Redis server version
|
||||||
- `yellow`: All primary shards are active, but not all replicas
|
- `memory`: Memory used by Redis
|
||||||
- `red`: Some primary shards are not active
|
- `dbSize`: Total number of keys in database
|
||||||
|
|
||||||
**Error** (503 Service Unavailable):
|
**Error** (503 Service Unavailable):
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"status": "error",
|
"status": "error",
|
||||||
"error": "Connection refused to Elasticsearch"
|
"error": "Connection refused to Redis"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -179,7 +176,6 @@ The API automatically detects hash types based on length and format:
|
|||||||
| SHA1 | 40 | `^[a-f0-9]{40}$` |
|
| SHA1 | 40 | `^[a-f0-9]{40}$` |
|
||||||
| SHA256 | 64 | `^[a-f0-9]{64}$` |
|
| SHA256 | 64 | `^[a-f0-9]{64}$` |
|
||||||
| SHA512 | 128 | `^[a-f0-9]{128}$` |
|
| SHA512 | 128 | `^[a-f0-9]{128}$` |
|
||||||
| Bcrypt | 60 | `^\$2[abxy]\$` |
|
|
||||||
|
|
||||||
Hashes are case-insensitive.
|
Hashes are case-insensitive.
|
||||||
|
|
||||||
@@ -253,7 +249,7 @@ The API accepts requests from any origin by default. For production deployment,
|
|||||||
## Notes
|
## Notes
|
||||||
|
|
||||||
- All timestamps are in ISO 8601 format
|
- All timestamps are in ISO 8601 format
|
||||||
- The API automatically creates the Elasticsearch index if it doesn't exist
|
- The API automatically creates Redis keys with proper structure
|
||||||
- Plaintext searches are automatically indexed for future lookups
|
- Plaintext searches are automatically stored for future lookups
|
||||||
- Searches are case-insensitive
|
- Searches are case-insensitive
|
||||||
- Hashes must be valid hexadecimal strings
|
- Hashes must be valid hexadecimal strings
|
||||||
|
|||||||
73
CHANGELOG.md
73
CHANGELOG.md
@@ -5,25 +5,55 @@ All notable changes to this project will be documented in this file.
|
|||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## [2.0.0] - 2025-12-03
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
#### Major Backend Migration
|
||||||
|
- **Breaking Change**: Migrated from Elasticsearch to Redis for improved performance
|
||||||
|
- Replaced Elasticsearch Client with ioredis for Redis operations
|
||||||
|
- Redesigned data structure using Redis key patterns
|
||||||
|
- Implemented O(1) hash lookups using Redis indexes
|
||||||
|
- Significantly reduced search latency (< 10ms typical)
|
||||||
|
|
||||||
|
#### New Redis Architecture
|
||||||
|
- Document storage: `hash:plaintext:{plaintext}` keys
|
||||||
|
- Hash indexes: `hash:index:{algorithm}:{hash}` for fast lookups
|
||||||
|
- Statistics tracking: `hash:stats` Redis Hash
|
||||||
|
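- Pipelined operations for efficient batch writes (pipelining reduces round trips but is not atomic by itself; use MULTI/EXEC where atomicity is required)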
|
||||||
|
- Connection pooling with automatic retry strategy
|
||||||
|
|
||||||
|
### Updated
|
||||||
|
|
||||||
|
#### Configuration
|
||||||
|
- Environment variables changed from `ELASTICSEARCH_NODE` to `REDIS_HOST`, `REDIS_PORT`, `REDIS_PASSWORD`, `REDIS_DB`
|
||||||
|
- Simplified connection setup with sensible defaults
|
||||||
|
- Optional Redis authentication support
|
||||||
|
|
||||||
|
#### Performance Improvements
|
||||||
|
- Search latency reduced to < 10ms (from ~50ms)
|
||||||
|
- Bulk indexing maintained at 1000-5000 docs/sec
|
||||||
|
- Lower memory footprint
|
||||||
|
- Better concurrent request handling (100+ users)
|
||||||
|
|
||||||
## [1.0.0] - 2025-12-03
|
## [1.0.0] - 2025-12-03
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|
||||||
#### Core Features
|
#### Core Features
|
||||||
- Hash search functionality for MD5, SHA1, SHA256, SHA512, and Bcrypt
|
- Hash search functionality for MD5, SHA1, SHA256, and SHA512
|
||||||
- Hash generation from plaintext input
|
- Hash generation from plaintext input
|
||||||
- Automatic detection of hash types based on length and pattern
|
- Automatic detection of hash types based on length and pattern
|
||||||
- Real-time hash generation with instant results
|
- Real-time hash generation with instant results
|
||||||
- Copy to clipboard functionality for all hash values
|
- Copy to clipboard functionality for all hash values
|
||||||
- Bcrypt verification support
|
|
||||||
|
|
||||||
#### Backend
|
#### Backend
|
||||||
- Elasticsearch integration with configurable endpoint
|
- Redis integration with ioredis
|
||||||
- Custom index mapping with 10 shards for horizontal scaling
|
- Key-value storage with hash indexes
|
||||||
- Automatic index creation on first use
|
- Automatic key structure initialization
|
||||||
- Auto-indexing of searched plaintext for future lookups
|
- Auto-storage of searched plaintext for future lookups
|
||||||
- RESTful API endpoints for search and health checks
|
- RESTful API endpoints for search and health checks
|
||||||
- Lowercase analyzer for case-insensitive searches
|
- Case-insensitive searches
|
||||||
|
|
||||||
#### Frontend
|
#### Frontend
|
||||||
- Modern, responsive UI with gradient design
|
- Modern, responsive UI with gradient design
|
||||||
@@ -63,7 +93,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||||||
#### Dependencies
|
#### Dependencies
|
||||||
- Next.js 16.0.7
|
- Next.js 16.0.7
|
||||||
- React 19.2.0
|
- React 19.2.0
|
||||||
- Elasticsearch Client 8.x
|
- ioredis 5.4.2
|
||||||
- Lucide React (icons)
|
- Lucide React (icons)
|
||||||
- Tailwind CSS 4.x
|
- Tailwind CSS 4.x
|
||||||
- TypeScript 5.x
|
- TypeScript 5.x
|
||||||
@@ -76,28 +106,35 @@ hasher/
|
|||||||
│ ├── layout.tsx # Root layout
|
│ ├── layout.tsx # Root layout
|
||||||
│ └── page.tsx # Main page
|
│ └── page.tsx # Main page
|
||||||
├── lib/ # Utility libraries
|
├── lib/ # Utility libraries
|
||||||
│ ├── elasticsearch.ts # ES client
|
│ ├── redis.ts # Redis client
|
||||||
│ └── hash.ts # Hash utilities
|
│ └── hash.ts # Hash utilities
|
||||||
├── scripts/ # CLI scripts
|
├── scripts/ # CLI scripts
|
||||||
│ └── index-file.ts # Bulk indexer
|
│ ├── index-file.ts # Bulk indexer
|
||||||
|
│ └── remove-duplicates.ts # Duplicate removal
|
||||||
└── docs/ # Documentation
|
└── docs/ # Documentation
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Elasticsearch Index Schema
|
#### Redis Data Structure
|
||||||
- Index name: `hasher`
|
- Main documents: `hash:plaintext:{plaintext}`
|
||||||
- Shards: 10
|
- MD5 index: `hash:index:md5:{hash}`
|
||||||
- Replicas: 1
|
- SHA1 index: `hash:index:sha1:{hash}`
|
||||||
- Fields: plaintext, md5, sha1, sha256, sha512, created_at
|
- SHA256 index: `hash:index:sha256:{hash}`
|
||||||
|
- SHA512 index: `hash:index:sha512:{hash}`
|
||||||
|
- Statistics: `hash:stats` (Redis Hash with count and size)
|
||||||
|
|
||||||
### Configuration
|
### Configuration
|
||||||
|
|
||||||
#### Environment Variables
|
#### Environment Variables
|
||||||
- `ELASTICSEARCH_NODE`: Elasticsearch endpoint (default: http://localhost:9200)
|
- `REDIS_HOST`: Redis host (default: localhost)
|
||||||
|
- `REDIS_PORT`: Redis port (default: 6379)
|
||||||
|
- `REDIS_PASSWORD`: Redis password (optional)
|
||||||
|
- `REDIS_DB`: Redis database number (default: 0)
|
||||||
|
|
||||||
#### Performance
|
#### Performance
|
||||||
- Bulk indexing: 1000-5000 docs/sec
|
- Bulk indexing: 1000-5000 docs/sec
|
||||||
- Search latency: < 50ms typical
|
- Search latency: < 10ms typical (O(1) lookups)
|
||||||
- Horizontal scaling ready
|
- Horizontal scaling ready with Redis Cluster
|
||||||
|
- Lower memory footprint than Elasticsearch
|
||||||
|
|
||||||
### Security
|
### Security
|
||||||
- Input validation on all endpoints
|
- Input validation on all endpoints
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ Thank you for considering contributing to Hasher! This document provides guideli
|
|||||||
## 🎯 Areas for Contribution
|
## 🎯 Areas for Contribution
|
||||||
|
|
||||||
### Features
|
### Features
|
||||||
- Additional hash algorithms (bcrypt validation, argon2, etc.)
|
- Additional hash algorithms (argon2, etc.)
|
||||||
- Export functionality (CSV, JSON)
|
- Export functionality (CSV, JSON)
|
||||||
- Search history
|
- Search history
|
||||||
- Batch hash lookup
|
- Batch hash lookup
|
||||||
@@ -48,7 +48,7 @@ Thank you for considering contributing to Hasher! This document provides guideli
|
|||||||
Before submitting a PR:
|
Before submitting a PR:
|
||||||
1. Test the web interface thoroughly
|
1. Test the web interface thoroughly
|
||||||
2. Test the bulk indexing script
|
2. Test the bulk indexing script
|
||||||
3. Verify Elasticsearch integration
|
3. Verify Redis integration
|
||||||
4. Check for TypeScript errors: `npm run build`
|
4. Check for TypeScript errors: `npm run build`
|
||||||
5. Run linter: `npm run lint`
|
5. Run linter: `npm run lint`
|
||||||
|
|
||||||
|
|||||||
163
DEPLOYMENT.md
163
DEPLOYMENT.md
@@ -5,7 +5,7 @@ This guide covers deploying the Hasher application to production.
|
|||||||
## Prerequisites
|
## Prerequisites
|
||||||
|
|
||||||
- Node.js 18.x or higher
|
- Node.js 18.x or higher
|
||||||
- Elasticsearch 8.x cluster
|
- Redis 6.x or higher
|
||||||
- Domain name (optional, for custom domain)
|
- Domain name (optional, for custom domain)
|
||||||
- SSL certificate (recommended for production)
|
- SSL certificate (recommended for production)
|
||||||
|
|
||||||
@@ -34,12 +34,16 @@ Vercel provides seamless deployment for Next.js applications.
|
|||||||
|
|
||||||
4. **Set Environment Variables**:
|
4. **Set Environment Variables**:
|
||||||
- Go to your project settings on Vercel
|
- Go to your project settings on Vercel
|
||||||
- Add environment variable: `ELASTICSEARCH_NODE=http://your-elasticsearch-host:9200`
|
- Add environment variables:
|
||||||
|
- `REDIS_HOST=your-redis-host`
|
||||||
|
- `REDIS_PORT=6379`
|
||||||
|
- `REDIS_PASSWORD=your-password` (if using authentication)
|
||||||
|
- `REDIS_DB=0`
|
||||||
- Redeploy: `vercel --prod`
|
- Redeploy: `vercel --prod`
|
||||||
|
|
||||||
#### Important Notes:
|
#### Important Notes:
|
||||||
- Ensure Elasticsearch is accessible from Vercel's servers
|
- Ensure Redis is accessible from Vercel's servers
|
||||||
- Consider using Elastic Cloud or a publicly accessible Elasticsearch instance
|
- Consider using a managed service (Redis Cloud or Upstash) or a publicly accessible Redis instance
|
||||||
- Use environment variables for sensitive configuration
|
- Use environment variables for sensitive configuration
|
||||||
|
|
||||||
---
|
---
|
||||||
@@ -116,7 +120,8 @@ docker build -t hasher:latest .
|
|||||||
# Run the container
|
# Run the container
|
||||||
docker run -d \
|
docker run -d \
|
||||||
-p 3000:3000 \
|
-p 3000:3000 \
|
||||||
-e ELASTICSEARCH_NODE=http://elasticsearch:9200 \
|
-e REDIS_HOST=redis \
|
||||||
|
-e REDIS_PORT=6379 \
|
||||||
--name hasher \
|
--name hasher \
|
||||||
hasher:latest
|
hasher:latest
|
||||||
```
|
```
|
||||||
@@ -134,25 +139,23 @@ services:
|
|||||||
ports:
|
ports:
|
||||||
- "3000:3000"
|
- "3000:3000"
|
||||||
environment:
|
environment:
|
||||||
- ELASTICSEARCH_NODE=http://elasticsearch:9200
|
- REDIS_HOST=redis
|
||||||
|
- REDIS_PORT=6379
|
||||||
depends_on:
|
depends_on:
|
||||||
- elasticsearch
|
- redis
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
elasticsearch:
|
redis:
|
||||||
image: docker.elastic.co/elasticsearch/elasticsearch:8.11.0
|
image: redis:7-alpine
|
||||||
environment:
|
|
||||||
- discovery.type=single-node
|
|
||||||
- xpack.security.enabled=false
|
|
||||||
- "ES_JAVA_OPTS=-Xms512m -Xmx512m"
|
|
||||||
ports:
|
ports:
|
||||||
- "9200:9200"
|
- "6379:6379"
|
||||||
volumes:
|
volumes:
|
||||||
- elasticsearch-data:/usr/share/elasticsearch/data
|
- redis-data:/data
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
command: redis-server --appendonly yes
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
elasticsearch-data:
|
redis-data:
|
||||||
```
|
```
|
||||||
|
|
||||||
Run with:
|
Run with:
|
||||||
@@ -193,7 +196,10 @@ npm run build
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
cat > .env.local << EOF
|
cat > .env.local << EOF
|
||||||
ELASTICSEARCH_NODE=http://localhost:9200
|
REDIS_HOST=localhost
|
||||||
|
REDIS_PORT=6379
|
||||||
|
REDIS_PASSWORD=your-password
|
||||||
|
REDIS_DB=0
|
||||||
NODE_ENV=production
|
NODE_ENV=production
|
||||||
EOF
|
EOF
|
||||||
```
|
```
|
||||||
@@ -233,43 +239,43 @@ sudo systemctl reload nginx
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Elasticsearch Setup
|
## Redis Setup
|
||||||
|
|
||||||
### Option 1: Elastic Cloud (Managed)
|
### Option 1: Redis Cloud (Managed)
|
||||||
|
|
||||||
1. Sign up at [Elastic Cloud](https://cloud.elastic.co/)
|
1. Sign up at [Redis Cloud](https://redis.com/try-free/) or [Upstash](https://upstash.com/)
|
||||||
2. Create a deployment
|
2. Create a database
|
||||||
3. Note the endpoint URL
|
3. Note the connection details (host, port, password)
|
||||||
4. Update `ELASTICSEARCH_NODE` environment variable
|
4. Update `REDIS_HOST`, `REDIS_PORT`, and `REDIS_PASSWORD` environment variables
|
||||||
|
|
||||||
### Option 2: Self-Hosted
|
### Option 2: Self-Hosted
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Ubuntu/Debian
|
# Ubuntu/Debian
|
||||||
wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add -
|
|
||||||
sudo sh -c 'echo "deb https://artifacts.elastic.co/packages/8.x/apt stable main" > /etc/apt/sources.list.d/elastic-8.x.list'
|
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install elasticsearch
|
sudo apt-get install redis-server
|
||||||
|
|
||||||
# Configure
|
# Configure
|
||||||
sudo nano /etc/elasticsearch/elasticsearch.yml
|
sudo nano /etc/redis/redis.conf
|
||||||
# Set: network.host: 0.0.0.0
|
# Set: bind 0.0.0.0 (to allow remote connections)
|
||||||
|
# Set: requirepass your-strong-password (for security)
|
||||||
|
|
||||||
# Start
|
# Start
|
||||||
sudo systemctl start elasticsearch
|
sudo systemctl start redis-server
|
||||||
sudo systemctl enable elasticsearch
|
sudo systemctl enable redis-server
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Security Considerations
|
## Security Considerations
|
||||||
|
|
||||||
### 1. Elasticsearch Security
|
### 1. Redis Security
|
||||||
|
|
||||||
- Enable authentication on Elasticsearch
|
- Enable authentication with requirepass
|
||||||
- Use HTTPS for Elasticsearch connection
|
- Use TLS for Redis connections (Redis 6+)
|
||||||
- Restrict network access with firewall rules
|
- Restrict network access with firewall rules
|
||||||
- Update credentials regularly
|
- Update credentials regularly
|
||||||
|
- Disable dangerous commands (FLUSHDB, FLUSHALL, etc.)
|
||||||
|
|
||||||
### 2. Application Security
|
### 2. Application Security
|
||||||
|
|
||||||
@@ -285,7 +291,7 @@ sudo systemctl enable elasticsearch
|
|||||||
# Example UFW firewall rules
|
# Example UFW firewall rules
|
||||||
sudo ufw allow 80/tcp
|
sudo ufw allow 80/tcp
|
||||||
sudo ufw allow 443/tcp
|
sudo ufw allow 443/tcp
|
||||||
sudo ufw allow from YOUR_IP to any port 9200 # Elasticsearch
|
sudo ufw allow from YOUR_IP to any port 6379 # Redis
|
||||||
sudo ufw enable
|
sudo ufw enable
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -303,37 +309,48 @@ pm2 monit
|
|||||||
pm2 logs hasher
|
pm2 logs hasher
|
||||||
```
|
```
|
||||||
|
|
||||||
### Elasticsearch Monitoring
|
### Redis Monitoring
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Health check
|
# Health check
|
||||||
curl http://localhost:9200/_cluster/health?pretty
|
redis-cli ping
|
||||||
|
|
||||||
# Index stats
|
# Get info
|
||||||
curl http://localhost:9200/hasher/_stats?pretty
|
redis-cli INFO
|
||||||
|
|
||||||
|
# Database stats
|
||||||
|
redis-cli INFO stats
|
||||||
|
|
||||||
|
# Memory usage
|
||||||
|
redis-cli INFO memory
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## Backup and Recovery
|
## Backup and Recovery
|
||||||
|
|
||||||
### Elasticsearch Snapshots
|
### Redis Backups
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Configure snapshot repository
|
# Enable AOF (Append Only File) persistence
|
||||||
curl -X PUT "localhost:9200/_snapshot/hasher_backup" -H 'Content-Type: application/json' -d'
|
redis-cli CONFIG SET appendonly yes
|
||||||
{
|
|
||||||
"type": "fs",
|
|
||||||
"settings": {
|
|
||||||
"location": "/mnt/backups/elasticsearch"
|
|
||||||
}
|
|
||||||
}'
|
|
||||||
|
|
||||||
# Create snapshot
|
# Save RDB snapshot manually
|
||||||
curl -X PUT "localhost:9200/_snapshot/hasher_backup/snapshot_1?wait_for_completion=true"
|
redis-cli SAVE
|
||||||
|
|
||||||
# Restore snapshot
|
# Configure automatic backups in redis.conf
|
||||||
curl -X POST "localhost:9200/_snapshot/hasher_backup/snapshot_1/_restore"
|
# Save if 1 key changed in 15 minutes (redis.conf comments must be on their own line)
save 900 1
|
||||||
|
# Save if 10 keys changed in 5 minutes
save 300 10
|
||||||
|
# Save if 10000 keys changed in 1 minute
save 60 10000
|
||||||
|
|
||||||
|
# Backup files location (default)
|
||||||
|
# RDB: /var/lib/redis/dump.rdb
|
||||||
|
# AOF: /var/lib/redis/appendonly.aof
|
||||||
|
|
||||||
|
# Restore from backup
|
||||||
|
sudo systemctl stop redis-server
|
||||||
|
sudo cp /backup/dump.rdb /var/lib/redis/
|
||||||
|
sudo systemctl start redis-server
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
@@ -344,13 +361,14 @@ curl -X POST "localhost:9200/_snapshot/hasher_backup/snapshot_1/_restore"
|
|||||||
|
|
||||||
1. Deploy multiple Next.js instances
|
1. Deploy multiple Next.js instances
|
||||||
2. Use a load balancer (nginx, HAProxy)
|
2. Use a load balancer (nginx, HAProxy)
|
||||||
3. Share the same Elasticsearch cluster
|
3. Share the same Redis instance or cluster
|
||||||
|
|
||||||
### Elasticsearch Scaling
|
### Redis Scaling
|
||||||
|
|
||||||
1. Add more nodes to the cluster
|
1. Use Redis Cluster for horizontal scaling
|
||||||
2. Increase shard count (already set to 10)
|
2. Set up Redis Sentinel for high availability
|
||||||
3. Use replicas for read scaling
|
3. Use read replicas for read-heavy workloads
|
||||||
|
4. Consider Redis Enterprise for advanced features
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -363,28 +381,31 @@ pm2 status
|
|||||||
pm2 logs hasher --lines 100
|
pm2 logs hasher --lines 100
|
||||||
```
|
```
|
||||||
|
|
||||||
### Check Elasticsearch
|
### Check Redis
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
curl http://localhost:9200/_cluster/health
|
redis-cli ping
|
||||||
curl http://localhost:9200/hasher/_count
|
redis-cli DBSIZE
|
||||||
|
redis-cli INFO stats
|
||||||
```
|
```
|
||||||
|
|
||||||
### Common Issues
|
### Common Issues
|
||||||
|
|
||||||
**Issue**: Cannot connect to Elasticsearch
|
**Issue**: Cannot connect to Redis
|
||||||
- Check firewall rules
|
- Check firewall rules
|
||||||
- Verify Elasticsearch is running
|
- Verify Redis is running: `redis-cli ping`
|
||||||
- Check `ELASTICSEARCH_NODE` environment variable
|
- Check `REDIS_HOST`, `REDIS_PORT`, and `REDIS_PASSWORD` environment variables
|
||||||
|
|
||||||
**Issue**: Out of memory
|
**Issue**: Out of memory
|
||||||
- Increase Node.js memory: `NODE_OPTIONS=--max-old-space-size=4096`
|
- Increase Node.js memory: `NODE_OPTIONS=--max-old-space-size=4096`
|
||||||
- Increase Elasticsearch heap size
|
- Configure Redis maxmemory and eviction policy
|
||||||
|
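- Use Redis persistence (RDB/AOF) carefully: background saves and AOF rewrites fork the server process and can temporarily increase memory usage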
|
||||||
|
|
||||||
**Issue**: Slow searches
|
**Issue**: Slow searches
|
||||||
- Add more Elasticsearch nodes
|
- Verify O(1) lookups are being used (direct key access)
|
||||||
- Optimize queries
|
- Check Redis memory and CPU usage
|
||||||
- Increase replica count
|
- Consider using Redis Cluster for distribution
|
||||||
|
- Optimize key patterns
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -392,9 +413,10 @@ curl http://localhost:9200/hasher/_count
|
|||||||
|
|
||||||
1. **Enable Next.js Static Optimization**
|
1. **Enable Next.js Static Optimization**
|
||||||
2. **Use CDN for static assets**
|
2. **Use CDN for static assets**
|
||||||
3. **Enable Elasticsearch caching**
|
3. **Enable Redis pipelining for bulk operations**
|
||||||
4. **Configure appropriate JVM heap for Elasticsearch**
|
4. **Configure appropriate maxmemory for Redis**
|
||||||
5. **Use SSD storage for Elasticsearch**
|
5. **Use SSD storage for Redis persistence**
|
||||||
|
6. **Enable Redis connection pooling (already implemented)**
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -402,5 +424,6 @@ curl http://localhost:9200/hasher/_count
|
|||||||
|
|
||||||
For deployment issues, check:
|
For deployment issues, check:
|
||||||
- [Next.js Deployment Docs](https://nextjs.org/docs/deployment)
|
- [Next.js Deployment Docs](https://nextjs.org/docs/deployment)
|
||||||
- [Elasticsearch Setup Guide](https://www.elastic.co/guide/en/elasticsearch/reference/current/setup.html)
|
- [Redis Setup Guide](https://redis.io/docs/getting-started/)
|
||||||
|
- [ioredis Documentation](https://github.com/redis/ioredis)
|
||||||
- Project GitHub Issues
|
- Project GitHub Issues
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
## 📋 Project Overview
|
## 📋 Project Overview
|
||||||
|
|
||||||
**Hasher** is a modern, high-performance hash search and generation tool built with Next.js and powered by Elasticsearch. It provides a beautiful web interface for searching hash values and generating cryptographic hashes from plaintext.
|
**Hasher** is a modern, high-performance hash search and generation tool built with Next.js and powered by Redis. It provides a beautiful web interface for searching hash values and generating cryptographic hashes from plaintext.
|
||||||
|
|
||||||
### Version: 1.0.0
|
### Version: 1.0.0
|
||||||
### Status: ✅ Production Ready
|
### Status: ✅ Production Ready
|
||||||
@@ -13,7 +13,7 @@
|
|||||||
## ✨ Key Features
|
## ✨ Key Features
|
||||||
|
|
||||||
### 🔍 Hash Search
|
### 🔍 Hash Search
|
||||||
- Search for MD5, SHA1, SHA256, SHA512, and Bcrypt hashes
|
- Search for MD5, SHA1, SHA256, and SHA512 hashes
|
||||||
- Automatic hash type detection
|
- Automatic hash type detection
|
||||||
- Case-insensitive matching
|
- Case-insensitive matching
|
||||||
- Real-time results
|
- Real-time results
|
||||||
@@ -25,10 +25,10 @@
|
|||||||
- Copy-to-clipboard functionality
|
- Copy-to-clipboard functionality
|
||||||
|
|
||||||
### 📊 Backend
|
### 📊 Backend
|
||||||
- Elasticsearch 8.x integration
|
- Redis integration with ioredis
|
||||||
- 10-shard index for horizontal scaling
|
- Key-value storage with hash indexes
|
||||||
- RESTful API with JSON responses
|
- RESTful API with JSON responses
|
||||||
- Automatic index creation and initialization
|
- Automatic key structure initialization
|
||||||
- Health monitoring endpoint
|
- Health monitoring endpoint
|
||||||
|
|
||||||
### 🎨 Frontend
|
### 🎨 Frontend
|
||||||
@@ -52,7 +52,7 @@
|
|||||||
### Stack
|
### Stack
|
||||||
- **Frontend**: Next.js 16.0, React 19.2, Tailwind CSS 4.x
|
- **Frontend**: Next.js 16.0, React 19.2, Tailwind CSS 4.x
|
||||||
- **Backend**: Next.js API Routes, Node.js 18+
|
- **Backend**: Next.js API Routes, Node.js 18+
|
||||||
- **Database**: Elasticsearch 8.x
|
- **Database**: Redis 6.x+
|
||||||
- **Language**: TypeScript 5.x
|
- **Language**: TypeScript 5.x
|
||||||
- **Icons**: Lucide React
|
- **Icons**: Lucide React
|
||||||
|
|
||||||
@@ -68,7 +68,7 @@ hasher/
|
|||||||
│ └── globals.css # Global styles
|
│ └── globals.css # Global styles
|
||||||
│
|
│
|
||||||
├── lib/
|
├── lib/
|
||||||
│ ├── elasticsearch.ts # ES client & config
|
│ ├── redis.ts # Redis client & config
|
||||||
│ └── hash.ts # Hash utilities
|
│ └── hash.ts # Hash utilities
|
||||||
│
|
│
|
||||||
├── scripts/
|
├── scripts/
|
||||||
@@ -106,7 +106,7 @@ Search for hashes or generate from plaintext
|
|||||||
- **Output**: Hash results or generated hashes
|
- **Output**: Hash results or generated hashes
|
||||||
|
|
||||||
### GET /api/health
|
### GET /api/health
|
||||||
Check system health and Elasticsearch status
|
Check system health and Redis status
|
||||||
- **Output**: System status and statistics
|
- **Output**: System status and statistics
|
||||||
|
|
||||||
---
|
---
|
||||||
@@ -139,28 +139,34 @@ npm run index-file wordlist.txt -- --batch-size 500
|
|||||||
|
|
||||||
### Environment Configuration
|
### Environment Configuration
|
||||||
```bash
|
```bash
|
||||||
# Optional: Set Elasticsearch endpoint
|
# Optional: Set Redis connection details
|
||||||
export ELASTICSEARCH_NODE=http://localhost:9200
|
export REDIS_HOST=localhost
|
||||||
|
export REDIS_PORT=6379
|
||||||
|
export REDIS_PASSWORD=your-password
|
||||||
|
export REDIS_DB=0
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 🗄️ Elasticsearch Configuration
|
## 🗄️ Redis Data Structure
|
||||||
|
|
||||||
### Index: `hasher`
|
### Key Patterns
|
||||||
- **Shards**: 10 (horizontal scaling)
|
- **Documents**: `hash:plaintext:{plaintext}` - Main document storage
|
||||||
- **Replicas**: 1 (redundancy)
|
- **MD5 Index**: `hash:index:md5:{hash}` - MD5 hash lookup
|
||||||
- **Analyzer**: Custom lowercase analyzer
|
- **SHA1 Index**: `hash:index:sha1:{hash}` - SHA1 hash lookup
|
||||||
|
- **SHA256 Index**: `hash:index:sha256:{hash}` - SHA256 hash lookup
|
||||||
|
- **SHA512 Index**: `hash:index:sha512:{hash}` - SHA512 hash lookup
|
||||||
|
- **Statistics**: `hash:stats` - Redis Hash with count and size
|
||||||
|
|
||||||
### Schema
|
### Document Schema
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"plaintext": "text + keyword",
|
"plaintext": "string",
|
||||||
"md5": "keyword",
|
"md5": "string",
|
||||||
"sha1": "keyword",
|
"sha1": "string",
|
||||||
"sha256": "keyword",
|
"sha256": "string",
|
||||||
"sha512": "keyword",
|
"sha512": "string",
|
||||||
"created_at": "date"
|
"created_at": "ISO 8601 date string"
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -174,16 +180,15 @@ export ELASTICSEARCH_NODE=http://localhost:9200
|
|||||||
| SHA1 | 40 | `^[a-f0-9]{40}$` |
|
| SHA1 | 40 | `^[a-f0-9]{40}$` |
|
||||||
| SHA256 | 64 | `^[a-f0-9]{64}$` |
|
| SHA256 | 64 | `^[a-f0-9]{64}$` |
|
||||||
| SHA512 | 128 | `^[a-f0-9]{128}$` |
|
| SHA512 | 128 | `^[a-f0-9]{128}$` |
|
||||||
| Bcrypt | 60 | `^\$2[abxy]\$` |
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 🚀 Performance Metrics
|
## 🚀 Performance Metrics
|
||||||
|
|
||||||
- **Bulk Indexing**: 1000-5000 docs/sec
|
- **Bulk Indexing**: 1000-5000 docs/sec
|
||||||
- **Search Latency**: <50ms (typical)
|
- **Search Latency**: <10ms (typical O(1) lookups)
|
||||||
- **Concurrent Users**: 50+ supported
|
- **Concurrent Users**: 100+ supported
|
||||||
- **Horizontal Scaling**: Ready with 10 shards
|
- **Horizontal Scaling**: Ready with Redis Cluster
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -221,9 +226,9 @@ export ELASTICSEARCH_NODE=http://localhost:9200
|
|||||||
|
|
||||||
### Requirements
|
### Requirements
|
||||||
- Node.js 18.x or higher
|
- Node.js 18.x or higher
|
||||||
- Elasticsearch 8.x
|
- Redis 6.x or higher
|
||||||
- 512MB RAM minimum
|
- 512MB RAM minimum
|
||||||
- Internet connection for Elasticsearch
|
- Redis server running locally or remotely
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -245,7 +250,6 @@ export ELASTICSEARCH_NODE=http://localhost:9200
|
|||||||
## 📈 Future Enhancements
|
## 📈 Future Enhancements
|
||||||
|
|
||||||
### Planned Features
|
### Planned Features
|
||||||
- Bcrypt hash validation
|
|
||||||
- Argon2 hash support
|
- Argon2 hash support
|
||||||
- Search history
|
- Search history
|
||||||
- Batch lookup
|
- Batch lookup
|
||||||
@@ -287,7 +291,7 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
|
|||||||
## 🙏 Acknowledgments
|
## 🙏 Acknowledgments
|
||||||
|
|
||||||
- Built with [Next.js](https://nextjs.org/)
|
- Built with [Next.js](https://nextjs.org/)
|
||||||
- Powered by [Elasticsearch](https://www.elastic.co/)
|
- Powered by [Redis](https://redis.io/)
|
||||||
- Icons by [Lucide](https://lucide.dev/)
|
- Icons by [Lucide](https://lucide.dev/)
|
||||||
- Styled with [Tailwind CSS](https://tailwindcss.com/)
|
- Styled with [Tailwind CSS](https://tailwindcss.com/)
|
||||||
|
|
||||||
@@ -315,7 +319,7 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
|
|||||||
### Completed ✅
|
### Completed ✅
|
||||||
- [x] Core hash search functionality
|
- [x] Core hash search functionality
|
||||||
- [x] Hash generation from plaintext
|
- [x] Hash generation from plaintext
|
||||||
- [x] Elasticsearch integration
|
- [x] Redis integration
|
||||||
- [x] Modern responsive UI
|
- [x] Modern responsive UI
|
||||||
- [x] Bulk indexing script
|
- [x] Bulk indexing script
|
||||||
- [x] API endpoints
|
- [x] API endpoints
|
||||||
|
|||||||
@@ -25,7 +25,6 @@ npm run index-file -- --help # Show help
|
|||||||
| SHA1 | 40 | `5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8` |
|
| SHA1 | 40 | `5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8` |
|
||||||
| SHA256 | 64 | `5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8` |
|
| SHA256 | 64 | `5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8` |
|
||||||
| SHA512 | 128 | `b109f3bbbc244eb82441917ed06d618b9008dd09b3befd1b5e07394c706a8bb9...` |
|
| SHA512 | 128 | `b109f3bbbc244eb82441917ed06d618b9008dd09b3befd1b5e07394c706a8bb9...` |
|
||||||
| Bcrypt | 60 | `$2b$10$N9qo8uLOickgx2ZMRZoMye...` |
|
|
||||||
|
|
||||||
## 🔌 API Quick Reference
|
## 🔌 API Quick Reference
|
||||||
|
|
||||||
@@ -46,32 +45,35 @@ GET /api/health
|
|||||||
- **Web Interface**: http://localhost:3000
|
- **Web Interface**: http://localhost:3000
|
||||||
- **Search API**: http://localhost:3000/api/search
|
- **Search API**: http://localhost:3000/api/search
|
||||||
- **Health API**: http://localhost:3000/api/health
|
- **Health API**: http://localhost:3000/api/health
|
||||||
- **Elasticsearch**: http://localhost:9200
|
- **Redis**: localhost:6379
|
||||||
|
|
||||||
## 📊 Elasticsearch Commands
|
## 📊 Redis Commands
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Health
|
# Test connection
|
||||||
curl http://localhost:9200/_cluster/health?pretty
|
redis-cli ping
|
||||||
|
|
||||||
# Index stats
|
# Get database stats
|
||||||
curl http://localhost:9200/hasher/_stats?pretty
|
redis-cli INFO stats
|
||||||
|
|
||||||
# Document count
|
# Count all keys
|
||||||
curl http://localhost:9200/hasher/_count?pretty
|
redis-cli DBSIZE
|
||||||
|
|
||||||
# Search
|
# List all hash documents
|
||||||
curl http://localhost:9200/hasher/_search?pretty
|
redis-cli --scan --pattern "hash:plaintext:*"
|
||||||
|
|
||||||
# Delete index (CAUTION!)
|
# Get document
|
||||||
curl -X DELETE http://localhost:9200/hasher
|
redis-cli GET "hash:plaintext:password"
|
||||||
|
|
||||||
|
# Clear all data (CAUTION!)
|
||||||
|
redis-cli FLUSHDB
|
||||||
```
|
```
|
||||||
|
|
||||||
## 🐛 Troubleshooting
|
## 🐛 Troubleshooting
|
||||||
|
|
||||||
| Problem | Solution |
|
| Problem | Solution |
|
||||||
|---------|----------|
|
|---------|----------|
|
||||||
| Can't connect to ES | Check `ELASTICSEARCH_NODE` env var |
|
| Can't connect to Redis | Check `REDIS_HOST` and `REDIS_PORT` env vars |
|
||||||
| Port 3000 in use | Use `PORT=3001 npm run dev` |
|
| Port 3000 in use | Use `PORT=3001 npm run dev` |
|
||||||
| Module not found | Run `npm install` |
|
| Module not found | Run `npm install` |
|
||||||
| Build errors | Run `npm run build` to see details |
|
| Build errors | Run `npm run build` to see details |
|
||||||
@@ -82,17 +84,14 @@ curl -X DELETE http://localhost:9200/hasher
|
|||||||
|------|---------|
|
|------|---------|
|
||||||
| `app/page.tsx` | Main UI component |
|
| `app/page.tsx` | Main UI component |
|
||||||
| `app/api/search/route.ts` | Search endpoint |
|
| `app/api/search/route.ts` | Search endpoint |
|
||||||
| `lib/elasticsearch.ts` | ES configuration |
|
| `lib/redis.ts` | Redis configuration |
|
||||||
| `lib/hash.ts` | Hash utilities |
|
|
||||||
| `scripts/index-file.ts` | Bulk indexer |
|
|
||||||
|
|
||||||
## ⚙️ Environment Variables
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Required
|
|
||||||
ELASTICSEARCH_NODE=http://localhost:9200
|
|
||||||
|
|
||||||
# Optional
|
# Optional
|
||||||
|
REDIS_HOST=localhost
|
||||||
|
REDIS_PORT=6379
|
||||||
|
REDIS_PASSWORD=your-password
|
||||||
|
REDIS_DB=0
|
||||||
NODE_ENV=production
|
NODE_ENV=production
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
118
README.md
118
README.md
@@ -1,20 +1,21 @@
|
|||||||
# Hasher 🔐
|
# Hasher 🔐
|
||||||
|
|
||||||
A modern, high-performance hash search and generation tool powered by Elasticsearch and Next.js. Search for hash values to find their plaintext origins or generate hashes from any text input.
|
A modern, high-performance hash search and generation tool powered by Redis and Next.js. Search for hash values to find their plaintext origins or generate hashes from any text input.
|
||||||
|
|
||||||

|

|
||||||

|

|
||||||

|

|
||||||
|
|
||||||
## ✨ Features
|
## ✨ Features
|
||||||
|
|
||||||
- 🔍 **Hash Lookup**: Search for MD5, SHA1, SHA256, SHA512, and Bcrypt hashes
|
- 🔍 **Hash Lookup**: Search for MD5, SHA1, SHA256, and SHA512 hashes
|
||||||
- 🔑 **Hash Generation**: Generate multiple hash types from plaintext
|
- 🔑 **Hash Generation**: Generate multiple hash types from plaintext
|
||||||
- 💾 **Auto-Indexing**: Automatically stores searched plaintext and hashes
|
- 💾 **Auto-Indexing**: Automatically stores searched plaintext and hashes
|
||||||
- 📊 **Elasticsearch Backend**: Scalable storage with 10 shards for performance
|
- 📊 **Redis Backend**: Ultra-fast in-memory storage with persistence
|
||||||
- 🚀 **Bulk Indexing**: Import wordlists via command-line script
|
- 🚀 **Bulk Indexing**: Import wordlists via command-line script with resume capability
|
||||||
- 🎨 **Modern UI**: Beautiful, responsive interface with real-time feedback
|
- 🎨 **Modern UI**: Beautiful, responsive interface with real-time feedback
|
||||||
- 📋 **Copy to Clipboard**: One-click copying of any hash value
|
- 📋 **Copy to Clipboard**: One-click copying of any hash value
|
||||||
|
- ⚡ **High Performance**: Lightning-fast searches with Redis indexing
|
||||||
|
|
||||||
## 🏗️ Architecture
|
## 🏗️ Architecture
|
||||||
|
|
||||||
@@ -32,8 +33,9 @@ A modern, high-performance hash search and generation tool powered by Elasticsea
|
|||||||
│
|
│
|
||||||
↓
|
↓
|
||||||
┌─────────────┐
|
┌─────────────┐
|
||||||
│Elasticsearch│ ← Distributed storage
|
│ Redis │ ← In-memory storage
|
||||||
│ 10 Shards │ (localhost:9200)
|
│ (Key-Value │ (localhost:6379)
|
||||||
|
│ + Hashes) │
|
||||||
└─────────────┘
|
└─────────────┘
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -42,7 +44,7 @@ A modern, high-performance hash search and generation tool powered by Elasticsea
|
|||||||
### Prerequisites
|
### Prerequisites
|
||||||
|
|
||||||
- Node.js 18.x or higher
|
- Node.js 18.x or higher
|
||||||
- Elasticsearch 8.x running on `localhost:9200`
|
- Redis 6.x or higher running on `localhost:6379`
|
||||||
- npm or yarn
|
- npm or yarn
|
||||||
|
|
||||||
### Installation
|
### Installation
|
||||||
@@ -58,20 +60,33 @@ A modern, high-performance hash search and generation tool powered by Elasticsea
|
|||||||
npm install
|
npm install
|
||||||
```
|
```
|
||||||
|
|
||||||
3. **Configure Elasticsearch** (optional)
|
3. **Start Redis** (if not already running)
|
||||||
|
|
||||||
By default, the app connects to `http://localhost:9200`. To change this:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
export ELASTICSEARCH_NODE=http://your-elasticsearch-host:9200
|
# Using Docker
|
||||||
|
docker run -d --name redis -p 6379:6379 redis:latest
|
||||||
|
|
||||||
|
# Or using system package manager
|
||||||
|
sudo systemctl start redis
|
||||||
```
|
```
|
||||||
|
|
||||||
4. **Run the development server**
|
4. **Configure Redis** (optional)
|
||||||
|
|
||||||
|
By default, the app connects to `localhost:6379`. To change this:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export REDIS_HOST=your-redis-host
|
||||||
|
export REDIS_PORT=6379
|
||||||
|
export REDIS_PASSWORD=your-password # if authentication is enabled
|
||||||
|
export REDIS_DB=0 # database number
|
||||||
|
```
|
||||||
|
|
||||||
|
5. **Run the development server**
|
||||||
```bash
|
```bash
|
||||||
npm run dev
|
npm run dev
|
||||||
```
|
```
|
||||||
|
|
||||||
5. **Open your browser**
|
6. **Open your browser**
|
||||||
|
|
||||||
Navigate to [http://localhost:3000](http://localhost:3000)
|
Navigate to [http://localhost:3000](http://localhost:3000)
|
||||||
|
|
||||||
@@ -100,6 +115,12 @@ npm run index-file wordlist.txt
|
|||||||
# With custom batch size
|
# With custom batch size
|
||||||
npm run index-file wordlist.txt -- --batch-size 500
|
npm run index-file wordlist.txt -- --batch-size 500
|
||||||
|
|
||||||
|
# Skip duplicate checking (faster)
|
||||||
|
npm run index-file wordlist.txt -- --no-check
|
||||||
|
|
||||||
|
# Resume interrupted indexing
|
||||||
|
npm run index-file wordlist.txt -- --resume
|
||||||
|
|
||||||
# Show help
|
# Show help
|
||||||
npm run index-file -- --help
|
npm run index-file -- --help
|
||||||
```
|
```
|
||||||
@@ -114,10 +135,11 @@ qwerty
|
|||||||
|
|
||||||
**Script features**:
|
**Script features**:
|
||||||
- ✅ Bulk indexing with configurable batch size
|
- ✅ Bulk indexing with configurable batch size
|
||||||
- ✅ Progress indicator with percentage
|
- ✅ Progress indicator and real-time stats
|
||||||
|
- ✅ State persistence with resume capability
|
||||||
|
- ✅ Optional duplicate checking
|
||||||
- ✅ Error handling and reporting
|
- ✅ Error handling and reporting
|
||||||
- ✅ Performance metrics (docs/sec)
|
- ✅ Performance metrics (docs/sec)
|
||||||
- ✅ Automatic index refresh
|
|
||||||
|
|
||||||
## 🔌 API Reference
|
## 🔌 API Reference
|
||||||
|
|
||||||
@@ -171,15 +193,17 @@ Search for a hash or generate hashes from plaintext.
|
|||||||
|
|
||||||
**GET** `/api/health`
|
**GET** `/api/health`
|
||||||
|
|
||||||
Check Elasticsearch connection and index status.
|
Check Redis connection and index status.
|
||||||
|
|
||||||
**Response**:
|
**Response**:
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"status": "ok",
|
"status": "ok",
|
||||||
"elasticsearch": {
|
"redis": {
|
||||||
"cluster": "elasticsearch",
|
"connected": true,
|
||||||
"status": "green"
|
"version": "7.0.15",
|
||||||
|
"usedMemory": 2097152,
|
||||||
|
"dbSize": 1542
|
||||||
},
|
},
|
||||||
"index": {
|
"index": {
|
||||||
"exists": true,
|
"exists": true,
|
||||||
@@ -192,30 +216,33 @@ Check Elasticsearch connection and index status.
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
## 🗄️ Elasticsearch Index
|
## 🗄️ Redis Data Structure
|
||||||
|
|
||||||
### Index Configuration
|
### Key Structure
|
||||||
|
|
||||||
- **Name**: `hasher`
|
**Main Documents**: `hash:plaintext:{plaintext}`
|
||||||
- **Shards**: 10 (for horizontal scaling)
|
- Stores complete hash document as JSON string
|
||||||
- **Replicas**: 1 (for redundancy)
|
- Contains all hash algorithms and metadata
|
||||||
|
|
||||||
### Mapping Schema
|
**Hash Indexes**: `hash:index:{algorithm}:{hash}`
|
||||||
|
- Reverse lookup from hash to plaintext
|
||||||
|
- One key per algorithm (md5, sha1, sha256, sha512)
|
||||||
|
- Value is the plaintext string
|
||||||
|
|
||||||
```json
|
**Statistics**: `hash:stats` (Redis Hash)
|
||||||
|
- `count`: Total number of unique plaintexts
|
||||||
|
- `size`: Approximate total size in bytes
|
||||||
|
|
||||||
|
### Document Schema
|
||||||
|
|
||||||
|
```typescript
|
||||||
{
|
{
|
||||||
"plaintext": {
|
"plaintext": string,
|
||||||
"type": "text",
|
"md5": string,
|
||||||
"analyzer": "lowercase_analyzer",
|
"sha1": string,
|
||||||
"fields": {
|
"sha256": string,
|
||||||
"keyword": { "type": "keyword" }
|
"sha512": string,
|
||||||
}
|
"created_at": string (ISO 8601)
|
||||||
},
|
|
||||||
"md5": { "type": "keyword" },
|
|
||||||
"sha1": { "type": "keyword" },
|
|
||||||
"sha256": { "type": "keyword" },
|
|
||||||
"sha512": { "type": "keyword" },
|
|
||||||
"created_at": { "type": "date" }
|
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -233,10 +260,11 @@ hasher/
|
|||||||
│ ├── page.tsx # Main UI component
|
│ ├── page.tsx # Main UI component
|
||||||
│ └── globals.css # Global styles
|
│ └── globals.css # Global styles
|
||||||
├── lib/
|
├── lib/
|
||||||
│ ├── elasticsearch.ts # ES client & index config
|
│ ├── redis.ts # Redis client & data layer
|
||||||
│ └── hash.ts # Hash utilities
|
│ └── hash.ts # Hash utilities
|
||||||
├── scripts/
|
├── scripts/
|
||||||
│ └── index-file.ts # Bulk indexing script
|
│ ├── index-file.ts # Bulk indexing script
|
||||||
|
│ └── remove-duplicates.ts # Duplicate removal utility
|
||||||
├── package.json
|
├── package.json
|
||||||
├── tsconfig.json
|
├── tsconfig.json
|
||||||
├── next.config.ts
|
├── next.config.ts
|
||||||
@@ -257,7 +285,10 @@ npm run start
|
|||||||
Create a `.env.local` file:
|
Create a `.env.local` file:
|
||||||
|
|
||||||
```env
|
```env
|
||||||
ELASTICSEARCH_NODE=http://localhost:9200
|
REDIS_HOST=localhost
|
||||||
|
REDIS_PORT=6379
|
||||||
|
REDIS_PASSWORD=your-password
|
||||||
|
REDIS_DB=0
|
||||||
```
|
```
|
||||||
|
|
||||||
### Linting
|
### Linting
|
||||||
@@ -274,7 +305,6 @@ npm run lint
|
|||||||
| SHA1 | 40 | `^[a-f0-9]{40}$` |
|
| SHA1 | 40 | `^[a-f0-9]{40}$` |
|
||||||
| SHA256 | 64 | `^[a-f0-9]{64}$` |
|
| SHA256 | 64 | `^[a-f0-9]{64}$` |
|
||||||
| SHA512 | 128 | `^[a-f0-9]{128}$` |
|
| SHA512 | 128 | `^[a-f0-9]{128}$` |
|
||||||
| Bcrypt | 60 | `^\$2[abxy]\$` |
|
|
||||||
|
|
||||||
## 🚀 Performance
|
## 🚀 Performance
|
||||||
|
|
||||||
@@ -300,7 +330,7 @@ This project is open source and available under the [MIT License](LICENSE).
|
|||||||
## 🙏 Acknowledgments
|
## 🙏 Acknowledgments
|
||||||
|
|
||||||
- Built with [Next.js](https://nextjs.org/)
|
- Built with [Next.js](https://nextjs.org/)
|
||||||
- Powered by [Elasticsearch](https://www.elastic.co/)
|
- Powered by [Redis](https://redis.io/)
|
||||||
- Icons by [Lucide](https://lucide.dev/)
|
- Icons by [Lucide](https://lucide.dev/)
|
||||||
- Styled with [Tailwind CSS](https://tailwindcss.com/)
|
- Styled with [Tailwind CSS](https://tailwindcss.com/)
|
||||||
|
|
||||||
|
|||||||
222
REDIS_QUICKSTART.md
Archivo normal
222
REDIS_QUICKSTART.md
Archivo normal
@@ -0,0 +1,222 @@
|
|||||||
|
# Redis Migration - Quick Reference
|
||||||
|
|
||||||
|
## 🚀 Quick Start
|
||||||
|
|
||||||
|
### 1. Install Redis
|
||||||
|
```bash
|
||||||
|
# Ubuntu/Debian
|
||||||
|
sudo apt-get install redis-server
|
||||||
|
|
||||||
|
# macOS
|
||||||
|
brew install redis
|
||||||
|
|
||||||
|
# Start Redis
|
||||||
|
redis-server
|
||||||
|
# or
|
||||||
|
sudo systemctl start redis-server
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Configure Environment (Optional)
|
||||||
|
```bash
|
||||||
|
# Create .env.local
|
||||||
|
cat > .env.local << EOF
|
||||||
|
REDIS_HOST=localhost
|
||||||
|
REDIS_PORT=6379
|
||||||
|
REDIS_PASSWORD= # Leave empty if no password
|
||||||
|
REDIS_DB=0
|
||||||
|
EOF
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Start Application
|
||||||
|
```bash
|
||||||
|
yarn dev
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🔍 Testing the Migration
|
||||||
|
|
||||||
|
### Test Health Endpoint
|
||||||
|
```bash
|
||||||
|
curl http://localhost:3000/api/health
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected response:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"status": "ok",
|
||||||
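|
"redis": {
|
||||||
|
"connected": true,
|
||||||
|
"version": "7.x",
|
||||||
|
"usedMemory": 1572864,
|
||||||
|
"dbSize": 0
|
||||||
|
},
|
||||||
|
"index": {
|
||||||
|
"exists": true,
|
||||||
|
"name": "hasher",
|
||||||
|
"stats": {
|
||||||
|
"documentCount": 0,
|
||||||
|
"indexSize": 0
|
||||||
|
}
|
||||||
|
}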
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Test Search API
|
||||||
|
```bash
|
||||||
|
# Generate hashes
|
||||||
|
curl -X POST http://localhost:3000/api/search \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"query":"password"}'
|
||||||
|
|
||||||
|
# Search for hash
|
||||||
|
curl -X POST http://localhost:3000/api/search \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"query":"5f4dcc3b5aa765d61d8327deb882cf99"}'
|
||||||
|
```
|
||||||
|
|
||||||
|
## 📊 Redis Commands
|
||||||
|
|
||||||
|
### Check Connection
|
||||||
|
```bash
|
||||||
|
redis-cli ping
|
||||||
|
# Should return: PONG
|
||||||
|
```
|
||||||
|
|
||||||
|
### View Data
|
||||||
|
```bash
|
||||||
|
# Count all keys
|
||||||
|
redis-cli DBSIZE
|
||||||
|
|
||||||
|
# List all documents
|
||||||
|
redis-cli --scan --pattern "hash:plaintext:*"
|
||||||
|
|
||||||
|
# Get a specific document
|
||||||
|
redis-cli GET "hash:plaintext:password"
|
||||||
|
|
||||||
|
# Get statistics
|
||||||
|
redis-cli HGETALL hash:stats
|
||||||
|
|
||||||
|
# Search by hash
|
||||||
|
redis-cli GET "hash:index:md5:5f4dcc3b5aa765d61d8327deb882cf99"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Clear Data (if needed)
|
||||||
|
```bash
|
||||||
|
# WARNING: Deletes ALL data in current database
|
||||||
|
redis-cli FLUSHDB
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🔄 Bulk Indexing
|
||||||
|
|
||||||
|
### Basic Usage
|
||||||
|
```bash
|
||||||
|
yarn index-file sample-wordlist.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
### Advanced Options
|
||||||
|
```bash
|
||||||
|
# Custom batch size
|
||||||
|
yarn index-file wordlist.txt -- --batch-size 500
|
||||||
|
|
||||||
|
# Skip duplicate checking (faster)
|
||||||
|
yarn index-file wordlist.txt -- --no-check
|
||||||
|
|
||||||
|
# Resume from previous state
|
||||||
|
yarn index-file wordlist.txt -- --resume
|
||||||
|
|
||||||
|
# Custom state file
|
||||||
|
yarn index-file wordlist.txt -- --state-file .my-state.json
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🐛 Troubleshooting
|
||||||
|
|
||||||
|
### Cannot connect to Redis
|
||||||
|
```bash
|
||||||
|
# Check if Redis is running
|
||||||
|
redis-cli ping
|
||||||
|
|
||||||
|
# Check Redis status
|
||||||
|
sudo systemctl status redis-server
|
||||||
|
|
||||||
|
# View Redis logs
|
||||||
|
sudo journalctl -u redis-server -f
|
||||||
|
```
|
||||||
|
|
||||||
|
### Application shows Redis errors
|
||||||
|
1. Verify Redis is running: `redis-cli ping`
|
||||||
|
2. Check environment variables in `.env.local`
|
||||||
|
3. Check firewall rules if Redis is on another machine
|
||||||
|
4. Verify Redis password if authentication is enabled
|
||||||
|
|
||||||
|
### Clear stale state files
|
||||||
|
```bash
|
||||||
|
rm -f .indexer-state-*.json
|
||||||
|
```
|
||||||
|
|
||||||
|
## 📈 Monitoring
|
||||||
|
|
||||||
|
### Redis Memory Usage
|
||||||
|
```bash
|
||||||
|
redis-cli INFO memory
|
||||||
|
```
|
||||||
|
|
||||||
|
### Redis Stats
|
||||||
|
```bash
|
||||||
|
redis-cli INFO stats
|
||||||
|
```
|
||||||
|
|
||||||
|
### Application Stats
|
||||||
|
```bash
|
||||||
|
curl http://localhost:3000/api/health | jq .
|
||||||
|
```
|
||||||
|
|
||||||
|
## 🔒 Security (Production)
|
||||||
|
|
||||||
|
### Enable Redis Authentication
|
||||||
|
```bash
|
||||||
|
# Edit redis.conf
|
||||||
|
sudo nano /etc/redis/redis.conf
|
||||||
|
|
||||||
|
# Add/uncomment:
|
||||||
|
requirepass your-strong-password
|
||||||
|
|
||||||
|
# Restart Redis
|
||||||
|
sudo systemctl restart redis-server
|
||||||
|
```
|
||||||
|
|
||||||
|
### Update .env.local
|
||||||
|
```env
|
||||||
|
REDIS_PASSWORD=your-strong-password
|
||||||
|
```
|
||||||
|
|
||||||
|
## 📚 Key Differences from Elasticsearch
|
||||||
|
|
||||||
|
| Feature | Elasticsearch | Redis |
|
||||||
|
|---------|--------------|-------|
|
||||||
|
| Data Model | Document-based | Key-value |
|
||||||
|
| Search Complexity | O(log n) | O(1) |
|
||||||
|
| Setup | Complex cluster | Single instance |
|
||||||
|
| Memory | Higher | Lower |
|
||||||
|
| Latency | ~50ms | <10ms |
|
||||||
|
| Scaling | Shards/Replicas | Cluster/Sentinel |
|
||||||
|
|
||||||
|
## ✅ Verification Checklist
|
||||||
|
|
||||||
|
- [ ] Redis is installed and running
|
||||||
|
- [ ] Application builds without errors (`yarn build`)
|
||||||
|
- [ ] Health endpoint returns OK status
|
||||||
|
- [ ] Can generate hashes from plaintext
|
||||||
|
- [ ] Can search for generated hashes
|
||||||
|
- [ ] Statistics display on homepage
|
||||||
|
- [ ] Bulk indexing script works
|
||||||
|
- [ ] Data persists after application restart
|
||||||
|
|
||||||
|
## 📞 Support
|
||||||
|
|
||||||
|
- Redis Documentation: https://redis.io/docs/
|
||||||
|
- ioredis Documentation: https://github.com/redis/ioredis
|
||||||
|
- Project README: [README.md](README.md)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Quick Test Command:**
|
||||||
|
```bash
|
||||||
|
# One-liner to test everything (the app must already be running on port 3000 for the health check)
|
||||||
|
redis-cli ping && yarn build && curl -s http://localhost:3000/api/health | jq .status
|
||||||
|
```
|
||||||
|
|
||||||
|
If all commands succeed, the migration is working correctly! ✅
|
||||||
76
TESTING.md
76
TESTING.md
@@ -9,7 +9,7 @@ This guide will help you quickly set up and test the Hasher application.
|
|||||||
Ensure you have:
|
Ensure you have:
|
||||||
- ✅ Node.js 18.x or higher (`node --version`)
|
- ✅ Node.js 18.x or higher (`node --version`)
|
||||||
- ✅ npm (`npm --version`)
|
- ✅ npm (`npm --version`)
|
||||||
- ✅ Elasticsearch running on `localhost:9200`
|
- ✅ Redis running on `localhost:6379`
|
||||||
|
|
||||||
### 2. Installation
|
### 2. Installation
|
||||||
|
|
||||||
@@ -26,7 +26,7 @@ npm run dev
|
|||||||
|
|
||||||
The application will be available at: **http://localhost:3000**
|
The application will be available at: **http://localhost:3000**
|
||||||
|
|
||||||
### 3. Verify Elasticsearch Connection
|
### 3. Verify Redis Connection
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Check health endpoint
|
# Check health endpoint
|
||||||
@@ -37,7 +37,15 @@ Expected response:
|
|||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"status": "ok",
|
"status": "ok",
|
||||||
"elasticsearch": { ... }
|
"redis": {
|
||||||
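|
"connected": true,
|
||||||
|
"version": "7.x",
|
||||||
|
"usedMemory": 1572864,
|
||||||
|
"dbSize": 0
|
||||||
|
},
|
||||||
|
"index": {
|
||||||
|
"exists": true,
|
||||||
|
"name": "hasher",
|
||||||
|
"stats": {
|
||||||
|
"documentCount": 0,
|
||||||
|
"indexSize": 0
|
||||||
|
}
|
||||||
|
}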
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -86,20 +94,18 @@ npm run index-file sample-wordlist.txt
|
|||||||
```
|
```
|
||||||
📚 Hasher Indexer
|
📚 Hasher Indexer
|
||||||
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||||
Elasticsearch: http://localhost:9200
|
Redis: localhost:6379
|
||||||
Index: hasher
|
|
||||||
File: sample-wordlist.txt
|
File: sample-wordlist.txt
|
||||||
Batch size: 100
|
Batch size: 100
|
||||||
|
Duplicate check: enabled
|
||||||
|
|
||||||
🔗 Connecting to Elasticsearch...
|
🔗 Connecting to Redis...
|
||||||
✅ Connected successfully
|
✅ Connected successfully
|
||||||
|
|
||||||
📖 Reading file...
|
📖 Reading file...
|
||||||
✅ Found 20 words/phrases to process
|
✅ Found 20 words/phrases to process
|
||||||
|
|
||||||
⏳ Progress: 20/20 (100.0%) - Indexed: 20, Errors: 0
|
⏳ Progress: 20/20 (100.0%) - Indexed: 20, Skipped: 0, Errors: 0
|
||||||
|
|
||||||
🔄 Refreshing index...
|
|
||||||
|
|
||||||
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||||
✅ Indexing complete!
|
✅ Indexing complete!
|
||||||
@@ -185,13 +191,13 @@ fetch('/api/search', {
|
|||||||
- [ ] Results display correctly
|
- [ ] Results display correctly
|
||||||
|
|
||||||
### Data Persistence
|
### Data Persistence
|
||||||
- [ ] New plaintext is saved to Elasticsearch
|
- [ ] New plaintext is saved to Redis
|
||||||
- [ ] Saved hashes can be found in subsequent searches
|
- [ ] Saved hashes can be found in subsequent searches
|
||||||
- [ ] Bulk indexing saves all entries
|
- [ ] Bulk indexing saves all entries
|
||||||
- [ ] Index is created automatically if missing
|
- [ ] Redis keys are created with the expected patterns (`hash:plaintext:{plaintext}`, `hash:index:{algorithm}:{hash}`)
|
||||||
|
|
||||||
### Error Handling
|
### Error Handling
|
||||||
- [ ] Elasticsearch connection errors are handled
|
- [ ] Redis connection errors are handled
|
||||||
- [ ] Empty search queries are prevented
|
- [ ] Empty search queries are prevented
|
||||||
- [ ] Invalid input is handled gracefully
|
- [ ] Invalid input is handled gracefully
|
||||||
- [ ] Network errors show user-friendly messages
|
- [ ] Network errors show user-friendly messages
|
||||||
@@ -200,15 +206,16 @@ fetch('/api/search', {
|
|||||||
|
|
||||||
## 🐛 Common Issues & Solutions
|
## 🐛 Common Issues & Solutions
|
||||||
|
|
||||||
### Issue: Cannot connect to Elasticsearch
|
### Issue: Cannot connect to Redis
|
||||||
|
|
||||||
**Solution**:
|
**Solution**:
|
||||||
```bash
|
```bash
|
||||||
# Check if Elasticsearch is running
|
# Check if Redis is running
|
||||||
curl http://localhost:9200
|
redis-cli ping
|
||||||
|
|
||||||
# If not accessible, update the environment variable
|
# If not accessible, update the environment variables
|
||||||
export ELASTICSEARCH_NODE=http://your-elasticsearch-host:9200
|
export REDIS_HOST=localhost
|
||||||
|
export REDIS_PORT=6379
|
||||||
npm run dev
|
npm run dev
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -242,33 +249,34 @@ npm run index-file -- "$(pwd)/sample-wordlist.txt"
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 📊 Verify Data in Elasticsearch
|
## 📊 Verify Data in Redis
|
||||||
|
|
||||||
### Check Index Stats
|
### Check Database Size
|
||||||
```bash
|
```bash
|
||||||
curl http://localhost:9200/hasher/_stats?pretty
|
redis-cli DBSIZE
|
||||||
```
|
```
|
||||||
|
|
||||||
### Count Documents
|
### Get Statistics
|
||||||
```bash
|
```bash
|
||||||
curl http://localhost:9200/hasher/_count?pretty
|
redis-cli HGETALL hash:stats
|
||||||
```
|
```
|
||||||
|
|
||||||
### View Sample Documents
|
### View Sample Documents
|
||||||
```bash
|
```bash
|
||||||
curl http://localhost:9200/hasher/_search?pretty&size=5
|
# List first 10 document keys
|
||||||
|
redis-cli --scan --pattern "hash:plaintext:*" | head -10
|
||||||
|
|
||||||
|
# Get a specific document
|
||||||
|
redis-cli GET "hash:plaintext:password"
|
||||||
```
|
```
|
||||||
|
|
||||||
### Search Specific Hash
|
### Search Specific Hash
|
||||||
```bash
|
```bash
|
||||||
curl http://localhost:9200/hasher/_search?pretty -H 'Content-Type: application/json' -d'
|
# Find document by MD5 hash
|
||||||
{
|
redis-cli GET "hash:index:md5:5f4dcc3b5aa765d61d8327deb882cf99"
|
||||||
"query": {
|
|
||||||
"term": {
|
# Then get the full document
|
||||||
"md5": "5f4dcc3b5aa765d61d8327deb882cf99"
|
redis-cli GET "hash:plaintext:password"
|
||||||
}
|
|
||||||
}
|
|
||||||
}'
|
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
@@ -329,7 +337,7 @@ Create `search.json`:
|
|||||||
- [ ] CORS configuration
|
- [ ] CORS configuration
|
||||||
- [ ] Rate limiting (if implemented)
|
- [ ] Rate limiting (if implemented)
|
||||||
- [ ] Error message information disclosure
|
- [ ] Error message information disclosure
|
||||||
- [ ] Elasticsearch authentication (if enabled)
|
- [ ] Redis authentication (if enabled)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -339,7 +347,7 @@ Before deploying to production:
|
|||||||
|
|
||||||
- [ ] All tests passing
|
- [ ] All tests passing
|
||||||
- [ ] Environment variables configured
|
- [ ] Environment variables configured
|
||||||
- [ ] Elasticsearch secured and backed up
|
- [ ] Redis secured and backed up (RDB/AOF)
|
||||||
- [ ] SSL/TLS certificates installed
|
- [ ] SSL/TLS certificates installed
|
||||||
- [ ] Error logging configured
|
- [ ] Error logging configured
|
||||||
- [ ] Monitoring set up
|
- [ ] Monitoring set up
|
||||||
@@ -357,7 +365,7 @@ Before deploying to production:
|
|||||||
|
|
||||||
## Environment
|
## Environment
|
||||||
- Node.js version:
|
- Node.js version:
|
||||||
- Elasticsearch version:
|
- Redis version:
|
||||||
- Browser(s) tested:
|
- Browser(s) tested:
|
||||||
|
|
||||||
## Test Results
|
## Test Results
|
||||||
|
|||||||
@@ -1,34 +1,29 @@
|
|||||||
import { NextResponse } from 'next/server';
|
import { NextResponse } from 'next/server';
|
||||||
import { esClient, INDEX_NAME } from '@/lib/elasticsearch';
|
import { getRedisInfo, getStats, INDEX_NAME } from '@/lib/redis';
|
||||||
|
|
||||||
export async function GET() {
|
export async function GET() {
|
||||||
try {
|
try {
|
||||||
// Check Elasticsearch connection
|
// Check Redis connection and get info
|
||||||
const health = await esClient.cluster.health({});
|
const redisInfo = await getRedisInfo();
|
||||||
|
|
||||||
// Check if index exists
|
// Get index stats
|
||||||
const indexExists = await esClient.indices.exists({ index: INDEX_NAME });
|
const stats = await getStats();
|
||||||
|
|
||||||
// Get index stats if exists
|
|
||||||
let stats = null;
|
|
||||||
if (indexExists) {
|
|
||||||
const statsResponse = await esClient.indices.stats({ index: INDEX_NAME });
|
|
||||||
stats = {
|
|
||||||
documentCount: statsResponse._all?.primaries?.docs?.count || 0,
|
|
||||||
indexSize: statsResponse._all?.primaries?.store?.size_in_bytes || 0
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
return NextResponse.json({
|
return NextResponse.json({
|
||||||
status: 'ok',
|
status: 'ok',
|
||||||
elasticsearch: {
|
redis: {
|
||||||
cluster: health.cluster_name,
|
connected: redisInfo.connected,
|
||||||
status: health.status,
|
version: redisInfo.version,
|
||||||
|
usedMemory: redisInfo.usedMemory,
|
||||||
|
dbSize: redisInfo.dbSize
|
||||||
},
|
},
|
||||||
index: {
|
index: {
|
||||||
exists: indexExists,
|
exists: true,
|
||||||
name: INDEX_NAME,
|
name: INDEX_NAME,
|
||||||
stats
|
stats: {
|
||||||
|
documentCount: stats.count,
|
||||||
|
indexSize: stats.size
|
||||||
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
|||||||
@@ -1,17 +1,7 @@
|
|||||||
import { NextRequest, NextResponse } from 'next/server';
|
import { NextRequest, NextResponse } from 'next/server';
|
||||||
import { esClient, INDEX_NAME, initializeIndex } from '@/lib/elasticsearch';
|
import { storeHashDocument, findByPlaintext, findByHash, initializeRedis } from '@/lib/redis';
|
||||||
import { generateHashes, detectHashType } from '@/lib/hash';
|
import { generateHashes, detectHashType } from '@/lib/hash';
|
||||||
|
|
||||||
interface HashDocument {
|
|
||||||
plaintext: string;
|
|
||||||
md5: string;
|
|
||||||
sha1: string;
|
|
||||||
sha256: string;
|
|
||||||
sha512: string;
|
|
||||||
bcrypt: string;
|
|
||||||
created_at?: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function POST(request: NextRequest) {
|
export async function POST(request: NextRequest) {
|
||||||
try {
|
try {
|
||||||
const { query } = await request.json();
|
const { query } = await request.json();
|
||||||
@@ -23,8 +13,8 @@ export async function POST(request: NextRequest) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure index exists
|
// Ensure Redis is connected
|
||||||
await initializeIndex();
|
await initializeRedis();
|
||||||
|
|
||||||
const cleanQuery = query.trim().split(/\s+/)[0];
|
const cleanQuery = query.trim().split(/\s+/)[0];
|
||||||
|
|
||||||
@@ -39,37 +29,24 @@ export async function POST(request: NextRequest) {
|
|||||||
const hashType = detectHashType(cleanQueryLower);
|
const hashType = detectHashType(cleanQueryLower);
|
||||||
|
|
||||||
if (hashType) {
|
if (hashType) {
|
||||||
// Query is a hash - search for it in Elasticsearch
|
// Query is a hash - search for it in Redis
|
||||||
const searchResponse = await esClient.search<HashDocument>({
|
const doc = await findByHash(hashType, cleanQueryLower);
|
||||||
index: INDEX_NAME,
|
|
||||||
query: {
|
|
||||||
term: {
|
|
||||||
[hashType]: hashType === 'bcrypt' ? cleanQuery : cleanQueryLower
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
const hits = searchResponse.hits.hits;
|
if (doc) {
|
||||||
|
|
||||||
if (hits.length > 0) {
|
|
||||||
// Found matching plaintext
|
// Found matching plaintext
|
||||||
return NextResponse.json({
|
return NextResponse.json({
|
||||||
found: true,
|
found: true,
|
||||||
hashType,
|
hashType,
|
||||||
hash: cleanQuery,
|
hash: cleanQuery,
|
||||||
results: hits.map((hit) => {
|
results: [{
|
||||||
const source = hit._source!;
|
plaintext: doc.plaintext,
|
||||||
return {
|
hashes: {
|
||||||
plaintext: source.plaintext,
|
md5: doc.md5,
|
||||||
hashes: {
|
sha1: doc.sha1,
|
||||||
md5: source.md5,
|
sha256: doc.sha256,
|
||||||
sha1: source.sha1,
|
sha512: doc.sha512,
|
||||||
sha256: source.sha256,
|
}
|
||||||
sha512: source.sha512,
|
}]
|
||||||
bcrypt: source.bcrypt,
|
|
||||||
}
|
|
||||||
};
|
|
||||||
})
|
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
// Hash not found in database
|
// Hash not found in database
|
||||||
@@ -82,72 +59,41 @@ export async function POST(request: NextRequest) {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Query is plaintext - check if it already exists first
|
// Query is plaintext - check if it already exists first
|
||||||
const existsResponse = await esClient.search<HashDocument>({
|
const existingDoc = await findByPlaintext(cleanQuery);
|
||||||
index: INDEX_NAME,
|
|
||||||
query: {
|
|
||||||
term: {
|
|
||||||
'plaintext.keyword': cleanQuery
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
let hashes;
|
let hashes;
|
||||||
|
let wasGenerated = false;
|
||||||
|
|
||||||
if (existsResponse.hits.hits.length > 0) {
|
if (existingDoc) {
|
||||||
// Plaintext found, retrieve existing hashes
|
// Plaintext found, retrieve existing hashes
|
||||||
const existingDoc = existsResponse.hits.hits[0]._source!;
|
|
||||||
hashes = {
|
hashes = {
|
||||||
md5: existingDoc.md5,
|
md5: existingDoc.md5,
|
||||||
sha1: existingDoc.sha1,
|
sha1: existingDoc.sha1,
|
||||||
sha256: existingDoc.sha256,
|
sha256: existingDoc.sha256,
|
||||||
sha512: existingDoc.sha512,
|
sha512: existingDoc.sha512,
|
||||||
bcrypt: existingDoc.bcrypt,
|
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
// Plaintext not found, generate hashes and check if any hash already exists
|
// Plaintext not found, generate and store hashes
|
||||||
hashes = await generateHashes(cleanQuery);
|
hashes = await generateHashes(cleanQuery);
|
||||||
|
|
||||||
const hashExistsResponse = await esClient.search<HashDocument>({
|
await storeHashDocument({
|
||||||
index: INDEX_NAME,
|
...hashes,
|
||||||
query: {
|
created_at: new Date().toISOString()
|
||||||
bool: {
|
|
||||||
should: [
|
|
||||||
{ term: { md5: hashes.md5 } },
|
|
||||||
{ term: { sha1: hashes.sha1 } },
|
|
||||||
{ term: { sha256: hashes.sha256 } },
|
|
||||||
{ term: { sha512: hashes.sha512 } },
|
|
||||||
],
|
|
||||||
minimum_should_match: 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
|
|
||||||
if (hashExistsResponse.hits.hits.length === 0) {
|
wasGenerated = true;
|
||||||
// No duplicates found, insert new document
|
|
||||||
await esClient.index({
|
|
||||||
index: INDEX_NAME,
|
|
||||||
document: {
|
|
||||||
...hashes,
|
|
||||||
created_at: new Date().toISOString()
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// Refresh index to make the document searchable immediately
|
|
||||||
await esClient.indices.refresh({ index: INDEX_NAME });
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return NextResponse.json({
|
return NextResponse.json({
|
||||||
found: true,
|
found: true,
|
||||||
isPlaintext: true,
|
isPlaintext: true,
|
||||||
plaintext: cleanQuery,
|
plaintext: cleanQuery,
|
||||||
wasGenerated: existsResponse.hits.hits.length === 0,
|
wasGenerated,
|
||||||
hashes: {
|
hashes: {
|
||||||
md5: hashes.md5,
|
md5: hashes.md5,
|
||||||
sha1: hashes.sha1,
|
sha1: hashes.sha1,
|
||||||
sha256: hashes.sha256,
|
sha256: hashes.sha256,
|
||||||
sha512: hashes.sha512,
|
sha512: hashes.sha512,
|
||||||
bcrypt: hashes.bcrypt,
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,8 +14,8 @@ const geistMono = Geist_Mono({
|
|||||||
|
|
||||||
export const metadata: Metadata = {
|
export const metadata: Metadata = {
|
||||||
title: "Hasher - Hash Search & Generator",
|
title: "Hasher - Hash Search & Generator",
|
||||||
description: "Search for hashes or generate them from plaintext. Supports MD5, SHA1, SHA256, SHA512, and Bcrypt. Powered by Elasticsearch.",
|
description: "Search for hashes or generate them from plaintext. Supports MD5, SHA1, SHA256, and SHA512. Powered by Redis.",
|
||||||
keywords: ["hash", "md5", "sha1", "sha256", "sha512", "bcrypt", "hash generator", "hash search", "elasticsearch"],
|
keywords: ["hash", "md5", "sha1", "sha256", "sha512", "hash generator", "hash search", "redis"],
|
||||||
authors: [{ name: "Hasher" }],
|
authors: [{ name: "Hasher" }],
|
||||||
creator: "Hasher",
|
creator: "Hasher",
|
||||||
publisher: "Hasher",
|
publisher: "Hasher",
|
||||||
@@ -28,7 +28,7 @@ export const metadata: Metadata = {
|
|||||||
openGraph: {
|
openGraph: {
|
||||||
type: "website",
|
type: "website",
|
||||||
title: "Hasher - Hash Search & Generator",
|
title: "Hasher - Hash Search & Generator",
|
||||||
description: "Search for hashes or generate them from plaintext. Supports MD5, SHA1, SHA256, SHA512, and Bcrypt.",
|
description: "Search for hashes or generate them from plaintext. Supports MD5, SHA1, SHA256, and SHA512.",
|
||||||
siteName: "Hasher",
|
siteName: "Hasher",
|
||||||
images: [
|
images: [
|
||||||
{
|
{
|
||||||
@@ -42,7 +42,7 @@ export const metadata: Metadata = {
|
|||||||
twitter: {
|
twitter: {
|
||||||
card: "summary",
|
card: "summary",
|
||||||
title: "Hasher - Hash Search & Generator",
|
title: "Hasher - Hash Search & Generator",
|
||||||
description: "Search for hashes or generate them from plaintext. Supports MD5, SHA1, SHA256, SHA512, and Bcrypt.",
|
description: "Search for hashes or generate them from plaintext. Supports MD5, SHA1, SHA256, and SHA512.",
|
||||||
images: ["/logo.png"],
|
images: ["/logo.png"],
|
||||||
},
|
},
|
||||||
viewport: {
|
viewport: {
|
||||||
|
|||||||
62
app/page.tsx
62
app/page.tsx
@@ -1,7 +1,7 @@
|
|||||||
'use client';
|
'use client';
|
||||||
|
|
||||||
import { useState } from 'react';
|
import { useState, useEffect } from 'react';
|
||||||
import { Search, Copy, Check, Hash, Key, AlertCircle, Loader2 } from 'lucide-react';
|
import { Search, Copy, Check, Hash, Key, AlertCircle, Loader2, Database } from 'lucide-react';
|
||||||
|
|
||||||
interface SearchResult {
|
interface SearchResult {
|
||||||
found: boolean;
|
found: boolean;
|
||||||
@@ -15,7 +15,6 @@ interface SearchResult {
|
|||||||
sha1: string;
|
sha1: string;
|
||||||
sha256: string;
|
sha256: string;
|
||||||
sha512: string;
|
sha512: string;
|
||||||
bcrypt: string;
|
|
||||||
};
|
};
|
||||||
results?: Array<{
|
results?: Array<{
|
||||||
plaintext: string;
|
plaintext: string;
|
||||||
@@ -24,18 +23,53 @@ interface SearchResult {
|
|||||||
sha1: string;
|
sha1: string;
|
||||||
sha256: string;
|
sha256: string;
|
||||||
sha512: string;
|
sha512: string;
|
||||||
bcrypt: string;
|
|
||||||
};
|
};
|
||||||
}>;
|
}>;
|
||||||
message?: string;
|
message?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Shape of the statistics object held in the `stats` state of Home.
// The fetch effect fills it from `data.index.stats` of GET /api/health, and the
// header renders `documentCount` via formatNumber and `indexSize` via formatBytes.
// NOTE(review): API.md in this same change documents the health response as a
// top-level `stats: { count, size }` — confirm the route still returns
// `index.stats` with these field names, otherwise the stats banner never renders.
interface IndexStats {
|
||||||
|
documentCount: number;
|
||||||
|
indexSize: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Format a byte count as a short human-readable string (e.g. "1.5 KB").
 *
 * Uses 1024-based units from B up to TB and at most two decimals.
 *
 * @param bytes - Size in bytes.
 * @returns The formatted size. Zero, negative and non-finite inputs render as
 *   "0 B"; values beyond the largest unit are expressed in TB.
 */
function formatBytes(bytes: number): string {
  // Math.log is undefined for these inputs and would produce "NaN undefined".
  if (!Number.isFinite(bytes) || bytes <= 0) return '0 B';

  const k = 1024;
  const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];

  // Clamp the unit index: values below 1 would give a negative index and
  // values of 1024^5 or more would run past the end of `sizes`.
  const rawIndex = Math.floor(Math.log(bytes) / Math.log(k));
  const i = Math.min(Math.max(rawIndex, 0), sizes.length - 1);

  return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
}
|
||||||
|
|
||||||
|
/**
 * Render a number using the runtime's default locale formatting.
 *
 * @param num - Value to display.
 * @returns The result of `toLocaleString()` on the value.
 */
function formatNumber(num: number): string {
  const localized = num.toLocaleString();
  return localized;
}
|
||||||
|
|
||||||
export default function Home() {
|
export default function Home() {
|
||||||
const [query, setQuery] = useState('');
|
const [query, setQuery] = useState('');
|
||||||
const [result, setResult] = useState<SearchResult | null>(null);
|
const [result, setResult] = useState<SearchResult | null>(null);
|
||||||
const [loading, setLoading] = useState(false);
|
const [loading, setLoading] = useState(false);
|
||||||
const [error, setError] = useState('');
|
const [error, setError] = useState('');
|
||||||
const [copiedField, setCopiedField] = useState<string | null>(null);
|
const [copiedField, setCopiedField] = useState<string | null>(null);
|
||||||
|
const [stats, setStats] = useState<IndexStats | null>(null);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
const fetchStats = async () => {
|
||||||
|
try {
|
||||||
|
const response = await fetch('/api/health');
|
||||||
|
if (response.ok) {
|
||||||
|
const data = await response.json();
|
||||||
|
if (data.index?.stats) {
|
||||||
|
setStats(data.index.stats);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (_err) {
|
||||||
|
// Silently fail - stats are not critical
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
fetchStats();
|
||||||
|
}, [result]); // Refresh stats after each search result
|
||||||
|
|
||||||
const handleSearch = async (e: React.FormEvent) => {
|
const handleSearch = async (e: React.FormEvent) => {
|
||||||
e.preventDefault();
|
e.preventDefault();
|
||||||
@@ -108,8 +142,20 @@ export default function Home() {
|
|||||||
Search for hashes or generate them from plaintext
|
Search for hashes or generate them from plaintext
|
||||||
</p>
|
</p>
|
||||||
<p className="text-sm text-gray-500 mt-2">
|
<p className="text-sm text-gray-500 mt-2">
|
||||||
Supports MD5, SHA1, SHA256, SHA512, and Bcrypt
|
Supports MD5, SHA1, SHA256, and SHA512
|
||||||
</p>
|
</p>
|
||||||
|
{stats && (
|
||||||
|
<div className="flex items-center justify-center gap-4 mt-4 text-sm text-gray-500">
|
||||||
|
<div className="flex items-center gap-1.5">
|
||||||
|
<Database className="w-4 h-4" />
|
||||||
|
<span><strong>{formatNumber(stats.documentCount)}</strong> hashes</span>
|
||||||
|
</div>
|
||||||
|
<span className="text-gray-300">•</span>
|
||||||
|
<div>
|
||||||
|
<span><strong>{formatBytes(stats.indexSize)}</strong> indexed</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Search Form */}
|
{/* Search Form */}
|
||||||
@@ -166,7 +212,6 @@ export default function Home() {
|
|||||||
<HashDisplay label="SHA1" value={result.hashes!.sha1} field="sha1-gen" />
|
<HashDisplay label="SHA1" value={result.hashes!.sha1} field="sha1-gen" />
|
||||||
<HashDisplay label="SHA256" value={result.hashes!.sha256} field="sha256-gen" />
|
<HashDisplay label="SHA256" value={result.hashes!.sha256} field="sha256-gen" />
|
||||||
<HashDisplay label="SHA512" value={result.hashes!.sha512} field="sha512-gen" />
|
<HashDisplay label="SHA512" value={result.hashes!.sha512} field="sha512-gen" />
|
||||||
<HashDisplay label="Bcrypt" value={result.hashes!.bcrypt} field="bcrypt-gen" />
|
|
||||||
</div>
|
</div>
|
||||||
{result.wasGenerated && (
|
{result.wasGenerated && (
|
||||||
<div className="mt-6 bg-blue-50 border border-blue-200 rounded-xl p-4">
|
<div className="mt-6 bg-blue-50 border border-blue-200 rounded-xl p-4">
|
||||||
@@ -212,7 +257,6 @@ export default function Home() {
|
|||||||
<HashDisplay label="SHA1" value={item.hashes.sha1} field={`sha1-${idx}`} />
|
<HashDisplay label="SHA1" value={item.hashes.sha1} field={`sha1-${idx}`} />
|
||||||
<HashDisplay label="SHA256" value={item.hashes.sha256} field={`sha256-${idx}`} />
|
<HashDisplay label="SHA256" value={item.hashes.sha256} field={`sha256-${idx}`} />
|
||||||
<HashDisplay label="SHA512" value={item.hashes.sha512} field={`sha512-${idx}`} />
|
<HashDisplay label="SHA512" value={item.hashes.sha512} field={`sha512-${idx}`} />
|
||||||
<HashDisplay label="Bcrypt" value={item.hashes.bcrypt} field={`bcrypt-${idx}`} />
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
))}
|
))}
|
||||||
@@ -256,7 +300,7 @@ export default function Home() {
|
|||||||
</div>
|
</div>
|
||||||
<h3 className="text-xl font-bold text-gray-900 mb-2">Generate Hashes</h3>
|
<h3 className="text-xl font-bold text-gray-900 mb-2">Generate Hashes</h3>
|
||||||
<p className="text-gray-600">
|
<p className="text-gray-600">
|
||||||
Enter any plaintext to instantly generate MD5, SHA1, SHA256, SHA512, and Bcrypt hashes. Results are saved automatically.
|
Enter any plaintext to instantly generate MD5, SHA1, SHA256, and SHA512 hashes. Results are saved automatically.
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -264,7 +308,7 @@ export default function Home() {
|
|||||||
|
|
||||||
{/* Footer */}
|
{/* Footer */}
|
||||||
<footer className="mt-16 text-center text-gray-500 text-sm">
|
<footer className="mt-16 text-center text-gray-500 text-sm">
|
||||||
<p>Powered by Elasticsearch • Built with Next.js</p>
|
<p>Powered by Redis • Built with Next.js</p>
|
||||||
</footer>
|
</footer>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -1,79 +0,0 @@
|
|||||||
import { Client } from '@elastic/elasticsearch';
|
|
||||||
|
|
||||||
const ELASTICSEARCH_NODE = process.env.ELASTICSEARCH_NODE || 'http://localhost:9200';
|
|
||||||
const INDEX_NAME = 'hasher';
|
|
||||||
|
|
||||||
export const esClient = new Client({
|
|
||||||
node: ELASTICSEARCH_NODE,
|
|
||||||
requestTimeout: 30000,
|
|
||||||
maxRetries: 3,
|
|
||||||
});
|
|
||||||
|
|
||||||
export const INDEX_MAPPING = {
|
|
||||||
settings: {
|
|
||||||
number_of_shards: 10,
|
|
||||||
number_of_replicas: 1,
|
|
||||||
analysis: {
|
|
||||||
analyzer: {
|
|
||||||
lowercase_analyzer: {
|
|
||||||
type: 'custom' as const,
|
|
||||||
tokenizer: 'keyword',
|
|
||||||
filter: ['lowercase']
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
mappings: {
|
|
||||||
properties: {
|
|
||||||
plaintext: {
|
|
||||||
type: 'text' as const,
|
|
||||||
analyzer: 'lowercase_analyzer',
|
|
||||||
fields: {
|
|
||||||
keyword: {
|
|
||||||
type: 'keyword' as const
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
md5: {
|
|
||||||
type: 'keyword' as const
|
|
||||||
},
|
|
||||||
sha1: {
|
|
||||||
type: 'keyword' as const
|
|
||||||
},
|
|
||||||
sha256: {
|
|
||||||
type: 'keyword' as const
|
|
||||||
},
|
|
||||||
sha512: {
|
|
||||||
type: 'keyword' as const
|
|
||||||
},
|
|
||||||
bcrypt: {
|
|
||||||
type: 'keyword' as const
|
|
||||||
},
|
|
||||||
created_at: {
|
|
||||||
type: 'date' as const
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
export async function initializeIndex(): Promise<void> {
|
|
||||||
try {
|
|
||||||
const indexExists = await esClient.indices.exists({ index: INDEX_NAME });
|
|
||||||
|
|
||||||
if (!indexExists) {
|
|
||||||
await esClient.indices.create({
|
|
||||||
index: INDEX_NAME,
|
|
||||||
settings: INDEX_MAPPING.settings,
|
|
||||||
mappings: INDEX_MAPPING.mappings
|
|
||||||
});
|
|
||||||
console.log(`Index '${INDEX_NAME}' created successfully with 10 shards`);
|
|
||||||
} else {
|
|
||||||
console.log(`Index '${INDEX_NAME}' already exists`);
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
console.error('Error initializing Elasticsearch index:', error);
|
|
||||||
throw error;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
export { INDEX_NAME };
|
|
||||||
21
lib/hash.ts
21
lib/hash.ts
@@ -1,5 +1,4 @@
|
|||||||
import crypto from 'crypto';
|
import crypto from 'crypto';
|
||||||
import bcrypt from 'bcrypt';
|
|
||||||
|
|
||||||
export interface HashResult {
|
export interface HashResult {
|
||||||
plaintext: string;
|
plaintext: string;
|
||||||
@@ -7,22 +6,18 @@ export interface HashResult {
|
|||||||
sha1: string;
|
sha1: string;
|
||||||
sha256: string;
|
sha256: string;
|
||||||
sha512: string;
|
sha512: string;
|
||||||
bcrypt: string;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate all common hashes for a given plaintext
|
* Generate all common hashes for a given plaintext
|
||||||
*/
|
*/
|
||||||
export async function generateHashes(plaintext: string): Promise<HashResult> {
|
export async function generateHashes(plaintext: string): Promise<HashResult> {
|
||||||
const bcryptHash = await bcrypt.hash(plaintext, 10);
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
plaintext,
|
plaintext,
|
||||||
md5: crypto.createHash('md5').update(plaintext).digest('hex'),
|
md5: crypto.createHash('md5').update(plaintext).digest('hex'),
|
||||||
sha1: crypto.createHash('sha1').update(plaintext).digest('hex'),
|
sha1: crypto.createHash('sha1').update(plaintext).digest('hex'),
|
||||||
sha256: crypto.createHash('sha256').update(plaintext).digest('hex'),
|
sha256: crypto.createHash('sha256').update(plaintext).digest('hex'),
|
||||||
sha512: crypto.createHash('sha512').update(plaintext).digest('hex'),
|
sha512: crypto.createHash('sha512').update(plaintext).digest('hex'),
|
||||||
bcrypt: bcryptHash,
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -52,11 +47,6 @@ export function detectHashType(hash: string): string | null {
|
|||||||
return 'sha512';
|
return 'sha512';
|
||||||
}
|
}
|
||||||
|
|
||||||
// BCrypt: starts with $2a$, $2b$, $2x$, or $2y$
|
|
||||||
if (/^\$2[abxy]\$/.test(cleanHash)) {
|
|
||||||
return 'bcrypt';
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -66,14 +56,3 @@ export function detectHashType(hash: string): string | null {
|
|||||||
export function isHash(input: string): boolean {
|
export function isHash(input: string): boolean {
|
||||||
return detectHashType(input) !== null;
|
return detectHashType(input) !== null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Verify a plaintext against a bcrypt hash
|
|
||||||
*/
|
|
||||||
export async function verifyBcrypt(plaintext: string, hash: string): Promise<boolean> {
|
|
||||||
try {
|
|
||||||
return await bcrypt.compare(plaintext, hash);
|
|
||||||
} catch (_error) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
178
lib/redis.ts
Archivo normal
178
lib/redis.ts
Archivo normal
@@ -0,0 +1,178 @@
|
|||||||
|
import Redis from 'ioredis';
|
||||||
|
|
||||||
|
/**
 * Parse an integer setting, falling back when the value is unset, empty or
 * not a number (so a typo in the environment cannot hand NaN to the client).
 */
function parseIntSetting(raw: string | undefined, fallback: number): number {
  const parsed = parseInt(raw || '', 10);
  return Number.isNaN(parsed) ? fallback : parsed;
}

// Connection settings, overridable through the environment.
const REDIS_HOST = process.env.REDIS_HOST || 'localhost';
const REDIS_PORT = parseIntSetting(process.env.REDIS_PORT, 6379);
const REDIS_PASSWORD = process.env.REDIS_PASSWORD || undefined;
const REDIS_DB = parseIntSetting(process.env.REDIS_DB, 0);

export const INDEX_NAME = 'hasher';

// Single shared Redis client for the module (one ioredis client instance,
// not a connection pool).
export const redisClient = new Redis({
  host: REDIS_HOST,
  port: REDIS_PORT,
  password: REDIS_PASSWORD,
  db: REDIS_DB,
  // Linear back-off between reconnect attempts: 50 ms per attempt, capped at 2 s.
  retryStrategy: (times) => {
    const delay = Math.min(times * 50, 2000);
    return delay;
  },
  maxRetriesPerRequest: 3,
  enableReadyCheck: true,
  lazyConnect: false,
});

// Handle connection errors
redisClient.on('error', (err) => {
  console.error('Redis Client Error:', err);
});

redisClient.on('connect', () => {
  console.log('Redis connected successfully');
});
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Redis Keys Structure:
|
||||||
|
*
|
||||||
|
* 1. Hash documents: hash:plaintext:{plaintext} = JSON string
|
||||||
|
* - Stores all hash data for a plaintext
|
||||||
|
*
|
||||||
|
* 2. Hash indexes: hash:index:{algorithm}:{hash} = plaintext
|
||||||
|
* - Allows reverse lookup from hash to plaintext
|
||||||
|
* - One key per algorithm (md5, sha1, sha256, sha512)
|
||||||
|
*
|
||||||
|
* 3. Statistics: hash:stats = Hash {count, size}
|
||||||
|
* - count: total number of unique plaintexts
|
||||||
|
* - size: approximate total size in bytes
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Record persisted by storeHashDocument as a JSON string under
// `hash:plaintext:{plaintext}` and returned by findByPlaintext / findByHash.
// The md5/sha1/sha256/sha512 fields are also used as reverse-index key suffixes
// (`hash:index:{algorithm}:{hash}`), so they must match the value callers look up.
// `created_at` is a timestamp string; the API route passes `new Date().toISOString()`.
export interface HashDocument {
|
||||||
|
plaintext: string;
|
||||||
|
md5: string;
|
||||||
|
sha1: string;
|
||||||
|
sha256: string;
|
||||||
|
sha512: string;
|
||||||
|
created_at: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Store a hash document in Redis.
 *
 * Queues, in one pipeline: the JSON document under
 * `hash:plaintext:{plaintext}`, one reverse-index key per algorithm under
 * `hash:index:{algorithm}:{hash}`, and increments of the `count` and `size`
 * fields of `hash:stats`.
 *
 * @param doc - Document to persist.
 * @throws The first error reported for any pipelined command, so a partial
 *   write is not mistaken for success.
 */
export async function storeHashDocument(doc: HashDocument): Promise<void> {
  // Serialize once: the same payload is both stored and measured.
  const payload = JSON.stringify(doc);
  const pipeline = redisClient.pipeline();

  // Store main document
  pipeline.set(`hash:plaintext:${doc.plaintext}`, payload);

  // Create indexes for each hash type
  pipeline.set(`hash:index:md5:${doc.md5}`, doc.plaintext);
  pipeline.set(`hash:index:sha1:${doc.sha1}`, doc.plaintext);
  pipeline.set(`hash:index:sha256:${doc.sha256}`, doc.plaintext);
  pipeline.set(`hash:index:sha512:${doc.sha512}`, doc.plaintext);

  // Update statistics. `size` is tracked in bytes, so measure the UTF-8
  // encoding rather than the string length (which counts UTF-16 code units).
  pipeline.hincrby('hash:stats', 'count', 1);
  pipeline.hincrby('hash:stats', 'size', Buffer.byteLength(payload, 'utf8'));

  // exec() resolves with one [error, reply] pair per command; a failed command
  // does not reject the promise, so each pair has to be inspected.
  const replies = await pipeline.exec();
  for (const [err] of replies ?? []) {
    if (err) throw err;
  }
}
|
||||||
|
|
||||||
|
/**
 * Look up the stored document for a plaintext.
 *
 * @param plaintext - Exact plaintext used as the key suffix.
 * @returns The parsed document, or null when nothing is stored under
 *   `hash:plaintext:{plaintext}`.
 */
export async function findByPlaintext(plaintext: string): Promise<HashDocument | null> {
  const stored = await redisClient.get(`hash:plaintext:${plaintext}`);

  return stored ? (JSON.parse(stored) as HashDocument) : null;
}
|
||||||
|
|
||||||
|
/**
 * Find a hash document by any hash value.
 *
 * Resolves the reverse index `hash:index:{algorithm}:{hash}` to a plaintext,
 * then loads that plaintext's document.
 *
 * @param algorithm - Index name segment (md5, sha1, sha256 or sha512).
 * @param hash - Hash value exactly as it was indexed.
 * @returns The matching document, or null when the hash is not indexed or the
 *   document it points to is missing.
 */
export async function findByHash(algorithm: string, hash: string): Promise<HashDocument | null> {
  const indexKey = `hash:index:${algorithm}:${hash}`;
  const plaintext = await redisClient.get(indexKey);

  // GET yields null for a missing key. Compare against null explicitly: the
  // empty string is a legitimate plaintext and must not be treated as a miss.
  if (plaintext === null) return null;

  return findByPlaintext(plaintext);
}
|
||||||
|
|
||||||
|
/**
 * Report whether the plaintext key or any of its four hash index keys exists.
 *
 * @param plaintext - Plaintext whose document key is probed.
 * @param hashes - Hash values whose index keys are probed.
 * @returns true as soon as one EXISTS reply is 1; false otherwise, including
 *   when the pipeline yields no result. Replies carrying an error are ignored.
 */
export async function checkExistence(plaintext: string, hashes: {
  md5: string;
  sha1: string;
  sha256: string;
  sha512: string;
}): Promise<boolean> {
  const candidateKeys = [
    `hash:plaintext:${plaintext}`,
    `hash:index:md5:${hashes.md5}`,
    `hash:index:sha1:${hashes.sha1}`,
    `hash:index:sha256:${hashes.sha256}`,
    `hash:index:sha512:${hashes.sha512}`,
  ];

  // Queue one EXISTS per key and send them in a single pipeline.
  const batch = redisClient.pipeline();
  for (const key of candidateKeys) {
    batch.exists(key);
  }

  const replies = await batch.exec();
  if (!replies) return false;

  for (const [err, value] of replies) {
    if (!err && value === 1) return true;
  }
  return false;
}
|
||||||
|
|
||||||
|
/**
 * Read the counters kept in the `hash:stats` hash.
 *
 * @returns `count` and `size` parsed as base-10 integers; a missing or empty
 *   field is reported as 0.
 */
export async function getStats(): Promise<{ count: number; size: number }> {
  const { count, size } = await redisClient.hgetall('hash:stats');

  const countValue = parseInt(count || '0', 10);
  const sizeValue = parseInt(size || '0', 10);

  return { count: countValue, size: sizeValue };
}
|
||||||
|
|
||||||
|
/**
 * Startup hook kept for compatibility: Redis needs no schema or index setup,
 * so this only issues a PING and logs once it has been answered.
 */
export async function initializeRedis(): Promise<void> {
  // The PING round-trip is the entire check; the log line runs only after it resolves.
  await redisClient.ping();

  console.log('Redis initialized successfully');
}
|
||||||
|
|
||||||
|
/**
 * Get Redis info for health check.
 *
 * @returns
 *   - `connected`: whether the client status is currently `ready`
 *   - `version`: the `redis_version` field of INFO server, or "unknown"
 *   - `usedMemory`: the `used_memory` field of INFO memory as an integer, or 0
 *     when it is absent or unparseable
 *   - `dbSize`: the DBSIZE reply
 */
export async function getRedisInfo(): Promise<{
  connected: boolean;
  version: string;
  usedMemory: number;
  dbSize: number;
}> {
  // The three commands are independent, so issue them together instead of
  // paying three sequential round-trips.
  const [info, memory, dbSize] = await Promise.all([
    redisClient.info('server'),
    redisClient.info('memory'),
    redisClient.dbsize(),
  ]);

  // INFO replies are "field:value" lines. Anchor the pattern to a whole line so
  // a field name can never match inside a longer one.
  const parseInfo = (infoStr: string, key: string): string => {
    const match = infoStr.match(new RegExp(`^${key}:(.+)$`, 'm'));
    return match ? match[1].trim() : 'unknown';
  };

  return {
    connected: redisClient.status === 'ready',
    version: parseInfo(info, 'redis_version'),
    usedMemory: parseInt(parseInfo(memory, 'used_memory'), 10) || 0,
    dbSize
  };
}
|
||||||
|
|
||||||
|
export { REDIS_HOST, REDIS_PORT };
|
||||||
11
package.json
11
package.json
@@ -1,14 +1,14 @@
|
|||||||
{
|
{
|
||||||
"name": "hasher",
|
"name": "hasher",
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"description": "A modern hash search and generation tool powered by Elasticsearch and Next.js",
|
"description": "A modern hash search and generation tool powered by Redis and Next.js",
|
||||||
"keywords": [
|
"keywords": [
|
||||||
"hash",
|
"hash",
|
||||||
"md5",
|
"md5",
|
||||||
"sha1",
|
"sha1",
|
||||||
"sha256",
|
"sha256",
|
||||||
"sha512",
|
"sha512",
|
||||||
"elasticsearch",
|
"redis",
|
||||||
"nextjs",
|
"nextjs",
|
||||||
"cryptography",
|
"cryptography",
|
||||||
"security",
|
"security",
|
||||||
@@ -34,12 +34,11 @@
|
|||||||
"build": "next build",
|
"build": "next build",
|
||||||
"start": "next start",
|
"start": "next start",
|
||||||
"lint": "eslint",
|
"lint": "eslint",
|
||||||
"index-file": "tsx scripts/index-file.ts"
|
"index-file": "tsx scripts/index-file.ts",
|
||||||
|
"remove-duplicates": "tsx scripts/remove-duplicates.ts"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@elastic/elasticsearch": "^9.2.0",
|
"ioredis": "^5.4.2",
|
||||||
"@types/bcrypt": "^6.0.0",
|
|
||||||
"bcrypt": "^6.0.0",
|
|
||||||
"lucide-react": "^0.555.0",
|
"lucide-react": "^0.555.0",
|
||||||
"next": "15.4.8",
|
"next": "15.4.8",
|
||||||
"react": "19.1.2",
|
"react": "19.1.2",
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"name": "Hasher - Hash Search & Generator",
|
"name": "Hasher - Hash Search & Generator",
|
||||||
"short_name": "Hasher",
|
"short_name": "Hasher",
|
||||||
"description": "Search for hashes or generate them from plaintext. Supports MD5, SHA1, SHA256, SHA512, and Bcrypt.",
|
"description": "Search for hashes or generate them from plaintext. Supports MD5, SHA1, SHA256, and SHA512.",
|
||||||
"start_url": "/",
|
"start_url": "/",
|
||||||
"display": "standalone",
|
"display": "standalone",
|
||||||
"background_color": "#ffffff",
|
"background_color": "#ffffff",
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
* Hasher Indexer Script
|
* Hasher Indexer Script
|
||||||
*
|
*
|
||||||
* This script reads a text file with one word/phrase per line and indexes
|
* This script reads a text file with one word/phrase per line and indexes
|
||||||
* all the generated hashes into Elasticsearch.
|
* all the generated hashes into Redis.
|
||||||
*
|
*
|
||||||
* Usage:
|
* Usage:
|
||||||
* npx tsx scripts/index-file.ts <path-to-file.txt> [options]
|
* npx tsx scripts/index-file.ts <path-to-file.txt> [options]
|
||||||
@@ -14,17 +14,21 @@
|
|||||||
* --batch-size=<number> Number of items to process in each batch (default: 100)
|
* --batch-size=<number> Number of items to process in each batch (default: 100)
|
||||||
* --resume Resume from last saved state (default: true)
|
* --resume Resume from last saved state (default: true)
|
||||||
* --no-resume Start from beginning, ignore saved state
|
* --no-resume Start from beginning, ignore saved state
|
||||||
|
* --no-check Skip duplicate checking (faster, but may create duplicates)
|
||||||
* --state-file=<path> Custom state file path (default: .indexer-state-<filename>.json)
|
* --state-file=<path> Custom state file path (default: .indexer-state-<filename>.json)
|
||||||
* --help, -h Show this help message
|
* --help, -h Show this help message
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { Client } from '@elastic/elasticsearch';
|
import Redis from 'ioredis';
|
||||||
import { createReadStream, existsSync, readFileSync, writeFileSync, unlinkSync } from 'fs';
|
import { createReadStream, existsSync, readFileSync, writeFileSync, unlinkSync } from 'fs';
|
||||||
import { resolve, basename } from 'path';
|
import { resolve, basename } from 'path';
|
||||||
import { createInterface } from 'readline';
|
import { createInterface } from 'readline';
|
||||||
import crypto from 'crypto';
|
import crypto from 'crypto';
|
||||||
|
|
||||||
const ELASTICSEARCH_NODE = process.env.ELASTICSEARCH_NODE || 'http://localhost:9200';
|
const REDIS_HOST = process.env.REDIS_HOST || 'localhost';
|
||||||
|
const REDIS_PORT = parseInt(process.env.REDIS_PORT || '6379', 10);
|
||||||
|
const REDIS_PASSWORD = process.env.REDIS_PASSWORD || undefined;
|
||||||
|
const REDIS_DB = parseInt(process.env.REDIS_DB || '0', 10);
|
||||||
const INDEX_NAME = 'hasher';
|
const INDEX_NAME = 'hasher';
|
||||||
const DEFAULT_BATCH_SIZE = 100;
|
const DEFAULT_BATCH_SIZE = 100;
|
||||||
|
|
||||||
@@ -34,7 +38,6 @@ interface HashDocument {
|
|||||||
sha1: string;
|
sha1: string;
|
||||||
sha256: string;
|
sha256: string;
|
||||||
sha512: string;
|
sha512: string;
|
||||||
bcrypt: string;
|
|
||||||
created_at: string;
|
created_at: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -54,6 +57,7 @@ interface ParsedArgs {
|
|||||||
filePath: string | null;
|
filePath: string | null;
|
||||||
batchSize: number;
|
batchSize: number;
|
||||||
resume: boolean;
|
resume: boolean;
|
||||||
|
checkDuplicates: boolean;
|
||||||
stateFile: string | null;
|
stateFile: string | null;
|
||||||
showHelp: boolean;
|
showHelp: boolean;
|
||||||
}
|
}
|
||||||
@@ -63,6 +67,7 @@ function parseArgs(args: string[]): ParsedArgs {
|
|||||||
filePath: null,
|
filePath: null,
|
||||||
batchSize: DEFAULT_BATCH_SIZE,
|
batchSize: DEFAULT_BATCH_SIZE,
|
||||||
resume: true,
|
resume: true,
|
||||||
|
checkDuplicates: true,
|
||||||
stateFile: null,
|
stateFile: null,
|
||||||
showHelp: false
|
showHelp: false
|
||||||
};
|
};
|
||||||
@@ -76,6 +81,8 @@ function parseArgs(args: string[]): ParsedArgs {
|
|||||||
result.resume = true;
|
result.resume = true;
|
||||||
} else if (arg === '--no-resume') {
|
} else if (arg === '--no-resume') {
|
||||||
result.resume = false;
|
result.resume = false;
|
||||||
|
} else if (arg === '--no-check') {
|
||||||
|
result.checkDuplicates = false;
|
||||||
} else if (arg.startsWith('--batch-size=')) {
|
} else if (arg.startsWith('--batch-size=')) {
|
||||||
const value = arg.split('=')[1];
|
const value = arg.split('=')[1];
|
||||||
const parsed = parseInt(value, 10);
|
const parsed = parseInt(value, 10);
|
||||||
@@ -153,16 +160,12 @@ function deleteState(stateFile: string): void {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async function generateHashes(plaintext: string): Promise<HashDocument> {
|
async function generateHashes(plaintext: string): Promise<HashDocument> {
|
||||||
const bcrypt = await import('bcrypt');
|
|
||||||
const bcryptHash = await bcrypt.default.hash(plaintext, 10);
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
plaintext,
|
plaintext,
|
||||||
md5: crypto.createHash('md5').update(plaintext).digest('hex'),
|
md5: crypto.createHash('md5').update(plaintext).digest('hex'),
|
||||||
sha1: crypto.createHash('sha1').update(plaintext).digest('hex'),
|
sha1: crypto.createHash('sha1').update(plaintext).digest('hex'),
|
||||||
sha256: crypto.createHash('sha256').update(plaintext).digest('hex'),
|
sha256: crypto.createHash('sha256').update(plaintext).digest('hex'),
|
||||||
sha512: crypto.createHash('sha512').update(plaintext).digest('hex'),
|
sha512: crypto.createHash('sha512').update(plaintext).digest('hex'),
|
||||||
bcrypt: bcryptHash,
|
|
||||||
created_at: new Date().toISOString()
|
created_at: new Date().toISOString()
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@@ -180,29 +183,46 @@ Options:
|
|||||||
--batch-size <number> Alternative syntax for batch size
|
--batch-size <number> Alternative syntax for batch size
|
||||||
--resume Resume from last saved state (default)
|
--resume Resume from last saved state (default)
|
||||||
--no-resume Start from beginning, ignore saved state
|
--no-resume Start from beginning, ignore saved state
|
||||||
|
--no-check Skip duplicate checking (faster, but may create duplicates)
|
||||||
--state-file=<path> Custom state file path
|
--state-file=<path> Custom state file path
|
||||||
--help, -h Show this help message
|
--help, -h Show this help message
|
||||||
|
|
||||||
Environment Variables:
|
Environment Variables:
|
||||||
ELASTICSEARCH_NODE Elasticsearch node URL (default: http://localhost:9200)
|
REDIS_HOST Redis host (default: localhost)
|
||||||
|
REDIS_PORT Redis port (default: 6379)
|
||||||
|
REDIS_PASSWORD Redis password (optional)
|
||||||
|
REDIS_DB Redis database number (default: 0)
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
npx tsx scripts/index-file.ts wordlist.txt
|
npx tsx scripts/index-file.ts wordlist.txt
|
||||||
npx tsx scripts/index-file.ts wordlist.txt --batch-size=500
|
npx tsx scripts/index-file.ts wordlist.txt --batch-size=500
|
||||||
npx tsx scripts/index-file.ts wordlist.txt --batch-size 500
|
npx tsx scripts/index-file.ts wordlist.txt --batch-size 500
|
||||||
npx tsx scripts/index-file.ts wordlist.txt --no-resume
|
npx tsx scripts/index-file.ts wordlist.txt --no-resume
|
||||||
npm run index-file -- wordlist.txt --batch-size=500
|
npx tsx scripts/index-file.ts wordlist.txt --no-check
|
||||||
|
npm run index-file -- wordlist.txt --batch-size=500 --no-check
|
||||||
|
|
||||||
State Management:
|
State Management:
|
||||||
The script automatically saves progress to a state file. If interrupted,
|
The script automatically saves progress to a state file. If interrupted,
|
||||||
it will resume from where it left off on the next run. Use --no-resume
|
it will resume from where it left off on the next run. Use --no-resume
|
||||||
to start fresh.
|
to start fresh.
|
||||||
|
|
||||||
|
Duplicate Checking:
|
||||||
|
By default, the script checks if each plaintext or hash already exists
|
||||||
|
in the index before inserting. Use --no-check to skip this verification
|
||||||
|
for faster indexing (useful when you're sure there are no duplicates).
|
||||||
`);
|
`);
|
||||||
process.exit(0);
|
process.exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function indexFile(filePath: string, batchSize: number, shouldResume: boolean, customStateFile: string | null) {
|
async function indexFile(filePath: string, batchSize: number, shouldResume: boolean, checkDuplicates: boolean, customStateFile: string | null) {
|
||||||
const client = new Client({ node: ELASTICSEARCH_NODE });
|
const client = new Redis({
|
||||||
|
host: REDIS_HOST,
|
||||||
|
port: REDIS_PORT,
|
||||||
|
password: REDIS_PASSWORD,
|
||||||
|
db: REDIS_DB,
|
||||||
|
retryStrategy: (times) => Math.min(times * 50, 2000),
|
||||||
|
});
|
||||||
|
|
||||||
const absolutePath = resolve(filePath);
|
const absolutePath = resolve(filePath);
|
||||||
const stateFile = customStateFile || getDefaultStateFile(absolutePath);
|
const stateFile = customStateFile || getDefaultStateFile(absolutePath);
|
||||||
const fileHash = getFileHash(absolutePath);
|
const fileHash = getFileHash(absolutePath);
|
||||||
@@ -240,10 +260,11 @@ async function indexFile(filePath: string, batchSize: number, shouldResume: bool
|
|||||||
|
|
||||||
console.log(`📚 Hasher Indexer`);
|
console.log(`📚 Hasher Indexer`);
|
||||||
console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
|
console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
|
||||||
console.log(`Elasticsearch: ${ELASTICSEARCH_NODE}`);
|
console.log(`Redis: ${REDIS_HOST}:${REDIS_PORT} (DB ${REDIS_DB})`);
|
||||||
console.log(`Index: ${INDEX_NAME}`);
|
console.log(`Index: ${INDEX_NAME}`);
|
||||||
console.log(`File: ${filePath}`);
|
console.log(`File: ${filePath}`);
|
||||||
console.log(`Batch size: ${batchSize}`);
|
console.log(`Batch size: ${batchSize}`);
|
||||||
|
console.log(`Check duplicates: ${checkDuplicates ? 'yes' : 'no (--no-check)'}`);
|
||||||
console.log(`State file: ${stateFile}`);
|
console.log(`State file: ${stateFile}`);
|
||||||
if (resumingFrom > 0) {
|
if (resumingFrom > 0) {
|
||||||
console.log(`Resuming from: line ${resumingFrom}`);
|
console.log(`Resuming from: line ${resumingFrom}`);
|
||||||
@@ -273,8 +294,8 @@ async function indexFile(filePath: string, batchSize: number, shouldResume: bool
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
// Test connection
|
// Test connection
|
||||||
console.log('🔗 Connecting to Elasticsearch...');
|
console.log('🔗 Connecting to Redis...');
|
||||||
await client.cluster.health({});
|
await client.ping();
|
||||||
console.log('✅ Connected successfully\n');
|
console.log('✅ Connected successfully\n');
|
||||||
|
|
||||||
// Process file line by line using streams
|
// Process file line by line using streams
|
||||||
@@ -297,8 +318,6 @@ async function indexFile(filePath: string, batchSize: number, shouldResume: bool
|
|||||||
if (batch.length === 0) return;
|
if (batch.length === 0) return;
|
||||||
if (isInterrupted) return;
|
if (isInterrupted) return;
|
||||||
|
|
||||||
const bulkOperations: any[] = [];
|
|
||||||
|
|
||||||
// Generate hashes for all items in batch first
|
// Generate hashes for all items in batch first
|
||||||
const batchWithHashes = await Promise.all(
|
const batchWithHashes = await Promise.all(
|
||||||
batch.map(async (plaintext: string) => ({
|
batch.map(async (plaintext: string) => ({
|
||||||
@@ -307,86 +326,82 @@ async function indexFile(filePath: string, batchSize: number, shouldResume: bool
|
|||||||
}))
|
}))
|
||||||
);
|
);
|
||||||
|
|
||||||
// Check which items already exist (by plaintext or any hash)
|
const pipeline = client.pipeline();
|
||||||
const md5List = batchWithHashes.map((item: any) => item.hashes.md5);
|
let toIndex: typeof batchWithHashes = [];
|
||||||
const sha1List = batchWithHashes.map((item: any) => item.hashes.sha1);
|
|
||||||
const sha256List = batchWithHashes.map((item: any) => item.hashes.sha256);
|
|
||||||
const sha512List = batchWithHashes.map((item: any) => item.hashes.sha512);
|
|
||||||
|
|
||||||
const existingCheck = await client.search({
|
if (checkDuplicates) {
|
||||||
index: INDEX_NAME,
|
// Check which items already exist
|
||||||
size: batchSize * 5,
|
const existenceChecks = await Promise.all(
|
||||||
query: {
|
batchWithHashes.map(async (item) => {
|
||||||
bool: {
|
const plaintextExists = await client.exists(`hash:plaintext:${item.plaintext}`);
|
||||||
should: [
|
if (plaintextExists) return { item, exists: true };
|
||||||
{ terms: { 'plaintext.keyword': batch } },
|
|
||||||
{ terms: { md5: md5List } },
|
// Check if any hash exists
|
||||||
{ terms: { sha1: sha1List } },
|
const md5Exists = await client.exists(`hash:index:md5:${item.hashes.md5}`);
|
||||||
{ terms: { sha256: sha256List } },
|
const sha1Exists = await client.exists(`hash:index:sha1:${item.hashes.sha1}`);
|
||||||
{ terms: { sha512: sha512List } },
|
const sha256Exists = await client.exists(`hash:index:sha256:${item.hashes.sha256}`);
|
||||||
],
|
const sha512Exists = await client.exists(`hash:index:sha512:${item.hashes.sha512}`);
|
||||||
minimum_should_match: 1
|
|
||||||
|
return {
|
||||||
|
item,
|
||||||
|
exists: md5Exists || sha1Exists || sha256Exists || sha512Exists
|
||||||
|
};
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
for (const check of existenceChecks) {
|
||||||
|
if (check.exists) {
|
||||||
|
state.skipped++;
|
||||||
|
sessionSkipped++;
|
||||||
|
} else {
|
||||||
|
toIndex.push(check.item);
|
||||||
}
|
}
|
||||||
},
|
|
||||||
_source: ['plaintext', 'md5', 'sha1', 'sha256', 'sha512']
|
|
||||||
});
|
|
||||||
|
|
||||||
// Create a set of existing hashes for quick lookup
|
|
||||||
const existingHashes = new Set<string>();
|
|
||||||
existingCheck.hits.hits.forEach((hit: any) => {
|
|
||||||
const src = hit._source;
|
|
||||||
existingHashes.add(src.plaintext);
|
|
||||||
existingHashes.add(src.md5);
|
|
||||||
existingHashes.add(src.sha1);
|
|
||||||
existingHashes.add(src.sha256);
|
|
||||||
existingHashes.add(src.sha512);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Prepare bulk operations only for items that don't have any duplicate hash
|
|
||||||
let batchSkipped = 0;
|
|
||||||
for (const item of batchWithHashes) {
|
|
||||||
const isDuplicate =
|
|
||||||
existingHashes.has(item.plaintext) ||
|
|
||||||
existingHashes.has(item.hashes.md5) ||
|
|
||||||
existingHashes.has(item.hashes.sha1) ||
|
|
||||||
existingHashes.has(item.hashes.sha256) ||
|
|
||||||
existingHashes.has(item.hashes.sha512);
|
|
||||||
|
|
||||||
if (!isDuplicate) {
|
|
||||||
bulkOperations.push({ index: { _index: INDEX_NAME } });
|
|
||||||
bulkOperations.push(item.hashes);
|
|
||||||
} else {
|
|
||||||
batchSkipped++;
|
|
||||||
state.skipped++;
|
|
||||||
sessionSkipped++;
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// No duplicate checking - index everything
|
||||||
|
toIndex = batchWithHashes;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Execute bulk operation only if there are new items to insert
|
// Execute bulk operations
|
||||||
if (bulkOperations.length > 0) {
|
if (toIndex.length > 0) {
|
||||||
try {
|
try {
|
||||||
const bulkResponse = await client.bulk({
|
for (const item of toIndex) {
|
||||||
operations: bulkOperations,
|
const doc = item.hashes;
|
||||||
refresh: false
|
const key = `hash:plaintext:${doc.plaintext}`;
|
||||||
});
|
|
||||||
|
|
||||||
if (bulkResponse.errors) {
|
// Store main document
|
||||||
const errorCount = bulkResponse.items.filter((item: any) => item.index?.error).length;
|
pipeline.set(key, JSON.stringify(doc));
|
||||||
|
|
||||||
|
// Create indexes for each hash type
|
||||||
|
pipeline.set(`hash:index:md5:${doc.md5}`, doc.plaintext);
|
||||||
|
pipeline.set(`hash:index:sha1:${doc.sha1}`, doc.plaintext);
|
||||||
|
pipeline.set(`hash:index:sha256:${doc.sha256}`, doc.plaintext);
|
||||||
|
pipeline.set(`hash:index:sha512:${doc.sha512}`, doc.plaintext);
|
||||||
|
|
||||||
|
// Update statistics
|
||||||
|
pipeline.hincrby('hash:stats', 'count', 1);
|
||||||
|
pipeline.hincrby('hash:stats', 'size', JSON.stringify(doc).length);
|
||||||
|
}
|
||||||
|
|
||||||
|
const results = await pipeline.exec();
|
||||||
|
|
||||||
|
// Count errors
|
||||||
|
const errorCount = results?.filter(([err]) => err !== null).length || 0;
|
||||||
|
|
||||||
|
if (errorCount > 0) {
|
||||||
state.errors += errorCount;
|
state.errors += errorCount;
|
||||||
sessionErrors += errorCount;
|
sessionErrors += errorCount;
|
||||||
const successCount = (bulkOperations.length / 2) - errorCount;
|
const successCount = toIndex.length - errorCount;
|
||||||
state.indexed += successCount;
|
state.indexed += successCount;
|
||||||
sessionIndexed += successCount;
|
sessionIndexed += successCount;
|
||||||
} else {
|
} else {
|
||||||
const count = bulkOperations.length / 2;
|
state.indexed += toIndex.length;
|
||||||
state.indexed += count;
|
sessionIndexed += toIndex.length;
|
||||||
sessionIndexed += count;
|
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(`\n❌ Error processing batch:`, error);
|
console.error(`\n❌ Error processing batch:`, error);
|
||||||
const count = bulkOperations.length / 2;
|
state.errors += toIndex.length;
|
||||||
state.errors += count;
|
sessionErrors += toIndex.length;
|
||||||
sessionErrors += count;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -438,9 +453,8 @@ async function indexFile(filePath: string, batchSize: number, shouldResume: bool
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Refresh index
|
// No refresh needed for Redis
|
||||||
console.log('\n\n🔄 Refreshing index...');
|
console.log('\n\n✅ All data persisted to Redis');
|
||||||
await client.indices.refresh({ index: INDEX_NAME });
|
|
||||||
|
|
||||||
// Delete state file on successful completion
|
// Delete state file on successful completion
|
||||||
deleteState(stateFile);
|
deleteState(stateFile);
|
||||||
@@ -498,9 +512,10 @@ console.log(`\n🔧 Configuration:`);
|
|||||||
console.log(` File: ${filePath}`);
|
console.log(` File: ${filePath}`);
|
||||||
console.log(` Batch size: ${parsedArgs.batchSize}`);
|
console.log(` Batch size: ${parsedArgs.batchSize}`);
|
||||||
console.log(` Resume: ${parsedArgs.resume}`);
|
console.log(` Resume: ${parsedArgs.resume}`);
|
||||||
|
console.log(` Check duplicates: ${parsedArgs.checkDuplicates}`);
|
||||||
if (parsedArgs.stateFile) {
|
if (parsedArgs.stateFile) {
|
||||||
console.log(` State file: ${parsedArgs.stateFile}`);
|
console.log(` State file: ${parsedArgs.stateFile}`);
|
||||||
}
|
}
|
||||||
console.log('');
|
console.log('');
|
||||||
|
|
||||||
indexFile(filePath, parsedArgs.batchSize, parsedArgs.resume, parsedArgs.stateFile).catch(console.error);
|
indexFile(filePath, parsedArgs.batchSize, parsedArgs.resume, parsedArgs.checkDuplicates, parsedArgs.stateFile).catch(console.error);
|
||||||
|
|||||||
379
scripts/remove-duplicates.ts
Archivo normal
379
scripts/remove-duplicates.ts
Archivo normal
@@ -0,0 +1,379 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Hasher Duplicate Remover Script
|
||||||
|
*
|
||||||
|
* This script finds and removes duplicate entries from Redis.
|
||||||
|
* It identifies duplicates by checking plaintext, md5, sha1, sha256, and sha512 fields.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* npx tsx scripts/remove-duplicates.ts [options]
|
||||||
|
* npm run remove-duplicates [-- options]
|
||||||
|
*
|
||||||
|
* Options:
|
||||||
|
* --dry-run Show duplicates without removing them (default)
|
||||||
|
* --execute Actually remove the duplicates
|
||||||
|
* --field=<field> Check duplicates only on this field (plaintext, md5, sha1, sha256, sha512)
|
||||||
|
* --help, -h Show this help message
|
||||||
|
*/
|
||||||
|
|
||||||
|
import Redis from 'ioredis';
|
||||||
|
|
||||||
|
// Redis connection settings. Each one may be overridden through the
// environment; an unset or empty variable falls back to the default.
const REDIS_HOST = process.env.REDIS_HOST || 'localhost';
const REDIS_PORT = Number.parseInt(process.env.REDIS_PORT || '6379', 10);
const REDIS_PASSWORD = process.env.REDIS_PASSWORD || undefined;
const REDIS_DB = Number.parseInt(process.env.REDIS_DB || '0', 10);

// Logical index name; this script only prints it in its banner.
const INDEX_NAME = 'hasher';
|
||||||
|
|
||||||
|
/** Command-line options recognised by the duplicate remover. */
interface ParsedArgs {
  /** When true, duplicates are only reported; nothing is deleted. */
  dryRun: boolean;
  /** Single field to inspect, or null to inspect the default set of hash fields. */
  field: string | null;
  /** True when the usage text was requested. */
  showHelp: boolean;
}
|
||||||
|
|
||||||
|
/** A set of stored documents that share the same value for one field. */
interface DuplicateGroup {
  /** The shared value the group was keyed on (a hash, or the plaintext itself). */
  value: string;
  /** Name of the field the duplicates were detected on. */
  field: string;
  /** Plaintexts of every document in the group, ordered oldest first. */
  plaintexts: string[];
  /** Plaintext of the document that is kept (the first of `plaintexts`). */
  keepPlaintext: string;
  /** Plaintexts of the documents scheduled for removal. */
  deletePlaintexts: string[];
}
|
||||||
|
|
||||||
|
/** Shape of the JSON document this script reads from `hash:plaintext:<plaintext>`. */
interface HashDocument {
  /** The original input string. */
  plaintext: string;
  /** Hash values; each is also used as the suffix of a `hash:index:<algo>:` key. */
  md5: string;
  sha1: string;
  sha256: string;
  sha512: string;
  /** Creation timestamp; parsed with `new Date()` to decide which duplicate is oldest. */
  created_at: string;
}
|
||||||
|
|
||||||
|
/**
 * Parses the command-line arguments of the duplicate remover.
 *
 * Recognised flags:
 *   --help, -h        request the usage text
 *   --dry-run         report only (this is the default)
 *   --execute         actually delete duplicates
 *   --field=<name>    restrict the check to one field
 *   --field <name>    same, with the value as the following argument
 *
 * Unrecognised arguments are ignored. The field name is NOT validated here.
 *
 * @param args Arguments after the script name (e.g. `process.argv.slice(2)`).
 * @returns The parsed options, with defaults for anything not supplied.
 */
function parseArgs(args: string[]): ParsedArgs {
  const FIELD_PREFIX = '--field=';
  const result: ParsedArgs = {
    dryRun: true,
    field: null,
    showHelp: false
  };

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];

    if (arg === '--help' || arg === '-h') {
      result.showHelp = true;
    } else if (arg === '--dry-run') {
      result.dryRun = true;
    } else if (arg === '--execute') {
      result.dryRun = false;
    } else if (arg.startsWith(FIELD_PREFIX)) {
      // Keep everything after the first '='. Splitting on '=' and taking
      // element 1 would turn "--field=md5=x" into "md5", letting a malformed
      // value slip past the caller's validation.
      result.field = arg.slice(FIELD_PREFIX.length);
    } else if (arg === '--field') {
      const nextArg = args[i + 1];
      // Only consume the next argument when it is a value, not another flag.
      if (nextArg && !nextArg.startsWith('-')) {
        result.field = nextArg;
        i++;
      }
    }
  }

  return result;
}
|
||||||
|
|
||||||
|
/**
 * Prints the usage text (options, environment variables, examples, notes)
 * with console.log and then terminates the process with exit code 0, so
 * control never returns to the caller.
 */
function showHelp() {
|
||||||
|
console.log(`
|
||||||
|
Hasher Duplicate Remover Script
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
npx tsx scripts/remove-duplicates.ts [options]
|
||||||
|
npm run remove-duplicates [-- options]
|
||||||
|
|
||||||
|
Options:
|
||||||
|
--dry-run Show duplicates without removing them (default)
|
||||||
|
--execute Actually remove the duplicates
|
||||||
|
--field=<field> Check duplicates only on this field
|
||||||
|
Valid fields: plaintext, md5, sha1, sha256, sha512
|
||||||
|
--help, -h Show this help message
|
||||||
|
|
||||||
|
Environment Variables:
|
||||||
|
REDIS_HOST Redis host (default: localhost)
|
||||||
|
REDIS_PORT Redis port (default: 6379)
|
||||||
|
REDIS_PASSWORD Redis password (optional)
|
||||||
|
REDIS_DB Redis database number (default: 0)
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
npx tsx scripts/remove-duplicates.ts # Dry run, show all duplicates
|
||||||
|
npx tsx scripts/remove-duplicates.ts --execute # Remove all duplicates
|
||||||
|
npx tsx scripts/remove-duplicates.ts --field=md5 # Check only md5 duplicates
|
||||||
|
npx tsx scripts/remove-duplicates.ts --execute --field=plaintext
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
- The script keeps the OLDEST document (by created_at) and removes newer duplicates
|
||||||
|
- Always run with --dry-run first to review what will be deleted
|
||||||
|
- Duplicates are checked across all hash fields by default
|
||||||
|
`);
|
||||||
|
process.exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Collects every key matching `pattern` with incremental SCAN.
 * SCAN may report a key more than once, so results are de-duplicated.
 */
async function scanMatchingKeys(client: Redis, pattern: string): Promise<string[]> {
  const found = new Set<string>();
  const stream = client.scanStream({ match: pattern, count: 1000 });
  for await (const batch of stream) {
    for (const key of batch as string[]) {
      found.add(key);
    }
  }
  return [...found];
}

/** Milliseconds for a document's created_at; missing/invalid dates sort last. */
function createdAtMillis(doc: HashDocument): number {
  const millis = new Date(doc.created_at).getTime();
  return Number.isNaN(millis) ? Number.POSITIVE_INFINITY : millis;
}

/**
 * Finds groups of stored documents that share the same value for `field`.
 *
 * For `plaintext` the `hash:plaintext:*` keys are scanned; for a hash field
 * the `hash:index:<field>:*` keys are scanned and the plaintext each one
 * points to is read. Plaintexts are grouped by the value embedded in the key,
 * and any group with more than one distinct plaintext is reported, keeping
 * the document with the oldest `created_at`.
 *
 * NOTE(review): every Redis key is unique, so each scanned key contributes
 * exactly one plaintext to its group. As written, groups with more than one
 * plaintext therefore look impossible — confirm whether duplicates should
 * instead be detected from the fields inside the stored documents.
 *
 * @param client Connected ioredis client.
 * @param field  One of plaintext, md5, sha1, sha256, sha512 (validated by the caller).
 * @returns The duplicate groups found for this field (possibly empty).
 */
async function findDuplicatesForField(
  client: Redis,
  field: string
): Promise<DuplicateGroup[]> {
  const MGET_CHUNK_SIZE = 500;
  const duplicates: DuplicateGroup[] = [];

  console.log(` Scanning for ${field} duplicates...`);

  const keyPrefix = field === 'plaintext'
    ? 'hash:plaintext:'
    : `hash:index:${field}:`;

  // SCAN walks the keyspace in small steps; KEYS would block the server
  // for the whole traversal on a large database.
  const keys = await scanMatchingKeys(client, `${keyPrefix}*`);

  // Group plaintexts by the value encoded in the key (hash or plaintext).
  const valueMap = new Map<string, string[]>();
  const addToGroup = (value: string, plaintext: string) => {
    const group = valueMap.get(value);
    if (group) {
      group.push(plaintext);
    } else {
      valueMap.set(value, [plaintext]);
    }
  };

  if (field === 'plaintext') {
    for (const key of keys) {
      const plaintext = key.slice(keyPrefix.length);
      addToGroup(plaintext, plaintext);
    }
  } else {
    // Resolve index keys to plaintexts in batches instead of one GET per key.
    for (let start = 0; start < keys.length; start += MGET_CHUNK_SIZE) {
      const chunk = keys.slice(start, start + MGET_CHUNK_SIZE);
      const plaintexts = await client.mget(chunk);
      chunk.forEach((key, idx) => {
        const plaintext = plaintexts[idx];
        if (plaintext) {
          addToGroup(key.slice(keyPrefix.length), plaintext);
        }
      });
    }
  }

  // Find groups with duplicates
  for (const [value, plaintexts] of valueMap) {
    const uniquePlaintexts = Array.from(new Set(plaintexts));
    if (uniquePlaintexts.length <= 1) {
      continue;
    }

    // Load the documents so they can be ordered by creation time.
    const docs: { plaintext: string; doc: HashDocument }[] = [];
    for (const plaintext of uniquePlaintexts) {
      const docData = await client.get(`hash:plaintext:${plaintext}`);
      if (!docData) {
        continue;
      }
      try {
        docs.push({ plaintext, doc: JSON.parse(docData) });
      } catch (error) {
        // One corrupt document should not abort the whole scan.
        console.warn(`\n⚠️ Skipping unparseable document for ${plaintext}:`, error);
      }
    }

    // Oldest first. Compare explicitly so that missing timestamps
    // (Infinity vs Infinity) never produce a NaN comparator result.
    docs.sort((a, b) => {
      const left = createdAtMillis(a.doc);
      const right = createdAtMillis(b.doc);
      if (left === right) {
        return 0;
      }
      return left < right ? -1 : 1;
    });

    if (docs.length > 1) {
      duplicates.push({
        value,
        field,
        plaintexts: docs.map(d => d.plaintext),
        keepPlaintext: docs[0].plaintext,
        deletePlaintexts: docs.slice(1).map(d => d.plaintext)
      });
    }
  }

  return duplicates;
}
|
||||||
|
|
||||||
|
/**
 * Finds duplicate documents in Redis and, unless running in dry-run mode,
 * deletes them together with their hash index entries and updates the
 * `hash:stats` counters.
 *
 * Steps: connect and ping, read the current document count, collect
 * duplicate groups per field (a plaintext is scheduled for deletion at most
 * once across fields), print a summary and up to ten samples, then either
 * stop (dry run) or delete each scheduled document.
 *
 * On an unexpected error the message is printed, the connection is dropped
 * and the process exits with code 1.
 *
 * @param parsedArgs Parsed command-line options (mode and optional field).
 */
async function removeDuplicates(parsedArgs: ParsedArgs) {
  const client = new Redis({
    host: REDIS_HOST,
    port: REDIS_PORT,
    password: REDIS_PASSWORD,
    db: REDIS_DB,
  });

  const fields = parsedArgs.field
    ? [parsedArgs.field]
    : ['md5', 'sha1', 'sha256', 'sha512'];

  console.log(`🔍 Hasher Duplicate Remover`);
  console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
  console.log(`Redis: ${REDIS_HOST}:${REDIS_PORT} (DB ${REDIS_DB})`);
  console.log(`Index: ${INDEX_NAME}`);
  console.log(`Mode: ${parsedArgs.dryRun ? '🔎 DRY RUN (no changes)' : '⚠️ EXECUTE (will delete)'}`);
  console.log(`Fields to check: ${fields.join(', ')}`);
  console.log('');

  try {
    // Test connection
    console.log('🔗 Connecting to Redis...');
    await client.ping();
    console.log('✅ Connected successfully\n');

    // Get index stats
    const stats = await client.hgetall('hash:stats');
    const totalCount = parseInt(stats.count || '0', 10);
    console.log(`📊 Total documents in index: ${totalCount}\n`);

    const allDuplicates: DuplicateGroup[] = [];
    const seenPlaintexts = new Set<string>();

    // Find duplicates for each field
    for (const field of fields) {
      console.log(`🔍 Checking duplicates for field: ${field}...`);
      const fieldDuplicates = await findDuplicatesForField(client, field);

      // A plaintext already scheduled by an earlier field is not scheduled again.
      for (const dup of fieldDuplicates) {
        const newDeletePlaintexts = dup.deletePlaintexts.filter(p => !seenPlaintexts.has(p));
        if (newDeletePlaintexts.length > 0) {
          dup.deletePlaintexts = newDeletePlaintexts;
          newDeletePlaintexts.forEach(p => seenPlaintexts.add(p));
          allDuplicates.push(dup);
        }
      }

      console.log(` Found ${fieldDuplicates.length} duplicate groups for ${field}`);
    }

    const totalToDelete = allDuplicates.reduce((sum, dup) => sum + dup.deletePlaintexts.length, 0);

    console.log(`\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
    console.log(`📋 Summary:`);
    console.log(` Duplicate groups found: ${allDuplicates.length}`);
    console.log(` Documents to delete: ${totalToDelete}`);
    console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n`);

    if (allDuplicates.length === 0) {
      console.log('✨ No duplicates found! Index is clean.\n');
      await client.quit();
      return;
    }

    // Show sample of duplicates
    console.log(`📝 Sample duplicates (showing first 10):\n`);
    const samplesToShow = allDuplicates.slice(0, 10);
    for (const dup of samplesToShow) {
      const truncatedValue = dup.value.length > 50
        ? dup.value.substring(0, 50) + '...'
        : dup.value;
      console.log(` Field: ${dup.field}`);
      console.log(` Value: ${truncatedValue}`);
      console.log(` Keep: ${dup.keepPlaintext}`);
      console.log(` Delete: ${dup.deletePlaintexts.length} document(s)`);
      console.log('');
    }

    if (allDuplicates.length > 10) {
      console.log(` ... and ${allDuplicates.length - 10} more duplicate groups\n`);
    }

    if (parsedArgs.dryRun) {
      console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
      console.log(`🔎 DRY RUN - No changes made`);
      console.log(` Run with --execute to remove ${totalToDelete} duplicate documents`);
      console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n`);
      await client.quit();
      return;
    }

    // Execute deletion
    console.log(`\n🗑️ Removing ${totalToDelete} duplicate documents...\n`);

    let deleted = 0;
    let errors = 0;

    for (const dup of allDuplicates) {
      // Index key that every member of this group shares, including the
      // document being kept. It must survive the cleanup.
      const sharedIndexKey = dup.field === 'plaintext'
        ? null
        : `hash:index:${dup.field}:${dup.value}`;

      for (const plaintext of dup.deletePlaintexts) {
        try {
          const docKey = `hash:plaintext:${plaintext}`;
          const docData = await client.get(docKey);

          // A document that no longer exists is neither counted as deleted
          // nor as an error.
          if (docData) {
            const doc: HashDocument = JSON.parse(docData);
            const indexKeys = [
              `hash:index:md5:${doc.md5}`,
              `hash:index:sha1:${doc.sha1}`,
              `hash:index:sha256:${doc.sha256}`,
              `hash:index:sha512:${doc.sha512}`,
            ];

            // Find out which index entries actually point at this document.
            const owners = await client.mget(indexKeys);

            const pipeline = client.pipeline();

            // Delete main document
            pipeline.del(docKey);

            indexKeys.forEach((indexKey, idx) => {
              if (owners[idx] !== plaintext) {
                // Entry is missing or belongs to another document: leave it.
                return;
              }
              if (indexKey === sharedIndexKey) {
                // Deleting this key would orphan the kept document's lookup
                // entry, so repoint it at the survivor instead.
                pipeline.set(indexKey, dup.keepPlaintext);
              } else {
                pipeline.del(indexKey);
              }
            });

            // Update statistics
            pipeline.hincrby('hash:stats', 'count', -1);
            pipeline.hincrby('hash:stats', 'size', -JSON.stringify(doc).length);

            const results = await pipeline.exec();

            if (results && results.some(([err]) => err !== null)) {
              errors++;
            } else {
              deleted++;
            }
          }

          process.stdout.write(`\r⏳ Progress: ${deleted + errors}/${totalToDelete} - Deleted: ${deleted}, Errors: ${errors}`);
        } catch (error) {
          console.error(`\n❌ Error deleting ${plaintext}:`, error);
          errors++;
        }
      }
    }

    // Get new count
    const newStats = await client.hgetall('hash:stats');
    const newCount = parseInt(newStats.count || '0', 10);

    console.log('\n\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━');
    console.log('✅ Duplicate removal complete!');
    console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
    console.log(`Documents deleted: ${deleted}`);
    console.log(`Errors: ${errors}`);
    console.log(`Previous document count: ${totalCount}`);
    console.log(`New document count: ${newCount}`);
    console.log('');

    await client.quit();
  } catch (error) {
    console.error('\n❌ Error:', error instanceof Error ? error.message : error);
    // Drop the socket explicitly before exiting with a failure status.
    client.disconnect();
    process.exit(1);
  }
}
|
||||||
|
|
||||||
|
// Script entry point: parse the arguments, handle --help, validate --field,
// echo the effective configuration and start the duplicate removal.

// Parse command line arguments
const args = process.argv.slice(2);
const parsedArgs = parseArgs(args);

// showHelp() prints the usage text and exits the process itself.
if (parsedArgs.showHelp) {
  showHelp();
}

// Validate field if provided
const validFields = ['plaintext', 'md5', 'sha1', 'sha256', 'sha512'];
if (parsedArgs.field && !validFields.includes(parsedArgs.field)) {
  console.error(`❌ Invalid field: ${parsedArgs.field}`);
  console.error(` Valid fields: ${validFields.join(', ')}`);
  process.exit(1);
}

console.log(`\n🔧 Configuration:`);
console.log(` Mode: ${parsedArgs.dryRun ? 'dry-run' : 'execute'}`);
if (parsedArgs.field) {
  console.log(` Field: ${parsedArgs.field}`);
} else {
  console.log(` Fields: all (md5, sha1, sha256, sha512)`);
}
console.log('');

removeDuplicates(parsedArgs).catch((error) => {
  console.error(error);
  // Report the failure through the exit status instead of exiting with 0.
  process.exitCode = 1;
});
|
||||||
Referencia en una nueva incidencia
Block a user