Comparar commits
2 Commits
| Autor | SHA1 | Fecha |
|---|---|---|
|
e3525c1673
|
|||
|
4d9545d0ec
|
20
.env.example
20
.env.example
@@ -1,5 +1,17 @@
|
||||
# Elasticsearch Configuration
|
||||
ELASTICSEARCH_NODE=http://localhost:9200
|
||||
# Redis Configuration
|
||||
# Optional: Customize Redis connection settings
|
||||
|
||||
# Optional: Set to 'development' or 'production'
|
||||
# NODE_ENV=development
|
||||
# Redis host (default: localhost)
|
||||
REDIS_HOST=localhost
|
||||
|
||||
# Redis port (default: 6379)
|
||||
REDIS_PORT=6379
|
||||
|
||||
# Redis password (optional, required if Redis has authentication enabled)
|
||||
# REDIS_PASSWORD=your-secure-password
|
||||
|
||||
# Redis database number (default: 0)
|
||||
# REDIS_DB=0
|
||||
|
||||
# Node Environment
|
||||
NODE_ENV=development
|
||||
|
||||
17
API.md
17
API.md
@@ -140,9 +140,9 @@ No parameters required.
|
||||
{
|
||||
"status": "ok",
|
||||
"redis": {
|
||||
"version": "7.2.0",
|
||||
"memory": "1.5M",
|
||||
"dbSize": 1542
|
||||
"version": "7.2.4",
|
||||
"connected": true,
|
||||
"memoryUsed": "1.5M"
|
||||
},
|
||||
"stats": {
|
||||
"count": 1542,
|
||||
@@ -151,10 +151,9 @@ No parameters required.
|
||||
}
|
||||
```
|
||||
|
||||
**Redis status fields**:
|
||||
- `version`: Redis server version
|
||||
- `memory`: Memory used by Redis
|
||||
- `dbSize`: Total number of keys in database
|
||||
**Redis connection status**:
|
||||
- `connected: true`: Redis is connected and responding
|
||||
- `connected: false`: Redis connection failed
|
||||
|
||||
**Error** (503 Service Unavailable):
|
||||
```json
|
||||
@@ -249,7 +248,7 @@ The API accepts requests from any origin by default. For production deployment,
|
||||
## Notes
|
||||
|
||||
- All timestamps are in ISO 8601 format
|
||||
- The API automatically creates Redis keys with proper structure
|
||||
- Plaintext searches are automatically stored for future lookups
|
||||
- The API automatically creates Redis keys as needed
|
||||
- Plaintext searches are automatically indexed for future lookups
|
||||
- Searches are case-insensitive
|
||||
- Hashes must be valid hexadecimal strings
|
||||
|
||||
64
CHANGELOG.md
64
CHANGELOG.md
@@ -5,37 +5,6 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [2.0.0] - 2025-12-03
|
||||
|
||||
### Changed
|
||||
|
||||
#### Major Backend Migration
|
||||
- **Breaking Change**: Migrated from Elasticsearch to Redis for improved performance
|
||||
- Replaced Elasticsearch Client with ioredis for Redis operations
|
||||
- Redesigned data structure using Redis key patterns
|
||||
- Implemented O(1) hash lookups using Redis indexes
|
||||
- Significantly reduced search latency (< 10ms typical)
|
||||
|
||||
#### New Redis Architecture
|
||||
- Document storage: `hash:plaintext:{plaintext}` keys
|
||||
- Hash indexes: `hash:index:{algorithm}:{hash}` for fast lookups
|
||||
- Statistics tracking: `hash:stats` Redis Hash
|
||||
- Pipeline operations for atomic batch writes
|
||||
- Connection pooling with automatic retry strategy
|
||||
|
||||
### Updated
|
||||
|
||||
#### Configuration
|
||||
- Environment variables changed from `ELASTICSEARCH_NODE` to `REDIS_HOST`, `REDIS_PORT`, `REDIS_PASSWORD`, `REDIS_DB`
|
||||
- Simplified connection setup with sensible defaults
|
||||
- Optional Redis authentication support
|
||||
|
||||
#### Performance Improvements
|
||||
- Search latency reduced to < 10ms (from ~50ms)
|
||||
- Bulk indexing maintained at 1000-5000 docs/sec
|
||||
- Lower memory footprint
|
||||
- Better concurrent request handling (100+ users)
|
||||
|
||||
## [1.0.0] - 2025-12-03
|
||||
|
||||
### Added
|
||||
@@ -49,11 +18,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
#### Backend
|
||||
- Redis integration with ioredis
|
||||
- Key-value storage with hash indexes
|
||||
- Automatic key structure initialization
|
||||
- Auto-storage of searched plaintext for future lookups
|
||||
- Custom index mapping with 10 shards for horizontal scaling
|
||||
- Automatic index creation on first use
|
||||
- Auto-indexing of searched plaintext for future lookups
|
||||
- RESTful API endpoints for search and health checks
|
||||
- Case-insensitive searches
|
||||
- Lowercase analyzer for case-insensitive searches
|
||||
|
||||
#### Frontend
|
||||
- Modern, responsive UI with gradient design
|
||||
@@ -109,32 +78,27 @@ hasher/
|
||||
│ ├── redis.ts # Redis client
|
||||
│ └── hash.ts # Hash utilities
|
||||
├── scripts/ # CLI scripts
|
||||
│ ├── index-file.ts # Bulk indexer
|
||||
│ └── remove-duplicates.ts # Duplicate removal
|
||||
│ └── index-file.ts # Bulk indexer
|
||||
└── docs/ # Documentation
|
||||
```
|
||||
|
||||
#### Redis Data Structure
|
||||
- Main documents: `hash:plaintext:{plaintext}`
|
||||
- MD5 index: `hash:index:md5:{hash}`
|
||||
- SHA1 index: `hash:index:sha1:{hash}`
|
||||
- SHA256 index: `hash:index:sha256:{hash}`
|
||||
- SHA512 index: `hash:index:sha512:{hash}`
|
||||
- Statistics: `hash:stats` (Redis Hash with count and size)
|
||||
- Index name: `hasher`
|
||||
- Shards: 10
|
||||
- Replicas: 1
|
||||
- Fields: plaintext, md5, sha1, sha256, sha512, created_at
|
||||
|
||||
### Configuration
|
||||
|
||||
#### Environment Variables
|
||||
- `REDIS_HOST`: Redis host (default: localhost)
|
||||
- `REDIS_PORT`: Redis port (default: 6379)
|
||||
- `REDIS_PASSWORD`: Redis password (optional)
|
||||
- `REDIS_DB`: Redis database number (default: 0)
|
||||
- `REDIS_HOST`: Redis server host (default: localhost)
|
||||
- `REDIS_PORT`: Redis server port (default: 6379)
|
||||
- `REDIS_PASSWORD`: Redis authentication password (optional)
|
||||
|
||||
#### Performance
|
||||
- Bulk indexing: 1000-5000 docs/sec
|
||||
- Search latency: < 10ms typical (O(1) lookups)
|
||||
- Horizontal scaling ready with Redis Cluster
|
||||
- Lower memory footprint than Elasticsearch
|
||||
- Search latency: < 50ms typical
|
||||
- Horizontal scaling ready
|
||||
|
||||
### Security
|
||||
- Input validation on all endpoints
|
||||
|
||||
@@ -16,7 +16,7 @@ Thank you for considering contributing to Hasher! This document provides guideli
|
||||
## 🎯 Areas for Contribution
|
||||
|
||||
### Features
|
||||
- Additional hash algorithms (argon2, etc.)
|
||||
- Additional hash algorithms (bcrypt validation, argon2, etc.)
|
||||
- Export functionality (CSV, JSON)
|
||||
- Search history
|
||||
- Batch hash lookup
|
||||
|
||||
290
DEPLOYMENT.md
290
DEPLOYMENT.md
@@ -35,15 +35,14 @@ Vercel provides seamless deployment for Next.js applications.
|
||||
4. **Set Environment Variables**:
|
||||
- Go to your project settings on Vercel
|
||||
- Add environment variables:
|
||||
- `REDIS_HOST=your-redis-host`
|
||||
- `REDIS_HOST=your-redis-host.com`
|
||||
- `REDIS_PORT=6379`
|
||||
- `REDIS_PASSWORD=your-password` (if using authentication)
|
||||
- `REDIS_DB=0`
|
||||
- `REDIS_PASSWORD=your-secure-password` (if using authentication)
|
||||
- Redeploy: `vercel --prod`
|
||||
|
||||
#### Important Notes:
|
||||
- Ensure Redis is accessible from Vercel's servers
|
||||
- Consider using Redis Cloud (Upstash) or a publicly accessible Redis instance
|
||||
- Consider using [Upstash](https://upstash.com) or [Redis Cloud](https://redis.com/try-free/) for managed Redis
|
||||
- Use environment variables for sensitive configuration
|
||||
|
||||
---
|
||||
@@ -63,7 +62,7 @@ FROM base AS deps
|
||||
RUN apk add --no-cache libc6-compat
|
||||
WORKDIR /app
|
||||
|
||||
COPY package.json package-lock.json ./
|
||||
COPY package.json package-lock.json* ./
|
||||
RUN npm ci
|
||||
|
||||
# Rebuild the source code only when needed
|
||||
@@ -72,15 +71,13 @@ WORKDIR /app
|
||||
COPY --from=deps /app/node_modules ./node_modules
|
||||
COPY . .
|
||||
|
||||
ENV NEXT_TELEMETRY_DISABLED=1
|
||||
RUN npm run build
|
||||
|
||||
# Production image, copy all the files and run next
|
||||
FROM base AS runner
|
||||
WORKDIR /app
|
||||
|
||||
ENV NODE_ENV=production
|
||||
ENV NEXT_TELEMETRY_DISABLED=1
|
||||
ENV NODE_ENV production
|
||||
|
||||
RUN addgroup --system --gid 1001 nodejs
|
||||
RUN adduser --system --uid 1001 nextjs
|
||||
@@ -93,24 +90,11 @@ USER nextjs
|
||||
|
||||
EXPOSE 3000
|
||||
|
||||
ENV PORT=3000
|
||||
ENV HOSTNAME="0.0.0.0"
|
||||
ENV PORT 3000
|
||||
|
||||
CMD ["node", "server.js"]
|
||||
```
|
||||
|
||||
#### Update next.config.ts:
|
||||
|
||||
```typescript
|
||||
import type { NextConfig } from 'next';
|
||||
|
||||
const nextConfig: NextConfig = {
|
||||
output: 'standalone',
|
||||
};
|
||||
|
||||
export default nextConfig;
|
||||
```
|
||||
|
||||
#### Build and Run:
|
||||
|
||||
```bash
|
||||
@@ -122,6 +106,7 @@ docker run -d \
|
||||
-p 3000:3000 \
|
||||
-e REDIS_HOST=redis \
|
||||
-e REDIS_PORT=6379 \
|
||||
-e REDIS_PASSWORD=your-password \
|
||||
--name hasher \
|
||||
hasher:latest
|
||||
```
|
||||
@@ -141,18 +126,19 @@ services:
|
||||
environment:
|
||||
- REDIS_HOST=redis
|
||||
- REDIS_PORT=6379
|
||||
- REDIS_PASSWORD=your-secure-password
|
||||
depends_on:
|
||||
- redis
|
||||
restart: unless-stopped
|
||||
|
||||
redis:
|
||||
image: redis:7-alpine
|
||||
command: redis-server --requirepass your-secure-password --appendonly yes
|
||||
ports:
|
||||
- "6379:6379"
|
||||
volumes:
|
||||
- redis-data:/data
|
||||
restart: unless-stopped
|
||||
command: redis-server --appendonly yes
|
||||
|
||||
volumes:
|
||||
redis-data:
|
||||
@@ -176,13 +162,28 @@ curl -fsSL https://deb.nodesource.com/setup_18.x | sudo -E bash -
|
||||
sudo apt-get install -y nodejs
|
||||
```
|
||||
|
||||
#### 2. Install PM2 (Process Manager):
|
||||
#### 2. Install Redis:
|
||||
|
||||
```bash
|
||||
sudo apt-get update
|
||||
sudo apt-get install redis-server
|
||||
|
||||
# Configure Redis
|
||||
sudo nano /etc/redis/redis.conf
|
||||
# Set: requirepass your-strong-password
|
||||
|
||||
# Start Redis
|
||||
sudo systemctl start redis-server
|
||||
sudo systemctl enable redis-server
|
||||
```
|
||||
|
||||
#### 3. Install PM2 (Process Manager):
|
||||
|
||||
```bash
|
||||
sudo npm install -g pm2
|
||||
```
|
||||
|
||||
#### 3. Clone and Build:
|
||||
#### 4. Clone and Build:
|
||||
|
||||
```bash
|
||||
cd /var/www
|
||||
@@ -192,19 +193,18 @@ npm install
|
||||
npm run build
|
||||
```
|
||||
|
||||
#### 4. Configure Environment:
|
||||
#### 5. Configure Environment:
|
||||
|
||||
```bash
|
||||
cat > .env.local << EOF
|
||||
REDIS_HOST=localhost
|
||||
REDIS_PORT=6379
|
||||
REDIS_PASSWORD=your-password
|
||||
REDIS_DB=0
|
||||
REDIS_PASSWORD=your-strong-password
|
||||
NODE_ENV=production
|
||||
EOF
|
||||
```
|
||||
|
||||
#### 5. Start with PM2:
|
||||
#### 6. Start with PM2:
|
||||
|
||||
```bash
|
||||
pm2 start npm --name "hasher" -- start
|
||||
@@ -212,7 +212,7 @@ pm2 save
|
||||
pm2 startup
|
||||
```
|
||||
|
||||
#### 6. Configure Nginx (Optional):
|
||||
#### 7. Configure Nginx (Optional):
|
||||
|
||||
```nginx
|
||||
server {
|
||||
@@ -241,28 +241,42 @@ sudo systemctl reload nginx
|
||||
|
||||
## Redis Setup
|
||||
|
||||
### Option 1: Redis Cloud (Managed)
|
||||
### Option 1: Managed Redis (Recommended)
|
||||
|
||||
1. Sign up at [Redis Cloud](https://redis.com/try-free/) or [Upstash](https://upstash.com/)
|
||||
#### Upstash (Serverless Redis)
|
||||
1. Sign up at [Upstash](https://upstash.com)
|
||||
2. Create a database
|
||||
3. Note the connection details (host, port, password)
|
||||
4. Update `REDIS_HOST`, `REDIS_PORT`, and `REDIS_PASSWORD` environment variables
|
||||
3. Copy connection details
|
||||
4. Update environment variables
|
||||
|
||||
### Option 2: Self-Hosted
|
||||
#### Redis Cloud
|
||||
1. Sign up at [Redis Cloud](https://redis.com/try-free/)
|
||||
2. Create a database
|
||||
3. Note the endpoint and password
|
||||
4. Update `REDIS_HOST`, `REDIS_PORT`, and `REDIS_PASSWORD`
|
||||
|
||||
### Option 2: Self-Hosted Redis
|
||||
|
||||
```bash
|
||||
# Ubuntu/Debian
|
||||
sudo apt-get update
|
||||
sudo apt-get install redis-server
|
||||
|
||||
# Configure
|
||||
# Configure Redis security
|
||||
sudo nano /etc/redis/redis.conf
|
||||
# Set: bind 0.0.0.0 (to allow remote connections)
|
||||
# Set: requirepass your-strong-password (for security)
|
||||
|
||||
# Start
|
||||
# Important settings:
|
||||
# bind 127.0.0.1 ::1 # Only local connections (remove for remote)
|
||||
# requirepass your-strong-password
|
||||
# maxmemory 256mb
|
||||
# maxmemory-policy allkeys-lru
|
||||
|
||||
# Start Redis
|
||||
sudo systemctl start redis-server
|
||||
sudo systemctl enable redis-server
|
||||
|
||||
# Test connection
|
||||
redis-cli -a your-strong-password ping
|
||||
```
|
||||
|
||||
---
|
||||
@@ -271,11 +285,16 @@ sudo systemctl enable redis-server
|
||||
|
||||
### 1. Redis Security
|
||||
|
||||
- Enable authentication with requirepass
|
||||
- Use TLS for Redis connections (Redis 6+)
|
||||
- Restrict network access with firewall rules
|
||||
- Update credentials regularly
|
||||
- Disable dangerous commands (FLUSHDB, FLUSHALL, etc.)
|
||||
- **Always** use a strong password with `requirepass`
|
||||
- Bind Redis to localhost if possible (`bind 127.0.0.1`)
|
||||
- Use TLS/SSL for remote connections (Redis 6+)
|
||||
- Disable dangerous commands:
|
||||
```
|
||||
rename-command FLUSHDB ""
|
||||
rename-command FLUSHALL ""
|
||||
rename-command CONFIG ""
|
||||
```
|
||||
- Set memory limits to prevent OOM
|
||||
|
||||
### 2. Application Security
|
||||
|
||||
@@ -291,7 +310,7 @@ sudo systemctl enable redis-server
|
||||
# Example UFW firewall rules
|
||||
sudo ufw allow 80/tcp
|
||||
sudo ufw allow 443/tcp
|
||||
sudo ufw allow from YOUR_IP to any port 6379 # Redis
|
||||
sudo ufw allow from YOUR_IP to any port 6379 # Redis (if remote)
|
||||
sudo ufw enable
|
||||
```
|
||||
|
||||
@@ -312,44 +331,92 @@ pm2 logs hasher
|
||||
### Redis Monitoring
|
||||
|
||||
```bash
|
||||
# Health check
|
||||
# Test connection
|
||||
redis-cli ping
|
||||
|
||||
# Get info
|
||||
# Get server info
|
||||
redis-cli INFO
|
||||
|
||||
# Database stats
|
||||
redis-cli INFO stats
|
||||
# Monitor commands
|
||||
redis-cli MONITOR
|
||||
|
||||
# Memory usage
|
||||
# Check memory usage
|
||||
redis-cli INFO memory
|
||||
|
||||
# Check stats
|
||||
redis-cli INFO stats
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Backup and Recovery
|
||||
|
||||
### Redis Backups
|
||||
### Redis Persistence
|
||||
|
||||
Redis offers two persistence options:
|
||||
|
||||
#### RDB (Redis Database Backup)
|
||||
```bash
|
||||
# Enable AOF (Append Only File) persistence
|
||||
redis-cli CONFIG SET appendonly yes
|
||||
|
||||
# Save RDB snapshot manually
|
||||
redis-cli SAVE
|
||||
|
||||
# Configure automatic backups in redis.conf
|
||||
# Configure in redis.conf
|
||||
save 900 1 # Save if 1 key changed in 15 minutes
|
||||
save 300 10 # Save if 10 keys changed in 5 minutes
|
||||
save 60 10000 # Save if 10000 keys changed in 1 minute
|
||||
|
||||
# Backup files location (default)
|
||||
# RDB: /var/lib/redis/dump.rdb
|
||||
# AOF: /var/lib/redis/appendonly.aof
|
||||
# Manual snapshot
|
||||
redis-cli SAVE
|
||||
|
||||
# Restore from backup
|
||||
# Backup file location
|
||||
/var/lib/redis/dump.rdb
|
||||
```
|
||||
|
||||
#### AOF (Append Only File)
|
||||
```bash
|
||||
# Enable in redis.conf
|
||||
appendonly yes
|
||||
appendfilename "appendonly.aof"
|
||||
|
||||
# Sync options
|
||||
appendfsync everysec # Good balance
|
||||
|
||||
# Backup file location
|
||||
/var/lib/redis/appendonly.aof
|
||||
```
|
||||
|
||||
### Backup Script
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# backup-redis.sh
|
||||
|
||||
BACKUP_DIR="/backup/redis"
|
||||
DATE=$(date +%Y%m%d_%H%M%S)
|
||||
|
||||
# Create backup directory
|
||||
mkdir -p $BACKUP_DIR
|
||||
|
||||
# Trigger Redis save
|
||||
redis-cli -a your-password SAVE
|
||||
|
||||
# Copy RDB file
|
||||
cp /var/lib/redis/dump.rdb $BACKUP_DIR/dump_$DATE.rdb
|
||||
|
||||
# Keep only last 7 days
|
||||
find $BACKUP_DIR -name "dump_*.rdb" -mtime +7 -delete
|
||||
|
||||
echo "Backup completed: dump_$DATE.rdb"
|
||||
```
|
||||
|
||||
### Restore from Backup
|
||||
|
||||
```bash
|
||||
# Stop Redis
|
||||
sudo systemctl stop redis-server
|
||||
sudo cp /backup/dump.rdb /var/lib/redis/
|
||||
|
||||
# Replace dump file
|
||||
sudo cp /backup/redis/dump_YYYYMMDD_HHMMSS.rdb /var/lib/redis/dump.rdb
|
||||
sudo chown redis:redis /var/lib/redis/dump.rdb
|
||||
|
||||
# Start Redis
|
||||
sudo systemctl start redis-server
|
||||
```
|
||||
|
||||
@@ -360,15 +427,24 @@ sudo systemctl start redis-server
|
||||
### Horizontal Scaling
|
||||
|
||||
1. Deploy multiple Next.js instances
|
||||
2. Use a load balancer (nginx, HAProxy)
|
||||
3. Share the same Redis instance or cluster
|
||||
2. Use a load balancer (nginx, HAProxy, Cloudflare)
|
||||
3. Share the same Redis instance
|
||||
|
||||
### Redis Scaling
|
||||
### Redis Scaling Options
|
||||
|
||||
1. Use Redis Cluster for horizontal scaling
|
||||
2. Set up Redis Sentinel for high availability
|
||||
3. Use read replicas for read-heavy workloads
|
||||
4. Consider Redis Enterprise for advanced features
|
||||
#### 1. Redis Cluster
|
||||
- Automatic sharding across multiple nodes
|
||||
- High availability with automatic failover
|
||||
- Good for very large datasets
|
||||
|
||||
#### 2. Redis Sentinel
|
||||
- High availability without sharding
|
||||
- Automatic failover
|
||||
- Monitoring and notifications
|
||||
|
||||
#### 3. Read Replicas
|
||||
- Separate read and write operations
|
||||
- Scale read capacity
|
||||
|
||||
---
|
||||
|
||||
@@ -384,28 +460,37 @@ pm2 logs hasher --lines 100
|
||||
### Check Redis
|
||||
|
||||
```bash
|
||||
# Test connection
|
||||
redis-cli ping
|
||||
|
||||
# Check memory
|
||||
redis-cli INFO memory
|
||||
|
||||
# Count keys
|
||||
redis-cli DBSIZE
|
||||
|
||||
# Get stats
|
||||
redis-cli INFO stats
|
||||
```
|
||||
|
||||
### Common Issues
|
||||
|
||||
**Issue**: Cannot connect to Redis
|
||||
- Check firewall rules
|
||||
- Verify Redis is running: `redis-cli ping`
|
||||
- Check `REDIS_HOST`, `REDIS_PORT`, and `REDIS_PASSWORD` environment variables
|
||||
- Check if Redis is running: `sudo systemctl status redis-server`
|
||||
- Verify firewall rules
|
||||
- Check `REDIS_HOST` and `REDIS_PORT` environment variables
|
||||
- Verify password is correct
|
||||
|
||||
**Issue**: Out of memory
|
||||
- Increase Node.js memory: `NODE_OPTIONS=--max-old-space-size=4096`
|
||||
- Configure Redis maxmemory and eviction policy
|
||||
- Use Redis persistence (RDB/AOF) carefully
|
||||
- Configure Redis maxmemory
|
||||
- Set appropriate eviction policy
|
||||
|
||||
**Issue**: Slow searches
|
||||
- Verify O(1) lookups are being used (direct key access)
|
||||
- Check Redis memory and CPU usage
|
||||
- Consider using Redis Cluster for distribution
|
||||
- Optimize key patterns
|
||||
- Check Redis memory usage
|
||||
- Verify O(1) key lookups are being used
|
||||
- Monitor Redis with `redis-cli MONITOR`
|
||||
- Consider Redis Cluster for very large datasets
|
||||
|
||||
---
|
||||
|
||||
@@ -413,10 +498,25 @@ redis-cli INFO stats
|
||||
|
||||
1. **Enable Next.js Static Optimization**
|
||||
2. **Use CDN for static assets**
|
||||
3. **Enable Redis pipelining for bulk operations**
|
||||
4. **Configure appropriate maxmemory for Redis**
|
||||
3. **Configure Redis pipelining** (already implemented)
|
||||
4. **Set appropriate maxmemory and eviction policy**
|
||||
5. **Use SSD storage for Redis persistence**
|
||||
6. **Enable Redis connection pooling (already implemented)**
|
||||
6. **Enable connection pooling** (already implemented)
|
||||
7. **Monitor and optimize Redis memory usage**
|
||||
|
||||
---
|
||||
|
||||
## Environment Variables
|
||||
|
||||
| Variable | Description | Default | Required |
|
||||
|----------|-------------|---------|----------|
|
||||
| `REDIS_HOST` | Redis server hostname | `localhost` | No |
|
||||
| `REDIS_PORT` | Redis server port | `6379` | No |
|
||||
| `REDIS_PASSWORD` | Redis authentication password | - | No* |
|
||||
| `NODE_ENV` | Node environment | `development` | No |
|
||||
| `PORT` | Application port | `3000` | No |
|
||||
|
||||
*Required if Redis has authentication enabled
|
||||
|
||||
---
|
||||
|
||||
@@ -424,6 +524,28 @@ redis-cli INFO stats
|
||||
|
||||
For deployment issues, check:
|
||||
- [Next.js Deployment Docs](https://nextjs.org/docs/deployment)
|
||||
- [Redis Setup Guide](https://redis.io/docs/getting-started/)
|
||||
- [ioredis Documentation](https://github.com/redis/ioredis)
|
||||
- [Redis Documentation](https://redis.io/docs/)
|
||||
- [Upstash Documentation](https://docs.upstash.com/)
|
||||
- Project GitHub Issues
|
||||
|
||||
---
|
||||
|
||||
## Deployment Checklist
|
||||
|
||||
Before going live:
|
||||
|
||||
- [ ] Redis is secured with password
|
||||
- [ ] Environment variables are configured
|
||||
- [ ] SSL/TLS certificates are installed
|
||||
- [ ] Firewall rules are configured
|
||||
- [ ] Monitoring is set up
|
||||
- [ ] Backup strategy is in place
|
||||
- [ ] Load testing completed
|
||||
- [ ] Error logging configured
|
||||
- [ ] Redis persistence (RDB/AOF) configured
|
||||
- [ ] Rate limiting implemented (if needed)
|
||||
- [ ] Documentation is up to date
|
||||
|
||||
---
|
||||
|
||||
**Ready to deploy! 🚀**
|
||||
|
||||
@@ -26,9 +26,9 @@
|
||||
|
||||
### 📊 Backend
|
||||
- Redis integration with ioredis
|
||||
- Key-value storage with hash indexes
|
||||
- 10-shard index for horizontal scaling
|
||||
- RESTful API with JSON responses
|
||||
- Automatic key structure initialization
|
||||
- Automatic index creation and initialization
|
||||
- Health monitoring endpoint
|
||||
|
||||
### 🎨 Frontend
|
||||
@@ -139,34 +139,30 @@ npm run index-file wordlist.txt -- --batch-size 500
|
||||
|
||||
### Environment Configuration
|
||||
```bash
|
||||
# Optional: Set Redis connection details
|
||||
# Optional: Set Redis connection
|
||||
export REDIS_HOST=localhost
|
||||
export REDIS_PORT=6379
|
||||
export REDIS_PASSWORD=your-password
|
||||
export REDIS_DB=0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🗄️ Redis Data Structure
|
||||
|
||||
### Key Patterns
|
||||
- **Documents**: `hash:plaintext:{plaintext}` - Main document storage
|
||||
- **MD5 Index**: `hash:index:md5:{hash}` - MD5 hash lookup
|
||||
- **SHA1 Index**: `hash:index:sha1:{hash}` - SHA1 hash lookup
|
||||
- **SHA256 Index**: `hash:index:sha256:{hash}` - SHA256 hash lookup
|
||||
- **SHA512 Index**: `hash:index:sha512:{hash}` - SHA512 hash lookup
|
||||
- **Statistics**: `hash:stats` - Redis Hash with count and size
|
||||
### Index: `hasher`
|
||||
- **Shards**: 10 (horizontal scaling)
|
||||
- **Replicas**: 1 (redundancy)
|
||||
- **Analyzer**: Custom lowercase analyzer
|
||||
|
||||
### Document Schema
|
||||
### Schema
|
||||
```json
|
||||
{
|
||||
"plaintext": "string",
|
||||
"md5": "string",
|
||||
"sha1": "string",
|
||||
"sha256": "string",
|
||||
"sha512": "string",
|
||||
"created_at": "ISO 8601 date string"
|
||||
"plaintext": "text + keyword",
|
||||
"md5": "keyword",
|
||||
"sha1": "keyword",
|
||||
"sha256": "keyword",
|
||||
"sha512": "keyword",
|
||||
"created_at": "date"
|
||||
}
|
||||
```
|
||||
|
||||
@@ -186,9 +182,9 @@ export REDIS_DB=0
|
||||
## 🚀 Performance Metrics
|
||||
|
||||
- **Bulk Indexing**: 1000-5000 docs/sec
|
||||
- **Search Latency**: <10ms (typical O(1) lookups)
|
||||
- **Concurrent Users**: 100+ supported
|
||||
- **Horizontal Scaling**: Ready with Redis Cluster
|
||||
- **Search Latency**: <50ms (typical)
|
||||
- **Concurrent Users**: 50+ supported
|
||||
- **Horizontal Scaling**: Ready with 10 shards
|
||||
|
||||
---
|
||||
|
||||
@@ -228,7 +224,7 @@ export REDIS_DB=0
|
||||
- Node.js 18.x or higher
|
||||
- Redis 6.x or higher
|
||||
- 512MB RAM minimum
|
||||
- Redis server running locally or remotely
|
||||
- Redis server (local or remote)
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -17,6 +17,12 @@ npm run index-file <file> -- --batch-size N # Custom batch size
|
||||
npm run index-file -- --help # Show help
|
||||
```
|
||||
|
||||
### Duplicate Removal
|
||||
```bash
|
||||
npm run remove-duplicates -- --field md5 --dry-run # Preview duplicates
|
||||
npm run remove-duplicates -- --field md5 --execute # Remove duplicates
|
||||
```
|
||||
|
||||
## 🔍 Hash Detection Patterns
|
||||
|
||||
| Type | Length | Example |
|
||||
@@ -65,6 +71,9 @@ redis-cli KEYS "hash:plaintext:*"
|
||||
# Get document
|
||||
redis-cli GET "hash:plaintext:password"
|
||||
|
||||
# Get statistics
|
||||
redis-cli HGETALL hash:stats
|
||||
|
||||
# Clear all data (CAUTION!)
|
||||
redis-cli FLUSHDB
|
||||
```
|
||||
@@ -85,13 +94,17 @@ redis-cli FLUSHDB
|
||||
| `app/page.tsx` | Main UI component |
|
||||
| `app/api/search/route.ts` | Search endpoint |
|
||||
| `lib/redis.ts` | Redis configuration |
|
||||
| `lib/hash.ts` | Hash utilities |
|
||||
| `scripts/index-file.ts` | Bulk indexer |
|
||||
| `scripts/remove-duplicates.ts` | Duplicate remover |
|
||||
|
||||
## ⚙️ Environment Variables
|
||||
|
||||
```bash
|
||||
# Optional
|
||||
REDIS_HOST=localhost
|
||||
REDIS_PORT=6379
|
||||
REDIS_PASSWORD=your-password
|
||||
REDIS_DB=0
|
||||
NODE_ENV=production
|
||||
```
|
||||
|
||||
@@ -135,6 +148,7 @@ curl http://localhost:3000/api/health
|
||||
|
||||
```bash
|
||||
npm run index-file -- --help # Indexer help
|
||||
npm run remove-duplicates -- --help # Duplicate remover help
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
159
README.md
159
README.md
@@ -2,7 +2,7 @@
|
||||
|
||||
A modern, high-performance hash search and generation tool powered by Redis and Next.js. Search for hash values to find their plaintext origins or generate hashes from any text input.
|
||||
|
||||

|
||||

|
||||

|
||||

|
||||
|
||||
@@ -11,11 +11,10 @@ A modern, high-performance hash search and generation tool powered by Redis and
|
||||
- 🔍 **Hash Lookup**: Search for MD5, SHA1, SHA256, and SHA512 hashes
|
||||
- 🔑 **Hash Generation**: Generate multiple hash types from plaintext
|
||||
- 💾 **Auto-Indexing**: Automatically stores searched plaintext and hashes
|
||||
- 📊 **Redis Backend**: Ultra-fast in-memory storage with persistence
|
||||
- 🚀 **Bulk Indexing**: Import wordlists via command-line script with resume capability
|
||||
- 📊 **Redis Backend**: Fast in-memory storage with persistence
|
||||
- 🚀 **Bulk Indexing**: Import wordlists via command-line script
|
||||
- 🎨 **Modern UI**: Beautiful, responsive interface with real-time feedback
|
||||
- 📋 **Copy to Clipboard**: One-click copying of any hash value
|
||||
- ⚡ **High Performance**: Lightning-fast searches with Redis indexing
|
||||
|
||||
## 🏗️ Architecture
|
||||
|
||||
@@ -34,8 +33,7 @@ A modern, high-performance hash search and generation tool powered by Redis and
|
||||
↓
|
||||
┌─────────────┐
|
||||
│ Redis │ ← In-memory storage
|
||||
│ (Key-Value │ (localhost:6379)
|
||||
│ + Hashes) │
|
||||
│ │ with persistence
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
@@ -44,7 +42,7 @@ A modern, high-performance hash search and generation tool powered by Redis and
|
||||
### Prerequisites
|
||||
|
||||
- Node.js 18.x or higher
|
||||
- Redis 6.x or higher running on `localhost:6379`
|
||||
- Redis 7.x or higher
|
||||
- npm or yarn
|
||||
|
||||
### Installation
|
||||
@@ -60,25 +58,20 @@ A modern, high-performance hash search and generation tool powered by Redis and
|
||||
npm install
|
||||
```
|
||||
|
||||
3. **Start Redis** (if not already running)
|
||||
|
||||
```bash
|
||||
# Using Docker
|
||||
docker run -d --name redis -p 6379:6379 redis:latest
|
||||
|
||||
# Or using system package manager
|
||||
sudo systemctl start redis
|
||||
```
|
||||
|
||||
4. **Configure Redis** (optional)
|
||||
3. **Configure Redis** (optional)
|
||||
|
||||
By default, the app connects to `localhost:6379`. To change this:
|
||||
|
||||
```bash
|
||||
export REDIS_HOST=your-redis-host
|
||||
export REDIS_HOST=localhost
|
||||
export REDIS_PORT=6379
|
||||
export REDIS_PASSWORD=your-password # if authentication is enabled
|
||||
export REDIS_DB=0 # database number
|
||||
export REDIS_PASSWORD=your_password # Optional
|
||||
export REDIS_DB=0 # Optional, defaults to 0
|
||||
```
|
||||
|
||||
4. **Start Redis**
|
||||
```bash
|
||||
redis-server
|
||||
```
|
||||
|
||||
5. **Run the development server**
|
||||
@@ -115,10 +108,7 @@ npm run index-file wordlist.txt
|
||||
# With custom batch size
|
||||
npm run index-file wordlist.txt -- --batch-size 500
|
||||
|
||||
# Skip duplicate checking (faster)
|
||||
npm run index-file wordlist.txt -- --no-check
|
||||
|
||||
# Resume interrupted indexing
|
||||
# Resume from last position
|
||||
npm run index-file wordlist.txt -- --resume
|
||||
|
||||
# Show help
|
||||
@@ -135,11 +125,26 @@ qwerty
|
||||
|
||||
**Script features**:
|
||||
- ✅ Bulk indexing with configurable batch size
|
||||
- ✅ Progress indicator and real-time stats
|
||||
- ✅ State persistence with resume capability
|
||||
- ✅ Optional duplicate checking
|
||||
- ✅ Progress indicator with percentage
|
||||
- ✅ Error handling and reporting
|
||||
- ✅ Performance metrics (docs/sec)
|
||||
- ✅ State persistence for resume capability
|
||||
- ✅ Duplicate detection
|
||||
|
||||
### Remove Duplicates Script
|
||||
|
||||
Find and remove duplicate hash entries:
|
||||
|
||||
```bash
|
||||
# Dry run (preview only)
|
||||
npm run remove-duplicates -- --dry-run --field md5
|
||||
|
||||
# Execute removal
|
||||
npm run remove-duplicates -- --execute --field sha256
|
||||
|
||||
# With custom batch size
|
||||
npm run remove-duplicates -- --execute --field md5 --batch-size 100
|
||||
```
|
||||
|
||||
## 🔌 API Reference
|
||||
|
||||
@@ -180,6 +185,7 @@ Search for a hash or generate hashes from plaintext.
|
||||
"found": true,
|
||||
"isPlaintext": true,
|
||||
"plaintext": "password",
|
||||
"wasGenerated": false,
|
||||
"hashes": {
|
||||
"md5": "5f4dcc3b5aa765d61d8327deb882cf99",
|
||||
"sha1": "5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8",
|
||||
@@ -193,57 +199,60 @@ Search for a hash or generate hashes from plaintext.
|
||||
|
||||
**GET** `/api/health`
|
||||
|
||||
Check Redis connection and index status.
|
||||
Check Redis connection and database statistics.
|
||||
|
||||
**Response**:
|
||||
```json
|
||||
{
|
||||
"status": "ok",
|
||||
"redis": {
|
||||
"version": "7.2.4",
|
||||
"connected": true,
|
||||
"version": "7.0.15",
|
||||
"usedMemory": 2097152,
|
||||
"dbSize": 1542
|
||||
"memoryUsed": "1.5M",
|
||||
"uptime": 3600
|
||||
},
|
||||
"index": {
|
||||
"exists": true,
|
||||
"name": "hasher",
|
||||
"stats": {
|
||||
"documentCount": 1542,
|
||||
"indexSize": 524288
|
||||
}
|
||||
"database": {
|
||||
"totalKeys": 1542,
|
||||
"documentCount": 386,
|
||||
"totalSize": 524288
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 🗄️ Redis Data Structure
|
||||
|
||||
### Key Structure
|
||||
### Key Structures
|
||||
|
||||
**Main Documents**: `hash:plaintext:{plaintext}`
|
||||
- Stores complete hash document as JSON string
|
||||
- Contains all hash algorithms and metadata
|
||||
The application uses the following Redis key patterns:
|
||||
|
||||
**Hash Indexes**: `hash:index:{algorithm}:{hash}`
|
||||
- Reverse lookup from hash to plaintext
|
||||
- One key per algorithm (md5, sha1, sha256, sha512)
|
||||
- Value is the plaintext string
|
||||
1. **Hash Documents**: `hash:plaintext:{plaintext}`
|
||||
```json
|
||||
{
|
||||
"plaintext": "password",
|
||||
"md5": "5f4dcc3b5aa765d61d8327deb882cf99",
|
||||
"sha1": "5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8",
|
||||
"sha256": "5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8",
|
||||
"sha512": "b109f3bbbc244eb82441917ed06d618b9008dd09b3befd1b5e07394c706a8bb980b1d7785e5976ec049b46df5f1326af5a2ea6d103fd07c95385ffab0cacbc86",
|
||||
"created_at": "2024-01-01T00:00:00.000Z"
|
||||
}
|
||||
```
|
||||
|
||||
**Statistics**: `hash:stats` (Redis Hash)
|
||||
- `count`: Total number of unique plaintexts
|
||||
- `size`: Approximate total size in bytes
|
||||
2. **Hash Indexes**: `hash:index:{algorithm}:{hash}`
|
||||
- Points to the plaintext value
|
||||
- One index per hash algorithm (md5, sha1, sha256, sha512)
|
||||
|
||||
### Document Schema
|
||||
3. **Statistics**: `hash:stats` (Redis Hash)
|
||||
- `count`: Total number of documents
|
||||
- `size`: Total data size in bytes
|
||||
|
||||
```typescript
|
||||
{
|
||||
"plaintext": string,
|
||||
"md5": string,
|
||||
"sha1": string,
|
||||
"sha256": string,
|
||||
"sha512": string,
|
||||
"created_at": string (ISO 8601)
|
||||
}
|
||||
### Data Flow
|
||||
|
||||
```
|
||||
Plaintext → Generate Hashes → Store Document
|
||||
↓
|
||||
Create 4 Indexes (one per algorithm)
|
||||
↓
|
||||
Update Statistics
|
||||
```
|
||||
|
||||
## 📁 Project Structure
|
||||
@@ -260,11 +269,11 @@ hasher/
|
||||
│ ├── page.tsx # Main UI component
|
||||
│ └── globals.css # Global styles
|
||||
├── lib/
|
||||
│ ├── redis.ts # Redis client & data layer
|
||||
│ ├── redis.ts # Redis client & operations
|
||||
│ └── hash.ts # Hash utilities
|
||||
├── scripts/
|
||||
│ ├── index-file.ts # Bulk indexing script
|
||||
│ └── remove-duplicates.ts # Duplicate removal utility
|
||||
│ └── remove-duplicates.ts # Duplicate removal script
|
||||
├── package.json
|
||||
├── tsconfig.json
|
||||
├── next.config.ts
|
||||
@@ -287,8 +296,8 @@ Create a `.env.local` file:
|
||||
```env
|
||||
REDIS_HOST=localhost
|
||||
REDIS_PORT=6379
|
||||
REDIS_PASSWORD=your-password
|
||||
REDIS_DB=0
|
||||
REDIS_PASSWORD=your_password # Optional
|
||||
REDIS_DB=0 # Optional
|
||||
```
|
||||
|
||||
### Linting
|
||||
@@ -308,10 +317,23 @@ npm run lint
|
||||
|
||||
## 🚀 Performance
|
||||
|
||||
- **Bulk Indexing**: ~1000-5000 docs/sec (depending on hardware)
|
||||
- **Search Latency**: <50ms (typical)
|
||||
- **Horizontal Scaling**: 10 shards for parallel processing
|
||||
- **Auto-refresh**: Instant search availability for new documents
|
||||
- **Bulk Indexing**: ~5000-15000 docs/sec (depending on hardware)
|
||||
- **Search Latency**: <5ms (typical)
|
||||
- **Memory Efficient**: In-memory storage with optional persistence
|
||||
- **Atomic Operations**: Pipeline-based batch operations
|
||||
|
||||
## 🔧 Redis Configuration
|
||||
|
||||
For optimal performance, consider these Redis settings:
|
||||
|
||||
```conf
|
||||
# redis.conf
|
||||
maxmemory 2gb
|
||||
maxmemory-policy allkeys-lru
|
||||
save 900 1
|
||||
save 300 10
|
||||
save 60 10000
|
||||
```
|
||||
|
||||
## 🤝 Contributing
|
||||
|
||||
@@ -341,4 +363,3 @@ For issues, questions, or contributions, please open an issue on GitHub.
|
||||
---
|
||||
|
||||
**Made with ❤️ for the security and development community**
|
||||
|
||||
|
||||
@@ -1,222 +0,0 @@
|
||||
# Redis Migration - Quick Reference
|
||||
|
||||
## 🚀 Quick Start
|
||||
|
||||
### 1. Install Redis
|
||||
```bash
|
||||
# Ubuntu/Debian
|
||||
sudo apt-get install redis-server
|
||||
|
||||
# macOS
|
||||
brew install redis
|
||||
|
||||
# Start Redis
|
||||
redis-server
|
||||
# or
|
||||
sudo systemctl start redis-server
|
||||
```
|
||||
|
||||
### 2. Configure Environment (Optional)
|
||||
```bash
|
||||
# Create .env.local
|
||||
cat > .env.local << EOF
|
||||
REDIS_HOST=localhost
|
||||
REDIS_PORT=6379
|
||||
REDIS_PASSWORD= # Leave empty if no password
|
||||
REDIS_DB=0
|
||||
EOF
|
||||
```
|
||||
|
||||
### 3. Start Application
|
||||
```bash
|
||||
yarn dev
|
||||
```
|
||||
|
||||
## 🔍 Testing the Migration
|
||||
|
||||
### Test Health Endpoint
|
||||
```bash
|
||||
curl http://localhost:3000/api/health
|
||||
```
|
||||
|
||||
Expected response:
|
||||
```json
|
||||
{
|
||||
"status": "ok",
|
||||
"redis": {
|
||||
"version": "7.x",
|
||||
"memory": "1.5M",
|
||||
"dbSize": 0
|
||||
},
|
||||
"stats": {
|
||||
"count": 0,
|
||||
"size": 0
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Test Search API
|
||||
```bash
|
||||
# Generate hashes
|
||||
curl -X POST http://localhost:3000/api/search \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"query":"password"}'
|
||||
|
||||
# Search for hash
|
||||
curl -X POST http://localhost:3000/api/search \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"query":"5f4dcc3b5aa765d61d8327deb882cf99"}'
|
||||
```
|
||||
|
||||
## 📊 Redis Commands
|
||||
|
||||
### Check Connection
|
||||
```bash
|
||||
redis-cli ping
|
||||
# Should return: PONG
|
||||
```
|
||||
|
||||
### View Data
|
||||
```bash
|
||||
# Count all keys
|
||||
redis-cli DBSIZE
|
||||
|
||||
# List all documents
|
||||
redis-cli KEYS "hash:plaintext:*"
|
||||
|
||||
# Get a specific document
|
||||
redis-cli GET "hash:plaintext:password"
|
||||
|
||||
# Get statistics
|
||||
redis-cli HGETALL hash:stats
|
||||
|
||||
# Search by hash
|
||||
redis-cli GET "hash:index:md5:5f4dcc3b5aa765d61d8327deb882cf99"
|
||||
```
|
||||
|
||||
### Clear Data (if needed)
|
||||
```bash
|
||||
# WARNING: Deletes ALL data in current database
|
||||
redis-cli FLUSHDB
|
||||
```
|
||||
|
||||
## 🔄 Bulk Indexing
|
||||
|
||||
### Basic Usage
|
||||
```bash
|
||||
yarn index-file sample-wordlist.txt
|
||||
```
|
||||
|
||||
### Advanced Options
|
||||
```bash
|
||||
# Custom batch size
|
||||
yarn index-file wordlist.txt -- --batch-size 500
|
||||
|
||||
# Skip duplicate checking (faster)
|
||||
yarn index-file wordlist.txt -- --no-check
|
||||
|
||||
# Resume from previous state
|
||||
yarn index-file wordlist.txt -- --resume
|
||||
|
||||
# Custom state file
|
||||
yarn index-file wordlist.txt -- --state-file .my-state.json
|
||||
```
|
||||
|
||||
## 🐛 Troubleshooting
|
||||
|
||||
### Cannot connect to Redis
|
||||
```bash
|
||||
# Check if Redis is running
|
||||
redis-cli ping
|
||||
|
||||
# Check Redis status
|
||||
sudo systemctl status redis-server
|
||||
|
||||
# View Redis logs
|
||||
sudo journalctl -u redis-server -f
|
||||
```
|
||||
|
||||
### Application shows Redis errors
|
||||
1. Verify Redis is running: `redis-cli ping`
|
||||
2. Check environment variables in `.env.local`
|
||||
3. Check firewall rules if Redis is on another machine
|
||||
4. Verify Redis password if authentication is enabled
|
||||
|
||||
### Clear stale state files
|
||||
```bash
|
||||
rm -f .indexer-state-*.json
|
||||
```
|
||||
|
||||
## 📈 Monitoring
|
||||
|
||||
### Redis Memory Usage
|
||||
```bash
|
||||
redis-cli INFO memory
|
||||
```
|
||||
|
||||
### Redis Stats
|
||||
```bash
|
||||
redis-cli INFO stats
|
||||
```
|
||||
|
||||
### Application Stats
|
||||
```bash
|
||||
curl http://localhost:3000/api/health | jq .
|
||||
```
|
||||
|
||||
## 🔒 Security (Production)
|
||||
|
||||
### Enable Redis Authentication
|
||||
```bash
|
||||
# Edit redis.conf
|
||||
sudo nano /etc/redis/redis.conf
|
||||
|
||||
# Add/uncomment:
|
||||
requirepass your-strong-password
|
||||
|
||||
# Restart Redis
|
||||
sudo systemctl restart redis-server
|
||||
```
|
||||
|
||||
### Update .env.local
|
||||
```env
|
||||
REDIS_PASSWORD=your-strong-password
|
||||
```
|
||||
|
||||
## 📚 Key Differences from Elasticsearch
|
||||
|
||||
| Feature | Elasticsearch | Redis |
|
||||
|---------|--------------|-------|
|
||||
| Data Model | Document-based | Key-value |
|
||||
| Search Complexity | O(log n) | O(1) |
|
||||
| Setup | Complex cluster | Single instance |
|
||||
| Memory | Higher | Lower |
|
||||
| Latency | ~50ms | <10ms |
|
||||
| Scaling | Shards/Replicas | Cluster/Sentinel |
|
||||
|
||||
## ✅ Verification Checklist
|
||||
|
||||
- [ ] Redis is installed and running
|
||||
- [ ] Application builds without errors (`yarn build`)
|
||||
- [ ] Health endpoint returns OK status
|
||||
- [ ] Can generate hashes from plaintext
|
||||
- [ ] Can search for generated hashes
|
||||
- [ ] Statistics display on homepage
|
||||
- [ ] Bulk indexing script works
|
||||
- [ ] Data persists after application restart
|
||||
|
||||
## 📞 Support
|
||||
|
||||
- Redis Documentation: https://redis.io/docs/
|
||||
- ioredis Documentation: https://github.com/redis/ioredis
|
||||
- Project README: [README.md](README.md)
|
||||
|
||||
---
|
||||
|
||||
**Quick Test Command:**
|
||||
```bash
|
||||
# One-liner to test everything
|
||||
redis-cli ping && yarn build && curl -s http://localhost:3000/api/health | jq .status
|
||||
```
|
||||
|
||||
If all commands succeed, the migration is working correctly! ✅
|
||||
100
TESTING.md
100
TESTING.md
@@ -9,7 +9,7 @@ This guide will help you quickly set up and test the Hasher application.
|
||||
Ensure you have:
|
||||
- ✅ Node.js 18.x or higher (`node --version`)
|
||||
- ✅ npm (`npm --version`)
|
||||
- ✅ Redis running on `localhost:6379`
|
||||
- ✅ Redis 7.x or higher running on `localhost:6379`
|
||||
|
||||
### 2. Installation
|
||||
|
||||
@@ -20,6 +20,9 @@ cd hasher
|
||||
# Install dependencies
|
||||
npm install
|
||||
|
||||
# Start Redis (if not running)
|
||||
redis-server
|
||||
|
||||
# Start the development server
|
||||
npm run dev
|
||||
```
|
||||
@@ -38,13 +41,9 @@ Expected response:
|
||||
{
|
||||
"status": "ok",
|
||||
"redis": {
|
||||
"version": "7.x",
|
||||
"memory": "1.5M",
|
||||
"dbSize": 0
|
||||
},
|
||||
"stats": {
|
||||
"count": 0,
|
||||
"size": 0
|
||||
"version": "7.2.4",
|
||||
"connected": true,
|
||||
"memoryUsed": "1.5M"
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -92,12 +91,11 @@ npm run index-file sample-wordlist.txt
|
||||
|
||||
**Expected Output**:
|
||||
```
|
||||
📚 Hasher Indexer
|
||||
📚 Hasher Indexer - Redis Edition
|
||||
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||||
Redis: localhost:6379
|
||||
File: sample-wordlist.txt
|
||||
Batch size: 100
|
||||
Duplicate check: enabled
|
||||
|
||||
🔗 Connecting to Redis...
|
||||
✅ Connected successfully
|
||||
@@ -120,6 +118,16 @@ After running the bulk indexer, search for:
|
||||
|
||||
All should return their plaintext values.
|
||||
|
||||
### Test 6: Remove Duplicates
|
||||
|
||||
```bash
|
||||
# Dry run to preview duplicates
|
||||
npm run remove-duplicates -- --dry-run --field md5
|
||||
|
||||
# Execute removal
|
||||
npm run remove-duplicates -- --execute --field md5
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🔍 API Testing
|
||||
@@ -194,7 +202,7 @@ fetch('/api/search', {
|
||||
- [ ] New plaintext is saved to Redis
|
||||
- [ ] Saved hashes can be found in subsequent searches
|
||||
- [ ] Bulk indexing saves all entries
|
||||
- [ ] Redis keys are created with proper patterns
|
||||
- [ ] Duplicate detection works correctly
|
||||
|
||||
### Error Handling
|
||||
- [ ] Redis connection errors are handled
|
||||
@@ -212,8 +220,12 @@ fetch('/api/search', {
|
||||
```bash
|
||||
# Check if Redis is running
|
||||
redis-cli ping
|
||||
# Should respond: PONG
|
||||
|
||||
# If not accessible, update the environment variables
|
||||
# If not running, start Redis
|
||||
redis-server
|
||||
|
||||
# If using custom host/port, update environment variables
|
||||
export REDIS_HOST=localhost
|
||||
export REDIS_PORT=6379
|
||||
npm run dev
|
||||
@@ -251,34 +263,48 @@ npm run index-file -- "$(pwd)/sample-wordlist.txt"
|
||||
|
||||
## 📊 Verify Data in Redis
|
||||
|
||||
### Check Database Size
|
||||
### Check Redis Connection
|
||||
```bash
|
||||
redis-cli ping
|
||||
```
|
||||
|
||||
### Count Keys
|
||||
```bash
|
||||
redis-cli DBSIZE
|
||||
```
|
||||
|
||||
### Get Statistics
|
||||
```bash
|
||||
redis-cli HGETALL hash:stats
|
||||
```
|
||||
|
||||
### View Sample Documents
|
||||
```bash
|
||||
# List first 10 document keys
|
||||
redis-cli --scan --pattern "hash:plaintext:*" | head -10
|
||||
# List hash document keys
|
||||
redis-cli --scan --pattern "hash:plaintext:*" | head -5
|
||||
|
||||
# Get a specific document
|
||||
redis-cli GET "hash:plaintext:password"
|
||||
```
|
||||
|
||||
### Check Statistics
|
||||
```bash
|
||||
redis-cli HGETALL hash:stats
|
||||
```
|
||||
|
||||
### Search Specific Hash
|
||||
```bash
|
||||
# Find document by MD5 hash
|
||||
# Find plaintext for an MD5 hash
|
||||
redis-cli GET "hash:index:md5:5f4dcc3b5aa765d61d8327deb882cf99"
|
||||
|
||||
# Then get the full document
|
||||
# Get the full document
|
||||
redis-cli GET "hash:plaintext:password"
|
||||
```
|
||||
|
||||
### Monitor Redis Activity
|
||||
```bash
|
||||
# Watch commands in real-time
|
||||
redis-cli MONITOR
|
||||
|
||||
# Check memory usage
|
||||
redis-cli INFO memory
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🎨 UI Testing
|
||||
@@ -318,9 +344,18 @@ Create `search.json`:
|
||||
```
|
||||
|
||||
### Expected Performance
|
||||
- Search latency: < 100ms
|
||||
- Bulk indexing: 1000+ docs/sec
|
||||
- Concurrent requests: 50+
|
||||
- Search latency: < 5ms
|
||||
- Bulk indexing: 5000-15000 docs/sec
|
||||
- Concurrent requests: 100+
|
||||
|
||||
### Redis Performance Testing
|
||||
```bash
|
||||
# Benchmark Redis operations
|
||||
redis-benchmark -t set,get -n 100000 -q
|
||||
|
||||
# Test with pipeline
|
||||
redis-benchmark -t set,get -n 100000 -q -P 16
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -339,6 +374,12 @@ Create `search.json`:
|
||||
- [ ] Error message information disclosure
|
||||
- [ ] Redis authentication (if enabled)
|
||||
|
||||
### Redis Security Checklist
|
||||
- [ ] Redis password configured (REDIS_PASSWORD)
|
||||
- [ ] Redis not exposed to internet
|
||||
- [ ] Firewall rules configured
|
||||
- [ ] TLS/SSL enabled (if needed)
|
||||
|
||||
---
|
||||
|
||||
## ✅ Pre-Production Checklist
|
||||
@@ -347,7 +388,8 @@ Before deploying to production:
|
||||
|
||||
- [ ] All tests passing
|
||||
- [ ] Environment variables configured
|
||||
- [ ] Redis secured and backed up (RDB/AOF)
|
||||
- [ ] Redis secured with password
|
||||
- [ ] Redis persistence configured (RDB/AOF)
|
||||
- [ ] SSL/TLS certificates installed
|
||||
- [ ] Error logging configured
|
||||
- [ ] Monitoring set up
|
||||
@@ -355,6 +397,7 @@ Before deploying to production:
|
||||
- [ ] Security review done
|
||||
- [ ] Documentation reviewed
|
||||
- [ ] Backup strategy in place
|
||||
- [ ] Redis memory limits configured
|
||||
|
||||
---
|
||||
|
||||
@@ -375,6 +418,7 @@ Before deploying to production:
|
||||
- [ ] Hash search: PASS/FAIL
|
||||
- [ ] Bulk indexing: PASS/FAIL
|
||||
- [ ] API endpoints: PASS/FAIL
|
||||
- [ ] Duplicate removal: PASS/FAIL
|
||||
|
||||
### Issues Found
|
||||
1. [Description]
|
||||
@@ -387,6 +431,7 @@ Before deploying to production:
|
||||
- Average search time:
|
||||
- Bulk index rate:
|
||||
- Concurrent users tested:
|
||||
- Redis memory usage:
|
||||
|
||||
## Conclusion
|
||||
[Summary of testing]
|
||||
@@ -402,7 +447,8 @@ After successful testing:
|
||||
2. ✅ Fix any issues found
|
||||
3. ✅ Perform load testing
|
||||
4. ✅ Review security
|
||||
5. ✅ Prepare for deployment
|
||||
5. ✅ Configure Redis persistence
|
||||
6. ✅ Prepare for deployment
|
||||
|
||||
See [DEPLOYMENT.md](DEPLOYMENT.md) for deployment instructions.
|
||||
|
||||
|
||||
@@ -6,24 +6,19 @@ export async function GET() {
|
||||
// Check Redis connection and get info
|
||||
const redisInfo = await getRedisInfo();
|
||||
|
||||
// Get index stats
|
||||
// Get stats
|
||||
const stats = await getStats();
|
||||
|
||||
return NextResponse.json({
|
||||
status: 'ok',
|
||||
redis: {
|
||||
connected: redisInfo.connected,
|
||||
version: redisInfo.version,
|
||||
usedMemory: redisInfo.usedMemory,
|
||||
memory: redisInfo.memory,
|
||||
dbSize: redisInfo.dbSize
|
||||
},
|
||||
index: {
|
||||
exists: true,
|
||||
name: INDEX_NAME,
|
||||
stats: {
|
||||
documentCount: stats.count,
|
||||
indexSize: stats.size
|
||||
}
|
||||
stats: {
|
||||
count: stats.count,
|
||||
size: stats.size
|
||||
}
|
||||
});
|
||||
} catch (error) {
|
||||
|
||||
143
app/page.tsx
143
app/page.tsx
@@ -1,7 +1,8 @@
|
||||
'use client';
|
||||
|
||||
import { useState, useEffect } from 'react';
|
||||
import { Search, Copy, Check, Hash, Key, AlertCircle, Loader2, Database } from 'lucide-react';
|
||||
import { useState, useEffect, useCallback, Suspense } from 'react';
|
||||
import { useSearchParams } from 'next/navigation';
|
||||
import { Search, Copy, Check, Hash, Key, AlertCircle, Loader2, Database, Link } from 'lucide-react';
|
||||
|
||||
interface SearchResult {
|
||||
found: boolean;
|
||||
@@ -45,13 +46,62 @@ function formatNumber(num: number): string {
|
||||
return num.toLocaleString();
|
||||
}
|
||||
|
||||
export default function Home() {
|
||||
function HasherContent() {
|
||||
const searchParams = useSearchParams();
|
||||
const [query, setQuery] = useState('');
|
||||
const [result, setResult] = useState<SearchResult | null>(null);
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [error, setError] = useState('');
|
||||
const [copiedField, setCopiedField] = useState<string | null>(null);
|
||||
const [stats, setStats] = useState<IndexStats | null>(null);
|
||||
const [copiedLink, setCopiedLink] = useState(false);
|
||||
const [initialLoadDone, setInitialLoadDone] = useState(false);
|
||||
|
||||
const performSearch = useCallback(async (searchQuery: string, updateUrl: boolean = true) => {
|
||||
if (!searchQuery.trim()) return;
|
||||
|
||||
setLoading(true);
|
||||
setError('');
|
||||
setResult(null);
|
||||
|
||||
try {
|
||||
const response = await fetch('/api/search', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ query: searchQuery.trim() })
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error('Search failed');
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
setResult(data);
|
||||
|
||||
// Update URL with search query (using history API to avoid re-triggering effects)
|
||||
if (updateUrl) {
|
||||
const newUrl = new URL(window.location.href);
|
||||
newUrl.searchParams.set('q', searchQuery.trim());
|
||||
window.history.replaceState(null, '', newUrl.pathname + newUrl.search);
|
||||
}
|
||||
} catch (_err) {
|
||||
setError('Failed to perform search. Please check your connection.');
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
}, []);
|
||||
|
||||
// Load query from URL on mount (only once)
|
||||
useEffect(() => {
|
||||
if (initialLoadDone) return;
|
||||
|
||||
const urlQuery = searchParams.get('q');
|
||||
if (urlQuery) {
|
||||
setQuery(urlQuery);
|
||||
performSearch(urlQuery, false);
|
||||
}
|
||||
setInitialLoadDone(true);
|
||||
}, [searchParams, performSearch, initialLoadDone]);
|
||||
|
||||
useEffect(() => {
|
||||
const fetchStats = async () => {
|
||||
@@ -73,30 +123,7 @@ export default function Home() {
|
||||
|
||||
const handleSearch = async (e: React.FormEvent) => {
|
||||
e.preventDefault();
|
||||
if (!query.trim()) return;
|
||||
|
||||
setLoading(true);
|
||||
setError('');
|
||||
setResult(null);
|
||||
|
||||
try {
|
||||
const response = await fetch('/api/search', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ query: query.trim() })
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error('Search failed');
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
setResult(data);
|
||||
} catch (_err) {
|
||||
setError('Failed to perform search. Please check your connection.');
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
performSearch(query);
|
||||
};
|
||||
|
||||
const copyToClipboard = (text: string, field: string) => {
|
||||
@@ -105,6 +132,14 @@ export default function Home() {
|
||||
setTimeout(() => setCopiedField(null), 2000);
|
||||
};
|
||||
|
||||
const copyShareLink = () => {
|
||||
const url = new URL(window.location.href);
|
||||
url.searchParams.set('q', query.trim());
|
||||
navigator.clipboard.writeText(url.toString());
|
||||
setCopiedLink(true);
|
||||
setTimeout(() => setCopiedLink(false), 2000);
|
||||
};
|
||||
|
||||
const HashDisplay = ({ label, value, field }: { label: string; value: string; field: string }) => (
|
||||
<div className="bg-gray-50 rounded-lg p-4 border border-gray-200">
|
||||
<div className="flex items-center justify-between mb-2">
|
||||
@@ -166,19 +201,35 @@ export default function Home() {
|
||||
value={query}
|
||||
onChange={(e) => setQuery(e.target.value)}
|
||||
placeholder="Enter a hash or plaintext..."
|
||||
className="w-full px-6 py-4 pr-14 text-lg rounded-2xl border-2 border-gray-200 focus:border-blue-500 focus:ring-4 focus:ring-blue-100 outline-none transition-all shadow-sm"
|
||||
className="w-full px-6 py-4 pr-28 text-lg rounded-2xl border-2 border-gray-200 focus:border-blue-500 focus:ring-4 focus:ring-blue-100 outline-none transition-all shadow-sm"
|
||||
/>
|
||||
<button
|
||||
type="submit"
|
||||
disabled={loading || !query.trim()}
|
||||
className="absolute right-2 top-1/2 -translate-y-1/2 bg-gradient-to-r from-blue-600 to-purple-600 text-white p-3 rounded-xl hover:shadow-lg disabled:opacity-50 disabled:cursor-not-allowed transition-all"
|
||||
>
|
||||
{loading ? (
|
||||
<Loader2 className="w-6 h-6 animate-spin" />
|
||||
) : (
|
||||
<Search className="w-6 h-6" />
|
||||
<div className="absolute right-2 top-1/2 -translate-y-1/2 flex gap-1">
|
||||
{query.trim() && (
|
||||
<button
|
||||
type="button"
|
||||
onClick={copyShareLink}
|
||||
className="bg-gray-100 text-gray-600 p-3 rounded-xl hover:bg-gray-200 transition-all"
|
||||
title="Copy share link"
|
||||
>
|
||||
{copiedLink ? (
|
||||
<Check className="w-6 h-6 text-green-600" />
|
||||
) : (
|
||||
<Link className="w-6 h-6" />
|
||||
)}
|
||||
</button>
|
||||
)}
|
||||
</button>
|
||||
<button
|
||||
type="submit"
|
||||
disabled={loading || !query.trim()}
|
||||
className="bg-gradient-to-r from-blue-600 to-purple-600 text-white p-3 rounded-xl hover:shadow-lg disabled:opacity-50 disabled:cursor-not-allowed transition-all"
|
||||
>
|
||||
{loading ? (
|
||||
<Loader2 className="w-6 h-6 animate-spin" />
|
||||
) : (
|
||||
<Search className="w-6 h-6" />
|
||||
)}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
@@ -315,3 +366,19 @@ export default function Home() {
|
||||
);
|
||||
}
|
||||
|
||||
function LoadingFallback() {
|
||||
return (
|
||||
<div className="min-h-screen bg-gradient-to-br from-blue-50 via-white to-purple-50 flex items-center justify-center">
|
||||
<Loader2 className="w-12 h-12 text-blue-600 animate-spin" />
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default function Home() {
|
||||
return (
|
||||
<Suspense fallback={<LoadingFallback />}>
|
||||
<HasherContent />
|
||||
</Suspense>
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ export interface HashResult {
|
||||
/**
|
||||
* Generate all common hashes for a given plaintext
|
||||
*/
|
||||
export async function generateHashes(plaintext: string): Promise<HashResult> {
|
||||
export function generateHashes(plaintext: string): HashResult {
|
||||
return {
|
||||
plaintext,
|
||||
md5: crypto.createHash('md5').update(plaintext).digest('hex'),
|
||||
|
||||
85
lib/redis.ts
85
lib/redis.ts
@@ -103,76 +103,79 @@ export async function findByHash(algorithm: string, hash: string): Promise<HashD
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if plaintext or any of its hashes exist
|
||||
* Check if a plaintext or any of its hashes exist
|
||||
*/
|
||||
export async function checkExistence(plaintext: string, hashes: {
|
||||
md5: string;
|
||||
sha1: string;
|
||||
sha256: string;
|
||||
sha512: string;
|
||||
export async function checkExistence(plaintext: string, hashes?: {
|
||||
md5?: string;
|
||||
sha1?: string;
|
||||
sha256?: string;
|
||||
sha512?: string;
|
||||
}): Promise<boolean> {
|
||||
const pipeline = redisClient.pipeline();
|
||||
// Check if plaintext exists
|
||||
const plaintextKey = `hash:plaintext:${plaintext}`;
|
||||
const exists = await redisClient.exists(plaintextKey);
|
||||
|
||||
pipeline.exists(`hash:plaintext:${plaintext}`);
|
||||
pipeline.exists(`hash:index:md5:${hashes.md5}`);
|
||||
pipeline.exists(`hash:index:sha1:${hashes.sha1}`);
|
||||
pipeline.exists(`hash:index:sha256:${hashes.sha256}`);
|
||||
pipeline.exists(`hash:index:sha512:${hashes.sha512}`);
|
||||
if (exists) return true;
|
||||
|
||||
const results = await pipeline.exec();
|
||||
// Check if any hash exists
|
||||
if (hashes) {
|
||||
const pipeline = redisClient.pipeline();
|
||||
if (hashes.md5) pipeline.exists(`hash:index:md5:${hashes.md5}`);
|
||||
if (hashes.sha1) pipeline.exists(`hash:index:sha1:${hashes.sha1}`);
|
||||
if (hashes.sha256) pipeline.exists(`hash:index:sha256:${hashes.sha256}`);
|
||||
if (hashes.sha512) pipeline.exists(`hash:index:sha512:${hashes.sha512}`);
|
||||
|
||||
if (!results) return false;
|
||||
const results = await pipeline.exec();
|
||||
if (results && results.some(([_err, result]) => result === 1)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Check if any key exists
|
||||
return results.some(([err, value]) => !err && value === 1);
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get index statistics
|
||||
* Get database statistics
|
||||
*/
|
||||
export async function getStats(): Promise<{ count: number; size: number }> {
|
||||
const stats = await redisClient.hgetall('hash:stats');
|
||||
|
||||
return {
|
||||
count: parseInt(stats.count || '0', 10),
|
||||
size: parseInt(stats.size || '0', 10)
|
||||
size: parseInt(stats.size || '0', 10),
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize Redis (compatibility function, Redis doesn't need explicit initialization)
|
||||
*/
|
||||
export async function initializeRedis(): Promise<void> {
|
||||
// Check connection
|
||||
await redisClient.ping();
|
||||
console.log('Redis initialized successfully');
|
||||
}
|
||||
|
||||
/**
|
||||
* Get Redis info for health check
|
||||
* Get Redis server info
|
||||
*/
|
||||
export async function getRedisInfo(): Promise<{
|
||||
connected: boolean;
|
||||
version: string;
|
||||
usedMemory: number;
|
||||
memory: string;
|
||||
dbSize: number;
|
||||
}> {
|
||||
const info = await redisClient.info('server');
|
||||
const memory = await redisClient.info('memory');
|
||||
const dbSize = await redisClient.dbsize();
|
||||
|
||||
// Parse Redis info string
|
||||
const parseInfo = (infoStr: string, key: string): string => {
|
||||
const match = infoStr.match(new RegExp(`${key}:(.+)`));
|
||||
return match ? match[1].trim() : 'unknown';
|
||||
};
|
||||
const versionMatch = info.match(/redis_version:([^\r\n]+)/);
|
||||
const memoryMatch = memory.match(/used_memory_human:([^\r\n]+)/);
|
||||
|
||||
return {
|
||||
connected: redisClient.status === 'ready',
|
||||
version: parseInfo(info, 'redis_version'),
|
||||
usedMemory: parseInt(parseInfo(memory, 'used_memory'), 10) || 0,
|
||||
dbSize
|
||||
version: versionMatch ? versionMatch[1] : 'unknown',
|
||||
memory: memoryMatch ? memoryMatch[1] : 'unknown',
|
||||
dbSize,
|
||||
};
|
||||
}
|
||||
|
||||
export { REDIS_HOST, REDIS_PORT };
|
||||
/**
|
||||
* Initialize Redis connection (just verify it's working)
|
||||
*/
|
||||
export async function initializeRedis(): Promise<void> {
|
||||
try {
|
||||
await redisClient.ping();
|
||||
console.log('Redis connection verified');
|
||||
} catch (error) {
|
||||
console.error('Error connecting to Redis:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,16 +20,15 @@
|
||||
*/
|
||||
|
||||
import Redis from 'ioredis';
|
||||
import { createReadStream, existsSync, readFileSync, writeFileSync, unlinkSync } from 'fs';
|
||||
import { createReadStream, existsSync, readFileSync, writeFileSync, unlinkSync, openSync, readSync, closeSync } from 'fs';
|
||||
import { resolve, basename } from 'path';
|
||||
import { createInterface } from 'readline';
|
||||
import crypto from 'crypto';
|
||||
import * as crypto from 'crypto';
|
||||
|
||||
const REDIS_HOST = process.env.REDIS_HOST || 'localhost';
|
||||
const REDIS_PORT = parseInt(process.env.REDIS_PORT || '6379', 10);
|
||||
const REDIS_PASSWORD = process.env.REDIS_PASSWORD || undefined;
|
||||
const REDIS_DB = parseInt(process.env.REDIS_DB || '0', 10);
|
||||
const INDEX_NAME = 'hasher';
|
||||
const DEFAULT_BATCH_SIZE = 100;
|
||||
|
||||
interface HashDocument {
|
||||
@@ -90,13 +89,12 @@ function parseArgs(args: string[]): ParsedArgs {
|
||||
result.batchSize = parsed;
|
||||
}
|
||||
} else if (arg === '--batch-size') {
|
||||
// Support --batch-size <value> format
|
||||
const nextArg = args[i + 1];
|
||||
if (nextArg && !nextArg.startsWith('-')) {
|
||||
const parsed = parseInt(nextArg, 10);
|
||||
if (!isNaN(parsed) && parsed > 0) {
|
||||
result.batchSize = parsed;
|
||||
i++; // Skip next argument
|
||||
i++;
|
||||
}
|
||||
}
|
||||
} else if (arg.startsWith('--state-file=')) {
|
||||
@@ -108,7 +106,6 @@ function parseArgs(args: string[]): ParsedArgs {
|
||||
i++;
|
||||
}
|
||||
} else if (!arg.startsWith('-')) {
|
||||
// Positional argument - treat as file path
|
||||
result.filePath = arg;
|
||||
}
|
||||
}
|
||||
@@ -116,50 +113,7 @@ function parseArgs(args: string[]): ParsedArgs {
|
||||
return result;
|
||||
}
|
||||
|
||||
function getFileHash(filePath: string): string {
|
||||
// Create a hash based on file path and size for quick identification
|
||||
const stats = require('fs').statSync(filePath);
|
||||
const hashInput = `${filePath}:${stats.size}:${stats.mtime.getTime()}`;
|
||||
return crypto.createHash('md5').update(hashInput).digest('hex').substring(0, 8);
|
||||
}
|
||||
|
||||
function getDefaultStateFile(filePath: string): string {
|
||||
const fileName = basename(filePath).replace(/\.[^.]+$/, '');
|
||||
return resolve(`.indexer-state-${fileName}.json`);
|
||||
}
|
||||
|
||||
function loadState(stateFile: string): IndexerState | null {
|
||||
try {
|
||||
if (existsSync(stateFile)) {
|
||||
const data = readFileSync(stateFile, 'utf-8');
|
||||
return JSON.parse(data) as IndexerState;
|
||||
}
|
||||
} catch (error) {
|
||||
console.warn(`⚠️ Could not load state file: ${error}`);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
function saveState(stateFile: string, state: IndexerState): void {
|
||||
try {
|
||||
state.lastUpdate = new Date().toISOString();
|
||||
writeFileSync(stateFile, JSON.stringify(state, null, 2), 'utf-8');
|
||||
} catch (error) {
|
||||
console.error(`❌ Could not save state file: ${error}`);
|
||||
}
|
||||
}
|
||||
|
||||
function deleteState(stateFile: string): void {
|
||||
try {
|
||||
if (existsSync(stateFile)) {
|
||||
unlinkSync(stateFile);
|
||||
}
|
||||
} catch (error) {
|
||||
console.warn(`⚠️ Could not delete state file: ${error}`);
|
||||
}
|
||||
}
|
||||
|
||||
async function generateHashes(plaintext: string): Promise<HashDocument> {
|
||||
function generateHashes(plaintext: string): HashDocument {
|
||||
return {
|
||||
plaintext,
|
||||
md5: crypto.createHash('md5').update(plaintext).digest('hex'),
|
||||
@@ -194,78 +148,181 @@ Environment Variables:
|
||||
REDIS_DB Redis database number (default: 0)
|
||||
|
||||
Examples:
|
||||
npx tsx scripts/index-file.ts wordlist.txt
|
||||
npx tsx scripts/index-file.ts wordlist.txt --batch-size=500
|
||||
npx tsx scripts/index-file.ts wordlist.txt --batch-size 500
|
||||
npx tsx scripts/index-file.ts wordlist.txt --no-resume
|
||||
npx tsx scripts/index-file.ts wordlist.txt --no-check
|
||||
npm run index-file -- wordlist.txt --batch-size=500 --no-check
|
||||
# Index a file with default settings
|
||||
npm run index-file -- wordlist.txt
|
||||
|
||||
State Management:
|
||||
The script automatically saves progress to a state file. If interrupted,
|
||||
it will resume from where it left off on the next run. Use --no-resume
|
||||
to start fresh.
|
||||
# Index with custom batch size
|
||||
npm run index-file -- wordlist.txt --batch-size=500
|
||||
|
||||
Duplicate Checking:
|
||||
By default, the script checks if each plaintext or hash already exists
|
||||
in the index before inserting. Use --no-check to skip this verification
|
||||
for faster indexing (useful when you're sure there are no duplicates).
|
||||
# Start fresh (ignore previous state)
|
||||
npm run index-file -- wordlist.txt --no-resume
|
||||
|
||||
# Skip duplicate checking for speed
|
||||
npm run index-file -- wordlist.txt --no-check
|
||||
`);
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
async function indexFile(filePath: string, batchSize: number, shouldResume: boolean, checkDuplicates: boolean, customStateFile: string | null) {
|
||||
function computeFileHash(filePath: string): string {
|
||||
// Use streaming for large files to avoid memory issues
|
||||
const hash = crypto.createHash('sha256');
|
||||
const input = createReadStream(filePath, { highWaterMark: 64 * 1024 }); // 64KB chunks
|
||||
|
||||
let buffer = Buffer.alloc(0);
|
||||
const fd = openSync(filePath, 'r');
|
||||
const chunkSize = 64 * 1024; // 64KB
|
||||
const readBuffer = Buffer.alloc(chunkSize);
|
||||
|
||||
try {
|
||||
let bytesRead;
|
||||
do {
|
||||
bytesRead = readSync(fd, readBuffer, 0, chunkSize, null);
|
||||
if (bytesRead > 0) {
|
||||
hash.update(readBuffer.subarray(0, bytesRead));
|
||||
}
|
||||
} while (bytesRead > 0);
|
||||
} finally {
|
||||
closeSync(fd);
|
||||
}
|
||||
|
||||
return hash.digest('hex');
|
||||
}
|
||||
|
||||
function getStateFilePath(filePath: string, customPath: string | null): string {
|
||||
if (customPath) {
|
||||
return resolve(customPath);
|
||||
}
|
||||
const fileName = basename(filePath);
|
||||
return resolve(`.indexer-state-${fileName}.json`);
|
||||
}
|
||||
|
||||
function loadState(stateFilePath: string): IndexerState | null {
|
||||
if (!existsSync(stateFilePath)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
const data = readFileSync(stateFilePath, 'utf-8');
|
||||
return JSON.parse(data);
|
||||
} catch (error) {
|
||||
console.warn(`⚠️ Could not load state file: ${error}`);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function saveState(stateFilePath: string, state: IndexerState): void {
|
||||
try {
|
||||
writeFileSync(stateFilePath, JSON.stringify(state, null, 2), 'utf-8');
|
||||
} catch (error) {
|
||||
console.error(`❌ Could not save state file: ${error}`);
|
||||
}
|
||||
}
|
||||
|
||||
function deleteState(stateFilePath: string): void {
|
||||
try {
|
||||
if (existsSync(stateFilePath)) {
|
||||
unlinkSync(stateFilePath);
|
||||
}
|
||||
} catch (error) {
|
||||
console.warn(`⚠️ Could not delete state file: ${error}`);
|
||||
}
|
||||
}
|
||||
|
||||
async function countLines(filePath: string): Promise<number> {
|
||||
return new Promise((resolve, reject) => {
|
||||
let lineCount = 0;
|
||||
const rl = createInterface({
|
||||
input: createReadStream(filePath),
|
||||
crlfDelay: Infinity
|
||||
});
|
||||
|
||||
rl.on('line', () => lineCount++);
|
||||
rl.on('close', () => resolve(lineCount));
|
||||
rl.on('error', reject);
|
||||
});
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const args = process.argv.slice(2);
|
||||
const parsed = parseArgs(args);
|
||||
|
||||
if (parsed.showHelp || !parsed.filePath) {
|
||||
showHelp();
|
||||
process.exit(parsed.showHelp ? 0 : 1);
|
||||
}
|
||||
|
||||
const filePath = parsed.filePath!;
|
||||
const batchSize = parsed.batchSize;
|
||||
const checkDuplicates = parsed.checkDuplicates;
|
||||
|
||||
const absolutePath = resolve(filePath);
|
||||
|
||||
if (!existsSync(absolutePath)) {
|
||||
console.error(`❌ File not found: ${absolutePath}`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const stateFile = getStateFilePath(filePath, parsed.stateFile);
|
||||
const fileHash = computeFileHash(absolutePath);
|
||||
|
||||
let state: IndexerState;
|
||||
let resumingFrom = 0;
|
||||
|
||||
if (parsed.resume) {
|
||||
const loadedState = loadState(stateFile);
|
||||
if (loadedState && loadedState.fileHash === fileHash) {
|
||||
state = loadedState;
|
||||
resumingFrom = state.lastProcessedLine;
|
||||
console.log(`📂 Resuming from previous state: ${stateFile}`);
|
||||
} else {
|
||||
if (loadedState) {
|
||||
console.log('⚠️ File has changed or state file is from a different file. Starting fresh.');
|
||||
}
|
||||
state = {
|
||||
filePath: absolutePath,
|
||||
fileHash,
|
||||
lastProcessedLine: 0,
|
||||
totalLines: 0,
|
||||
indexed: 0,
|
||||
skipped: 0,
|
||||
errors: 0,
|
||||
startTime: Date.now(),
|
||||
lastUpdate: new Date().toISOString()
|
||||
};
|
||||
}
|
||||
} else {
|
||||
deleteState(stateFile);
|
||||
state = {
|
||||
filePath: absolutePath,
|
||||
fileHash,
|
||||
lastProcessedLine: 0,
|
||||
totalLines: 0,
|
||||
indexed: 0,
|
||||
skipped: 0,
|
||||
errors: 0,
|
||||
startTime: Date.now(),
|
||||
lastUpdate: new Date().toISOString()
|
||||
};
|
||||
}
|
||||
|
||||
if (state.totalLines === 0) {
|
||||
console.log('🔢 Counting lines...');
|
||||
state.totalLines = await countLines(absolutePath);
|
||||
}
|
||||
|
||||
const client = new Redis({
|
||||
host: REDIS_HOST,
|
||||
port: REDIS_PORT,
|
||||
password: REDIS_PASSWORD,
|
||||
db: REDIS_DB,
|
||||
retryStrategy: (times) => Math.min(times * 50, 2000),
|
||||
});
|
||||
|
||||
const absolutePath = resolve(filePath);
|
||||
const stateFile = customStateFile || getDefaultStateFile(absolutePath);
|
||||
const fileHash = getFileHash(absolutePath);
|
||||
|
||||
// State management
|
||||
let state: IndexerState = {
|
||||
filePath: absolutePath,
|
||||
fileHash,
|
||||
lastProcessedLine: 0,
|
||||
totalLines: 0,
|
||||
indexed: 0,
|
||||
skipped: 0,
|
||||
errors: 0,
|
||||
startTime: Date.now(),
|
||||
lastUpdate: new Date().toISOString()
|
||||
};
|
||||
|
||||
// Check for existing state
|
||||
const existingState = loadState(stateFile);
|
||||
let resumingFrom = 0;
|
||||
|
||||
if (shouldResume && existingState) {
|
||||
if (existingState.fileHash === fileHash) {
|
||||
state = existingState;
|
||||
resumingFrom = state.lastProcessedLine;
|
||||
state.startTime = Date.now(); // Reset start time for this session
|
||||
console.log(`📂 Found existing state, resuming from line ${resumingFrom}`);
|
||||
} else {
|
||||
console.log(`⚠️ File has changed since last run, starting fresh`);
|
||||
deleteState(stateFile);
|
||||
}
|
||||
} else if (!shouldResume) {
|
||||
deleteState(stateFile);
|
||||
}
|
||||
|
||||
console.log(`📚 Hasher Indexer`);
|
||||
console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
|
||||
console.log(`Redis: ${REDIS_HOST}:${REDIS_PORT} (DB ${REDIS_DB})`);
|
||||
console.log(`Index: ${INDEX_NAME}`);
|
||||
console.log('');
|
||||
console.log('📚 Hasher Indexer');
|
||||
console.log('━'.repeat(42));
|
||||
console.log(`Redis: ${REDIS_HOST}:${REDIS_PORT}`);
|
||||
console.log(`File: ${filePath}`);
|
||||
console.log(`Batch size: ${batchSize}`);
|
||||
console.log(`Check duplicates: ${checkDuplicates ? 'yes' : 'no (--no-check)'}`);
|
||||
console.log(`State file: ${stateFile}`);
|
||||
console.log(`Duplicate check: ${checkDuplicates ? 'enabled' : 'disabled (--no-check)'}`);
|
||||
if (resumingFrom > 0) {
|
||||
console.log(`Resuming from: line ${resumingFrom}`);
|
||||
console.log(`Already indexed: ${state.indexed}`);
|
||||
@@ -273,7 +330,6 @@ async function indexFile(filePath: string, batchSize: number, shouldResume: bool
|
||||
}
|
||||
console.log('');
|
||||
|
||||
// Handle interruption signals
|
||||
let isInterrupted = false;
|
||||
const handleInterrupt = () => {
|
||||
if (isInterrupted) {
|
||||
@@ -285,7 +341,6 @@ async function indexFile(filePath: string, batchSize: number, shouldResume: bool
|
||||
saveState(stateFile, state);
|
||||
console.log(`💾 State saved to ${stateFile}`);
|
||||
console.log(` Resume with: npx tsx scripts/index-file.ts ${filePath}`);
|
||||
console.log(` Or start fresh with: npx tsx scripts/index-file.ts ${filePath} --no-resume`);
|
||||
process.exit(0);
|
||||
};
|
||||
|
||||
@@ -293,13 +348,11 @@ async function indexFile(filePath: string, batchSize: number, shouldResume: bool
|
||||
process.on('SIGTERM', handleInterrupt);
|
||||
|
||||
try {
|
||||
// Test connection
|
||||
console.log('🔗 Connecting to Redis...');
|
||||
await client.ping();
|
||||
console.log('✅ Connected successfully\n');
|
||||
|
||||
// Process file line by line using streams
|
||||
console.log('📖 Processing file...\n');
|
||||
console.log('📖 Reading file...\n');
|
||||
|
||||
let currentLineNumber = 0;
|
||||
let currentBatch: string[] = [];
|
||||
@@ -314,208 +367,128 @@ async function indexFile(filePath: string, batchSize: number, shouldResume: bool
|
||||
crlfDelay: Infinity
|
||||
});
|
||||
|
||||
const processBatch = async (batch: string[], lineNumber: number) => {
|
||||
if (batch.length === 0) return;
|
||||
if (isInterrupted) return;
|
||||
const processBatch = async (batch: string[]) => {
|
||||
if (batch.length === 0 || isInterrupted) return;
|
||||
|
||||
// Generate hashes for all items in batch first
|
||||
const batchWithHashes = await Promise.all(
|
||||
batch.map(async (plaintext: string) => ({
|
||||
plaintext,
|
||||
hashes: await generateHashes(plaintext)
|
||||
}))
|
||||
);
|
||||
const batchWithHashes = batch.map(plaintext => generateHashes(plaintext));
|
||||
|
||||
const pipeline = client.pipeline();
|
||||
let toIndex: typeof batchWithHashes = [];
|
||||
let toIndex = batchWithHashes;
|
||||
|
||||
if (checkDuplicates) {
|
||||
// Check which items already exist
|
||||
const existenceChecks = await Promise.all(
|
||||
batchWithHashes.map(async (item) => {
|
||||
const plaintextExists = await client.exists(`hash:plaintext:${item.plaintext}`);
|
||||
if (plaintextExists) return { item, exists: true };
|
||||
|
||||
// Check if any hash exists
|
||||
const md5Exists = await client.exists(`hash:index:md5:${item.hashes.md5}`);
|
||||
const sha1Exists = await client.exists(`hash:index:sha1:${item.hashes.sha1}`);
|
||||
const sha256Exists = await client.exists(`hash:index:sha256:${item.hashes.sha256}`);
|
||||
const sha512Exists = await client.exists(`hash:index:sha512:${item.hashes.sha512}`);
|
||||
|
||||
return {
|
||||
item,
|
||||
exists: md5Exists || sha1Exists || sha256Exists || sha512Exists
|
||||
};
|
||||
})
|
||||
batchWithHashes.map(doc => client.exists(`hash:plaintext:${doc.plaintext}`))
|
||||
);
|
||||
|
||||
for (const check of existenceChecks) {
|
||||
if (check.exists) {
|
||||
state.skipped++;
|
||||
sessionSkipped++;
|
||||
} else {
|
||||
toIndex.push(check.item);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// No duplicate checking - index everything
|
||||
toIndex = batchWithHashes;
|
||||
const newDocs = batchWithHashes.filter((_doc, idx) => existenceChecks[idx] === 0);
|
||||
const existingCount = batchWithHashes.length - newDocs.length;
|
||||
|
||||
state.skipped += existingCount;
|
||||
sessionSkipped += existingCount;
|
||||
toIndex = newDocs;
|
||||
}
|
||||
|
||||
// Execute bulk operations
|
||||
if (toIndex.length > 0) {
|
||||
try {
|
||||
for (const item of toIndex) {
|
||||
const doc = item.hashes;
|
||||
const key = `hash:plaintext:${doc.plaintext}`;
|
||||
const pipeline = client.pipeline();
|
||||
|
||||
// Store main document
|
||||
pipeline.set(key, JSON.stringify(doc));
|
||||
for (const doc of toIndex) {
|
||||
const key = `hash:plaintext:${doc.plaintext}`;
|
||||
|
||||
// Create indexes for each hash type
|
||||
pipeline.set(`hash:index:md5:${doc.md5}`, doc.plaintext);
|
||||
pipeline.set(`hash:index:sha1:${doc.sha1}`, doc.plaintext);
|
||||
pipeline.set(`hash:index:sha256:${doc.sha256}`, doc.plaintext);
|
||||
pipeline.set(`hash:index:sha512:${doc.sha512}`, doc.plaintext);
|
||||
pipeline.set(key, JSON.stringify(doc));
|
||||
|
||||
// Update statistics
|
||||
pipeline.hincrby('hash:stats', 'count', 1);
|
||||
pipeline.hincrby('hash:stats', 'size', JSON.stringify(doc).length);
|
||||
}
|
||||
pipeline.set(`hash:index:md5:${doc.md5}`, doc.plaintext);
|
||||
pipeline.set(`hash:index:sha1:${doc.sha1}`, doc.plaintext);
|
||||
pipeline.set(`hash:index:sha256:${doc.sha256}`, doc.plaintext);
|
||||
pipeline.set(`hash:index:sha512:${doc.sha512}`, doc.plaintext);
|
||||
|
||||
const results = await pipeline.exec();
|
||||
pipeline.hincrby('hash:stats', 'count', 1);
|
||||
pipeline.hincrby('hash:stats', 'size', JSON.stringify(doc).length);
|
||||
}
|
||||
|
||||
// Count errors
|
||||
const errorCount = results?.filter(([err]) => err !== null).length || 0;
|
||||
const results = await pipeline.exec();
|
||||
|
||||
if (errorCount > 0) {
|
||||
state.errors += errorCount;
|
||||
sessionErrors += errorCount;
|
||||
const successCount = toIndex.length - errorCount;
|
||||
state.indexed += successCount;
|
||||
sessionIndexed += successCount;
|
||||
} else {
|
||||
state.indexed += toIndex.length;
|
||||
sessionIndexed += toIndex.length;
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`\n❌ Error processing batch:`, error);
|
||||
state.errors += toIndex.length;
|
||||
sessionErrors += toIndex.length;
|
||||
const errorCount = results?.filter(([err]) => err !== null).length || 0;
|
||||
|
||||
if (errorCount > 0) {
|
||||
state.errors += errorCount;
|
||||
sessionErrors += errorCount;
|
||||
const successCount = toIndex.length - errorCount;
|
||||
state.indexed += successCount;
|
||||
sessionIndexed += successCount;
|
||||
} else {
|
||||
state.indexed += toIndex.length;
|
||||
sessionIndexed += toIndex.length;
|
||||
}
|
||||
}
|
||||
|
||||
// Update state
|
||||
state.lastProcessedLine = lineNumber;
|
||||
state.totalLines = lineNumber;
|
||||
state.lastUpdate = new Date().toISOString();
|
||||
|
||||
// Save state periodically (every 10 batches)
|
||||
if (lineNumber % (batchSize * 10) === 0) {
|
||||
saveState(stateFile, state);
|
||||
}
|
||||
const progress = ((state.lastProcessedLine / state.totalLines) * 100).toFixed(1);
|
||||
process.stdout.write(
|
||||
`\r⏳ Progress: ${state.lastProcessedLine}/${state.totalLines} (${progress}%) - ` +
|
||||
`Indexed: ${sessionIndexed}, Skipped: ${sessionSkipped}, Errors: ${sessionErrors} `
|
||||
);
|
||||
|
||||
// Progress indicator
|
||||
const elapsed = ((Date.now() - sessionStartTime) / 1000).toFixed(0);
|
||||
process.stdout.write(`\r⏳ Line: ${lineNumber} | Session: +${sessionIndexed} indexed, +${sessionSkipped} skipped | Total: ${state.indexed} indexed | Time: ${elapsed}s`);
|
||||
saveState(stateFile, state);
|
||||
};
|
||||
|
||||
for await (const line of rl) {
|
||||
if (isInterrupted) break;
|
||||
|
||||
currentLineNumber++;
|
||||
|
||||
// Skip already processed lines
|
||||
if (currentLineNumber <= resumingFrom) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const trimmedLine = line.trim();
|
||||
if (trimmedLine.length > 0) {
|
||||
// Only take first word (no spaces or separators)
|
||||
const firstWord = trimmedLine.split(/\s+/)[0];
|
||||
if (firstWord) {
|
||||
currentBatch.push(firstWord);
|
||||
if (isInterrupted) break;
|
||||
|
||||
if (currentBatch.length >= batchSize) {
|
||||
await processBatch(currentBatch, currentLineNumber);
|
||||
currentBatch = [];
|
||||
}
|
||||
}
|
||||
const trimmed = line.trim();
|
||||
if (!trimmed) continue;
|
||||
|
||||
currentBatch.push(trimmed);
|
||||
state.lastProcessedLine = currentLineNumber;
|
||||
|
||||
if (currentBatch.length >= batchSize) {
|
||||
await processBatch(currentBatch);
|
||||
currentBatch = [];
|
||||
}
|
||||
}
|
||||
|
||||
// Process remaining items in last batch
|
||||
if (currentBatch.length > 0 && !isInterrupted) {
|
||||
await processBatch(currentBatch, currentLineNumber);
|
||||
await processBatch(currentBatch);
|
||||
}
|
||||
|
||||
if (isInterrupted) {
|
||||
return;
|
||||
console.log('\n');
|
||||
|
||||
if (!isInterrupted) {
|
||||
const totalTime = ((Date.now() - sessionStartTime) / 1000).toFixed(2);
|
||||
const rate = (sessionIndexed / parseFloat(totalTime)).toFixed(2);
|
||||
|
||||
console.log('━'.repeat(42));
|
||||
console.log('✅ Indexing complete!');
|
||||
console.log('');
|
||||
console.log('📊 Session Statistics:');
|
||||
console.log(` Indexed: ${sessionIndexed}`);
|
||||
console.log(` Skipped: ${sessionSkipped}`);
|
||||
console.log(` Errors: ${sessionErrors}`);
|
||||
console.log(` Time: ${totalTime}s`);
|
||||
console.log(` Rate: ${rate} docs/sec`);
|
||||
console.log('');
|
||||
console.log('📈 Total Statistics:');
|
||||
console.log(` Total indexed: ${state.indexed}`);
|
||||
console.log(` Total skipped: ${state.skipped}`);
|
||||
console.log(` Total errors: ${state.errors}`);
|
||||
console.log('');
|
||||
|
||||
deleteState(stateFile);
|
||||
}
|
||||
|
||||
// No refresh needed for Redis
|
||||
console.log('\n\n✅ All data persisted to Redis');
|
||||
|
||||
// Delete state file on successful completion
|
||||
deleteState(stateFile);
|
||||
|
||||
const duration = ((Date.now() - sessionStartTime) / 1000).toFixed(2);
|
||||
const rate = sessionIndexed > 0 ? (sessionIndexed / parseFloat(duration)).toFixed(0) : '0';
|
||||
|
||||
console.log('\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━');
|
||||
console.log('✅ Indexing complete!');
|
||||
console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
|
||||
console.log(`Total lines processed: ${currentLineNumber}`);
|
||||
if (resumingFrom > 0) {
|
||||
console.log(`Lines skipped (resumed): ${resumingFrom}`);
|
||||
console.log(`Lines processed this session: ${currentLineNumber - resumingFrom}`);
|
||||
}
|
||||
console.log(`Successfully indexed (total): ${state.indexed}`);
|
||||
console.log(`Successfully indexed (session): ${sessionIndexed}`);
|
||||
console.log(`Skipped duplicates (total): ${state.skipped}`);
|
||||
console.log(`Skipped duplicates (session): ${sessionSkipped}`);
|
||||
console.log(`Errors (total): ${state.errors}`);
|
||||
console.log(`Session duration: ${duration}s`);
|
||||
console.log(`Session rate: ${rate} docs/sec`);
|
||||
console.log('');
|
||||
|
||||
await client.quit();
|
||||
} catch (error) {
|
||||
// Save state on error
|
||||
console.error('\n\n❌ Error:', error);
|
||||
saveState(stateFile, state);
|
||||
console.error(`\n💾 State saved to ${stateFile}`);
|
||||
console.error('❌ Error:', error instanceof Error ? error.message : error);
|
||||
console.log(`💾 State saved to ${stateFile}`);
|
||||
await client.quit();
|
||||
process.exit(1);
|
||||
} finally {
|
||||
// Remove signal handlers
|
||||
process.removeListener('SIGINT', handleInterrupt);
|
||||
process.removeListener('SIGTERM', handleInterrupt);
|
||||
}
|
||||
}
|
||||
|
||||
// Parse command line arguments
|
||||
const args = process.argv.slice(2);
|
||||
const parsedArgs = parseArgs(args);
|
||||
|
||||
if (parsedArgs.showHelp || !parsedArgs.filePath) {
|
||||
showHelp();
|
||||
}
|
||||
|
||||
const filePath = parsedArgs.filePath as string;
|
||||
|
||||
// Validate file exists
|
||||
if (!existsSync(filePath)) {
|
||||
console.error(`❌ File not found: ${filePath}`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log(`\n🔧 Configuration:`);
|
||||
console.log(` File: ${filePath}`);
|
||||
console.log(` Batch size: ${parsedArgs.batchSize}`);
|
||||
console.log(` Resume: ${parsedArgs.resume}`);
|
||||
console.log(` Check duplicates: ${parsedArgs.checkDuplicates}`);
|
||||
if (parsedArgs.stateFile) {
|
||||
console.log(` State file: ${parsedArgs.stateFile}`);
|
||||
}
|
||||
console.log('');
|
||||
|
||||
indexFile(filePath, parsedArgs.batchSize, parsedArgs.resume, parsedArgs.checkDuplicates, parsedArgs.stateFile).catch(console.error);
|
||||
main();
|
||||
|
||||
@@ -13,7 +13,8 @@
|
||||
* Options:
|
||||
* --dry-run Show duplicates without removing them (default)
|
||||
* --execute Actually remove the duplicates
|
||||
* --field=<field> Check duplicates only on this field (plaintext, md5, sha1, sha256, sha512)
|
||||
* --batch-size=<number> Number of keys to scan in each batch (default: 1000)
|
||||
* --field=<field> Check duplicates only on this field (md5, sha1, sha256, sha512)
|
||||
* --help, -h Show this help message
|
||||
*/
|
||||
|
||||
@@ -23,10 +24,20 @@ const REDIS_HOST = process.env.REDIS_HOST || 'localhost';
|
||||
const REDIS_PORT = parseInt(process.env.REDIS_PORT || '6379', 10);
|
||||
const REDIS_PASSWORD = process.env.REDIS_PASSWORD || undefined;
|
||||
const REDIS_DB = parseInt(process.env.REDIS_DB || '0', 10);
|
||||
const INDEX_NAME = 'hasher';
|
||||
const DEFAULT_BATCH_SIZE = 1000;
|
||||
|
||||
interface HashDocument {
|
||||
plaintext: string;
|
||||
md5: string;
|
||||
sha1: string;
|
||||
sha256: string;
|
||||
sha512: string;
|
||||
created_at: string;
|
||||
}
|
||||
|
||||
interface ParsedArgs {
|
||||
dryRun: boolean;
|
||||
batchSize: number;
|
||||
field: string | null;
|
||||
showHelp: boolean;
|
||||
}
|
||||
@@ -39,18 +50,10 @@ interface DuplicateGroup {
|
||||
deletePlaintexts: string[];
|
||||
}
|
||||
|
||||
interface HashDocument {
|
||||
plaintext: string;
|
||||
md5: string;
|
||||
sha1: string;
|
||||
sha256: string;
|
||||
sha512: string;
|
||||
created_at: string;
|
||||
}
|
||||
|
||||
function parseArgs(args: string[]): ParsedArgs {
|
||||
const result: ParsedArgs = {
|
||||
dryRun: true,
|
||||
batchSize: DEFAULT_BATCH_SIZE,
|
||||
field: null,
|
||||
showHelp: false
|
||||
};
|
||||
@@ -64,6 +67,21 @@ function parseArgs(args: string[]): ParsedArgs {
|
||||
result.dryRun = true;
|
||||
} else if (arg === '--execute') {
|
||||
result.dryRun = false;
|
||||
} else if (arg.startsWith('--batch-size=')) {
|
||||
const value = arg.split('=')[1];
|
||||
const parsed = parseInt(value, 10);
|
||||
if (!isNaN(parsed) && parsed > 0) {
|
||||
result.batchSize = parsed;
|
||||
}
|
||||
} else if (arg === '--batch-size') {
|
||||
const nextArg = args[i + 1];
|
||||
if (nextArg && !nextArg.startsWith('-')) {
|
||||
const parsed = parseInt(nextArg, 10);
|
||||
if (!isNaN(parsed) && parsed > 0) {
|
||||
result.batchSize = parsed;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
} else if (arg.startsWith('--field=')) {
|
||||
result.field = arg.split('=')[1];
|
||||
} else if (arg === '--field') {
|
||||
@@ -89,8 +107,9 @@ Usage:
|
||||
Options:
|
||||
--dry-run Show duplicates without removing them (default)
|
||||
--execute Actually remove the duplicates
|
||||
--batch-size=<number> Number of keys to scan in each batch (default: 1000)
|
||||
--field=<field> Check duplicates only on this field
|
||||
Valid fields: plaintext, md5, sha1, sha256, sha512
|
||||
Valid fields: md5, sha1, sha256, sha512
|
||||
--help, -h Show this help message
|
||||
|
||||
Environment Variables:
|
||||
@@ -100,90 +119,81 @@ Environment Variables:
|
||||
REDIS_DB Redis database number (default: 0)
|
||||
|
||||
Examples:
|
||||
npx tsx scripts/remove-duplicates.ts # Dry run, show all duplicates
|
||||
npx tsx scripts/remove-duplicates.ts --execute # Remove all duplicates
|
||||
npx tsx scripts/remove-duplicates.ts --field=md5 # Check only md5 duplicates
|
||||
npx tsx scripts/remove-duplicates.ts --execute --field=plaintext
|
||||
# Dry run (show duplicates only)
|
||||
npm run remove-duplicates
|
||||
|
||||
Notes:
|
||||
- The script keeps the OLDEST document (by created_at) and removes newer duplicates
|
||||
- Always run with --dry-run first to review what will be deleted
|
||||
- Duplicates are checked across all hash fields by default
|
||||
# Actually remove duplicates
|
||||
npm run remove-duplicates -- --execute
|
||||
|
||||
# Check only MD5 duplicates
|
||||
npm run remove-duplicates -- --field=md5 --execute
|
||||
|
||||
Description:
|
||||
This script scans through all hash documents in Redis and identifies
|
||||
duplicates based on hash values. When duplicates are found, it keeps
|
||||
the oldest entry (by created_at) and marks the rest for deletion.
|
||||
`);
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
async function findDuplicatesForField(
|
||||
client: Redis,
|
||||
field: string
|
||||
field: 'md5' | 'sha1' | 'sha256' | 'sha512',
|
||||
batchSize: number
|
||||
): Promise<DuplicateGroup[]> {
|
||||
const duplicates: DuplicateGroup[] = [];
|
||||
const pattern = `hash:index:${field}:*`;
|
||||
const hashToPlaintexts: Map<string, string[]> = new Map();
|
||||
|
||||
console.log(` Scanning for ${field} duplicates...`);
|
||||
console.log(`🔍 Scanning ${field} indexes...`);
|
||||
|
||||
// Get all keys for this field type
|
||||
const pattern = field === 'plaintext'
|
||||
? 'hash:plaintext:*'
|
||||
: `hash:index:${field}:*`;
|
||||
let cursor = '0';
|
||||
let keysScanned = 0;
|
||||
|
||||
const keys = await client.keys(pattern);
|
||||
do {
|
||||
const [nextCursor, keys] = await client.scan(cursor, 'MATCH', pattern, 'COUNT', batchSize);
|
||||
cursor = nextCursor;
|
||||
keysScanned += keys.length;
|
||||
|
||||
// For hash indexes, group by hash value (not plaintext)
|
||||
const valueMap = new Map<string, string[]>();
|
||||
|
||||
if (field === 'plaintext') {
|
||||
// Each key is already unique for plaintext
|
||||
// Check for same plaintext with different created_at
|
||||
for (const key of keys) {
|
||||
const plaintext = key.replace('hash:plaintext:', '');
|
||||
if (!valueMap.has(plaintext)) {
|
||||
valueMap.set(plaintext, []);
|
||||
}
|
||||
valueMap.get(plaintext)!.push(plaintext);
|
||||
}
|
||||
} else {
|
||||
// For hash fields, get the plaintext and check if multiple plaintexts have same hash
|
||||
for (const key of keys) {
|
||||
const hashValue = key.replace(`hash:index:${field}:`, '');
|
||||
const hash = key.replace(`hash:index:${field}:`, '');
|
||||
const plaintext = await client.get(key);
|
||||
|
||||
if (plaintext) {
|
||||
if (!valueMap.has(hashValue)) {
|
||||
valueMap.set(hashValue, []);
|
||||
if (!hashToPlaintexts.has(hash)) {
|
||||
hashToPlaintexts.set(hash, []);
|
||||
}
|
||||
valueMap.get(hashValue)!.push(plaintext);
|
||||
hashToPlaintexts.get(hash)!.push(plaintext);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find groups with duplicates
|
||||
for (const [value, plaintexts] of valueMap) {
|
||||
const uniquePlaintexts = Array.from(new Set(plaintexts));
|
||||
process.stdout.write(`\r Keys scanned: ${keysScanned} `);
|
||||
} while (cursor !== '0');
|
||||
|
||||
if (uniquePlaintexts.length > 1) {
|
||||
// Get documents to compare timestamps
|
||||
const docs: { plaintext: string; doc: HashDocument }[] = [];
|
||||
console.log('');
|
||||
|
||||
for (const plaintext of uniquePlaintexts) {
|
||||
const docKey = `hash:plaintext:${plaintext}`;
|
||||
const docData = await client.get(docKey);
|
||||
if (docData) {
|
||||
docs.push({ plaintext, doc: JSON.parse(docData) });
|
||||
}
|
||||
}
|
||||
const duplicates: DuplicateGroup[] = [];
|
||||
|
||||
// Sort by created_at (oldest first)
|
||||
docs.sort((a, b) =>
|
||||
new Date(a.doc.created_at).getTime() - new Date(b.doc.created_at).getTime()
|
||||
for (const [hash, plaintexts] of hashToPlaintexts.entries()) {
|
||||
if (plaintexts.length > 1) {
|
||||
// Fetch documents to get created_at timestamps
|
||||
const docs = await Promise.all(
|
||||
plaintexts.map(async (pt) => {
|
||||
const data = await client.get(`hash:plaintext:${pt}`);
|
||||
return data ? JSON.parse(data) as HashDocument : null;
|
||||
})
|
||||
);
|
||||
|
||||
if (docs.length > 1) {
|
||||
const validDocs = docs.filter((doc): doc is HashDocument => doc !== null);
|
||||
|
||||
if (validDocs.length > 1) {
|
||||
// Sort by created_at, keep oldest
|
||||
validDocs.sort((a, b) => a.created_at.localeCompare(b.created_at));
|
||||
|
||||
duplicates.push({
|
||||
value,
|
||||
value: hash,
|
||||
field,
|
||||
plaintexts: docs.map(d => d.plaintext),
|
||||
keepPlaintext: docs[0].plaintext,
|
||||
deletePlaintexts: docs.slice(1).map(d => d.plaintext)
|
||||
plaintexts: validDocs.map(d => d.plaintext),
|
||||
keepPlaintext: validDocs[0].plaintext,
|
||||
deletePlaintexts: validDocs.slice(1).map(d => d.plaintext)
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -192,106 +202,24 @@ async function findDuplicatesForField(
|
||||
return duplicates;
|
||||
}
|
||||
|
||||
async function removeDuplicates(parsedArgs: ParsedArgs) {
|
||||
const client = new Redis({
|
||||
host: REDIS_HOST,
|
||||
port: REDIS_PORT,
|
||||
password: REDIS_PASSWORD,
|
||||
db: REDIS_DB,
|
||||
});
|
||||
async function removeDuplicates(
|
||||
client: Redis,
|
||||
duplicates: DuplicateGroup[],
|
||||
dryRun: boolean
|
||||
): Promise<{ deleted: number; errors: number }> {
|
||||
let deleted = 0;
|
||||
let errors = 0;
|
||||
|
||||
const fields = parsedArgs.field
|
||||
? [parsedArgs.field]
|
||||
: ['md5', 'sha1', 'sha256', 'sha512'];
|
||||
|
||||
console.log(`🔍 Hasher Duplicate Remover`);
|
||||
console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
|
||||
console.log(`Redis: ${REDIS_HOST}:${REDIS_PORT} (DB ${REDIS_DB})`);
|
||||
console.log(`Index: ${INDEX_NAME}`);
|
||||
console.log(`Mode: ${parsedArgs.dryRun ? '🔎 DRY RUN (no changes)' : '⚠️ EXECUTE (will delete)'}`);
|
||||
console.log(`Fields to check: ${fields.join(', ')}`);
|
||||
console.log('');
|
||||
console.log(`${dryRun ? '🔍 DRY RUN - Would delete:' : '🗑️ Deleting duplicates...'}`);
|
||||
console.log('');
|
||||
|
||||
try {
|
||||
// Test connection
|
||||
console.log('🔗 Connecting to Redis...');
|
||||
await client.ping();
|
||||
console.log('✅ Connected successfully\n');
|
||||
for (const dup of duplicates) {
|
||||
console.log(`Duplicate ${dup.field}: ${dup.value}`);
|
||||
console.log(` Keep: ${dup.keepPlaintext} (oldest)`);
|
||||
console.log(` Delete: ${dup.deletePlaintexts.join(', ')}`);
|
||||
|
||||
// Get index stats
|
||||
const stats = await client.hgetall('hash:stats');
|
||||
const totalCount = parseInt(stats.count || '0', 10);
|
||||
console.log(`📊 Total documents in index: ${totalCount}\n`);
|
||||
|
||||
const allDuplicates: DuplicateGroup[] = [];
|
||||
const seenPlaintexts = new Set<string>();
|
||||
|
||||
// Find duplicates for each field
|
||||
for (const field of fields) {
|
||||
console.log(`🔍 Checking duplicates for field: ${field}...`);
|
||||
const fieldDuplicates = await findDuplicatesForField(client, field);
|
||||
|
||||
// Filter out already seen plaintexts
|
||||
for (const dup of fieldDuplicates) {
|
||||
const newDeletePlaintexts = dup.deletePlaintexts.filter(p => !seenPlaintexts.has(p));
|
||||
if (newDeletePlaintexts.length > 0) {
|
||||
dup.deletePlaintexts = newDeletePlaintexts;
|
||||
newDeletePlaintexts.forEach(p => seenPlaintexts.add(p));
|
||||
allDuplicates.push(dup);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(` Found ${fieldDuplicates.length} duplicate groups for ${field}`);
|
||||
}
|
||||
|
||||
const totalToDelete = allDuplicates.reduce((sum, dup) => sum + dup.deletePlaintexts.length, 0);
|
||||
|
||||
console.log(`\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
|
||||
console.log(`📋 Summary:`);
|
||||
console.log(` Duplicate groups found: ${allDuplicates.length}`);
|
||||
console.log(` Documents to delete: ${totalToDelete}`);
|
||||
console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n`);
|
||||
|
||||
if (allDuplicates.length === 0) {
|
||||
console.log('✨ No duplicates found! Index is clean.\n');
|
||||
await client.quit();
|
||||
return;
|
||||
}
|
||||
|
||||
// Show sample of duplicates
|
||||
console.log(`📝 Sample duplicates (showing first 10):\n`);
|
||||
const samplesToShow = allDuplicates.slice(0, 10);
|
||||
for (const dup of samplesToShow) {
|
||||
const truncatedValue = dup.value.length > 50
|
||||
? dup.value.substring(0, 50) + '...'
|
||||
: dup.value;
|
||||
console.log(` Field: ${dup.field}`);
|
||||
console.log(` Value: ${truncatedValue}`);
|
||||
console.log(` Keep: ${dup.keepPlaintext}`);
|
||||
console.log(` Delete: ${dup.deletePlaintexts.length} document(s)`);
|
||||
console.log('');
|
||||
}
|
||||
|
||||
if (allDuplicates.length > 10) {
|
||||
console.log(` ... and ${allDuplicates.length - 10} more duplicate groups\n`);
|
||||
}
|
||||
|
||||
if (parsedArgs.dryRun) {
|
||||
console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
|
||||
console.log(`🔎 DRY RUN - No changes made`);
|
||||
console.log(` Run with --execute to remove ${totalToDelete} duplicate documents`);
|
||||
console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n`);
|
||||
await client.quit();
|
||||
return;
|
||||
}
|
||||
|
||||
// Execute deletion
|
||||
console.log(`\n🗑️ Removing ${totalToDelete} duplicate documents...\n`);
|
||||
|
||||
let deleted = 0;
|
||||
let errors = 0;
|
||||
|
||||
for (const dup of allDuplicates) {
|
||||
if (!dryRun) {
|
||||
for (const plaintext of dup.deletePlaintexts) {
|
||||
try {
|
||||
const docKey = `hash:plaintext:${plaintext}`;
|
||||
@@ -301,7 +229,7 @@ async function removeDuplicates(parsedArgs: ParsedArgs) {
|
||||
const doc: HashDocument = JSON.parse(docData);
|
||||
const pipeline = client.pipeline();
|
||||
|
||||
// Delete main document
|
||||
// Delete the main document
|
||||
pipeline.del(docKey);
|
||||
|
||||
// Delete all indexes
|
||||
@@ -322,58 +250,101 @@ async function removeDuplicates(parsedArgs: ParsedArgs) {
|
||||
deleted++;
|
||||
}
|
||||
}
|
||||
|
||||
process.stdout.write(`\r⏳ Progress: ${deleted + errors}/${totalToDelete} - Deleted: ${deleted}, Errors: ${errors}`);
|
||||
} catch (error) {
|
||||
console.error(`\n❌ Error deleting ${plaintext}:`, error);
|
||||
console.error(` Error deleting ${plaintext}:`, error);
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get new count
|
||||
const newStats = await client.hgetall('hash:stats');
|
||||
const newCount = parseInt(newStats.count || '0', 10);
|
||||
|
||||
console.log('\n\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━');
|
||||
console.log('✅ Duplicate removal complete!');
|
||||
console.log(`━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━`);
|
||||
console.log(`Documents deleted: ${deleted}`);
|
||||
console.log(`Errors: ${errors}`);
|
||||
console.log(`Previous document count: ${totalCount}`);
|
||||
console.log(`New document count: ${newCount}`);
|
||||
console.log('');
|
||||
}
|
||||
|
||||
return { deleted, errors };
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const args = process.argv.slice(2);
|
||||
const parsed = parseArgs(args);
|
||||
|
||||
if (parsed.showHelp) {
|
||||
showHelp();
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
const validFields: Array<'md5' | 'sha1' | 'sha256' | 'sha512'> = ['md5', 'sha1', 'sha256', 'sha512'];
|
||||
const fieldsToCheck = parsed.field
|
||||
? [parsed.field as 'md5' | 'sha1' | 'sha256' | 'sha512']
|
||||
: validFields;
|
||||
|
||||
// Validate field
|
||||
if (parsed.field && !validFields.includes(parsed.field as any)) {
|
||||
console.error(`❌ Invalid field: ${parsed.field}`);
|
||||
console.error(` Valid fields: ${validFields.join(', ')}`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const client = new Redis({
|
||||
host: REDIS_HOST,
|
||||
port: REDIS_PORT,
|
||||
password: REDIS_PASSWORD,
|
||||
db: REDIS_DB,
|
||||
});
|
||||
|
||||
console.log('');
|
||||
console.log('🔍 Hasher Duplicate Remover');
|
||||
console.log('━'.repeat(42));
|
||||
console.log(`Redis: ${REDIS_HOST}:${REDIS_PORT}`);
|
||||
console.log(`Mode: ${parsed.dryRun ? 'DRY RUN' : 'EXECUTE'}`);
|
||||
console.log(`Batch size: ${parsed.batchSize}`);
|
||||
console.log(`Fields to check: ${fieldsToCheck.join(', ')}`);
|
||||
console.log('');
|
||||
|
||||
try {
|
||||
console.log('🔗 Connecting to Redis...');
|
||||
await client.ping();
|
||||
console.log('✅ Connected successfully\n');
|
||||
|
||||
const allDuplicates: DuplicateGroup[] = [];
|
||||
|
||||
for (const field of fieldsToCheck) {
|
||||
const duplicates = await findDuplicatesForField(client, field, parsed.batchSize);
|
||||
allDuplicates.push(...duplicates);
|
||||
console.log(` Found ${duplicates.length} duplicate groups for ${field}`);
|
||||
}
|
||||
|
||||
console.log('');
|
||||
console.log(`📊 Total duplicate groups found: ${allDuplicates.length}`);
|
||||
|
||||
if (allDuplicates.length === 0) {
|
||||
console.log('✅ No duplicates found!');
|
||||
} else {
|
||||
const totalToDelete = allDuplicates.reduce(
|
||||
(sum, dup) => sum + dup.deletePlaintexts.length,
|
||||
0
|
||||
);
|
||||
console.log(` Total documents to delete: ${totalToDelete}`);
|
||||
|
||||
const { deleted, errors } = await removeDuplicates(client, allDuplicates, parsed.dryRun);
|
||||
|
||||
if (!parsed.dryRun) {
|
||||
console.log('━'.repeat(42));
|
||||
console.log('✅ Removal complete!');
|
||||
console.log('');
|
||||
console.log('📊 Statistics:');
|
||||
console.log(` Deleted: ${deleted}`);
|
||||
console.log(` Errors: ${errors}`);
|
||||
} else {
|
||||
console.log('━'.repeat(42));
|
||||
console.log('💡 This was a dry run. Use --execute to actually remove duplicates.');
|
||||
}
|
||||
}
|
||||
|
||||
await client.quit();
|
||||
} catch (error) {
|
||||
console.error('\n❌ Error:', error instanceof Error ? error.message : error);
|
||||
console.error('\n\n❌ Error:', error);
|
||||
await client.quit();
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Parse command line arguments
|
||||
const args = process.argv.slice(2);
|
||||
const parsedArgs = parseArgs(args);
|
||||
|
||||
if (parsedArgs.showHelp) {
|
||||
showHelp();
|
||||
}
|
||||
|
||||
// Validate field if provided
|
||||
const validFields = ['plaintext', 'md5', 'sha1', 'sha256', 'sha512'];
|
||||
if (parsedArgs.field && !validFields.includes(parsedArgs.field)) {
|
||||
console.error(`❌ Invalid field: ${parsedArgs.field}`);
|
||||
console.error(` Valid fields: ${validFields.join(', ')}`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log(`\n🔧 Configuration:`);
|
||||
console.log(` Mode: ${parsedArgs.dryRun ? 'dry-run' : 'execute'}`);
|
||||
if (parsedArgs.field) {
|
||||
console.log(` Field: ${parsedArgs.field}`);
|
||||
} else {
|
||||
console.log(` Fields: all (md5, sha1, sha256, sha512)`);
|
||||
}
|
||||
console.log('');
|
||||
|
||||
removeDuplicates(parsedArgs).catch(console.error);
|
||||
main();
|
||||
|
||||
Referencia en una nueva incidencia
Block a user